Spaces:

manish-aggarwal
/

file-classification

Sleeping

App Files Files Community

manish-aggarwal committed on May 14

Commit

c1ccd2b

verified ·

1 Parent(s): 3faa170

Upload 2 files

Browse files

Files changed (2) hide show

app.py +59 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import gradio as gr
+from transformers import pipeline
+import PyPDF2
+from docx import Document
+# Load pipelines
+classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+ner = pipeline("ner", model="Jean-Baptiste/roberta-large-ner-english", grouped_entities=True)
+# File reading
+def read_file(file_obj):
+    name = file_obj.name
+    if name.endswith(".txt"):
+        return file_obj.read().decode("utf-8")
+    elif name.endswith(".pdf"):
+        reader = PyPDF2.PdfReader(file_obj)
+        return " ".join([page.extract_text() for page in reader.pages if page.extract_text()])
+    elif name.endswith(".docx"):
+        doc = Document(file_obj)
+        return "\n".join([para.text for para in doc.paragraphs])
+    else:
+        return "Unsupported file format"
+# Contract classification
+def is_contract(text):
+    result = classifier(text[:1000], ["contract", "not a contract"])
+    return result['labels'][0] == 'contract', result
+# Party extraction
+def extract_parties(text):
+    entities = ner(text[:1000])
+    return list(set(ent['word'] for ent in entities if ent['entity_group'] in ['ORG', 'PER']))
+# Main logic
+def process_file(file):
+    text = read_file(file)
+    if not text.strip():
+        return "Empty or unreadable file.", None
+    is_contract_flag, classification = is_contract(text)
+    if is_contract_flag:
+        parties = extract_parties(text)
+        return "✅ This is a contract.", parties
+    else:
+        return "❌ This is NOT a contract.", []
+# Gradio interface
+iface = gr.Interface(
+    fn=process_file,
+    inputs=gr.File(file_types=[".txt", ".pdf", ".docx"], label="Upload a document"),
+    outputs=[
+        gr.Textbox(label="Classification Result"),
+        gr.Label(label="Detected Parties")
+    ],
+    title="Contract Classifier with RoBERTa",
+    description="Upload a document (.pdf, .txt, .docx) to detect if it's a contract and extract involved parties using RoBERTa."
+)
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+transformers
+torch
+python-docx
+PyPDF2