Spaces:

alemmrr
/

finbert-gics-sector-classifier-ui

Sleeping

App Files Files Community

alemmrr committed 28 days ago

Commit

ac5d59b

verified ·

1 Parent(s): 0cfd049

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -8

app.py CHANGED Viewed

@@ -1,7 +1,14 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-# Load classifier
 tokenizer = AutoTokenizer.from_pretrained("alemmrr/finbert-gics-sector-classifier")
 model = AutoModelForSequenceClassification.from_pretrained("alemmrr/finbert-gics-sector-classifier")
@@ -9,12 +16,62 @@ clf = pipeline(
     "text-classification",
     model=model,
     tokenizer=tokenizer,
-    device=-1,
-    top_k=None
 )
 def predict(text):
-    outputs = clf(text)
     # FIX: Flatten output if it's list-of-lists
     if isinstance(outputs, list) and len(outputs) == 1 and isinstance(outputs[0], list):
@@ -32,12 +89,19 @@ def predict(text):
     scores = sorted(scores, key=lambda x: x["confidence"], reverse=True)
     return scores
 demo = gr.Interface(
     fn=predict,
-    inputs=gr.Textbox(lines=3, label="Enter text"),
     outputs=gr.JSON(label="All Sector Scores"),
-    title="FinBERT GICS Sector Classifier",
-    description="Returns all sector confidence scores."
 )
 demo.launch()

 import gradio as gr
+from transformers import (
+    AutoTokenizer,
+    AutoModelForSequenceClassification,
+    AutoModelForTokenClassification,
+    pipeline
+)
+# -----------------------------
+# Load Your Classifier
+# -----------------------------
 tokenizer = AutoTokenizer.from_pretrained("alemmrr/finbert-gics-sector-classifier")
 model = AutoModelForSequenceClassification.from_pretrained("alemmrr/finbert-gics-sector-classifier")
     "text-classification",
     model=model,
     tokenizer=tokenizer,
+    top_k=None,
+    device=-1
+)
+# -----------------------------
+# Load NER Model (for auto-formatting)
+# -----------------------------
+ner_pipeline = pipeline(
+    "ner",
+    model="Jean-Baptiste/roberta-large-ner-english",
+    aggregation_strategy="simple"
 )
+# -----------------------------
+# Helper: Format headline (Variant 3 Prefixing)
+# -----------------------------
+def format_headline_variant3(headline):
+    """Prefix a headline with its named entities (Variant-3 layout).
+
+    Runs ``ner_pipeline`` on ``headline`` and groups the recognised words
+    under the ORG, LOC, PER and GPE tags, in that order. Each non-empty
+    group is rendered as ``[TAG] w1 | w2``; the groups are separated by a
+    single space and followed by `` [SEP] `` and the original headline.
+    When no entity carries one of those tags the headline is returned
+    unchanged.
+    """
+    grouped = {tag: [] for tag in ("ORG", "LOC", "PER", "GPE")}
+    # Entities whose tag is not one of the four buckets are ignored.
+    for entity in ner_pipeline(headline):
+        label = entity["entity_group"]
+        text = entity["word"]
+        if label in grouped:
+            grouped[label].append(text)
+    # Each rendered group ends with a space; the trailing one is stripped
+    # before the separator is appended.
+    rendered = "".join(
+        f"[{label}] " + " | ".join(words) + " "
+        for label, words in grouped.items()
+        if words
+    )
+    prefix = rendered.strip() + " [SEP] " if rendered else ""
+    return prefix + headline
+# -----------------------------
+# Main Prediction Function
+# -----------------------------
 def predict(text):
+    # Auto-format headline → Variant 3
+    formatted = format_headline_variant3(text)
+    outputs = clf(formatted)
     # FIX: Flatten output if it's list-of-lists
     if isinstance(outputs, list) and len(outputs) == 1 and isinstance(outputs[0], list):
     scores = sorted(scores, key=lambda x: x["confidence"], reverse=True)
     return scores
+# -----------------------------
+# Gradio Interface
+# -----------------------------
 demo = gr.Interface(
     fn=predict,
+    inputs=gr.Textbox(lines=3, label="Enter a financial headline (plain text)"),
     outputs=gr.JSON(label="All Sector Scores"),
+    title="FinBERT GICS Sector Classifier (Auto-Formatted)",
+    description=(
+        "Enter a plain financial news headline. The app automatically applies NER tagging "
+        "and formats the text using the Variant-3 prefix structure before running classification."
+    ),
 )
 demo.launch()