File size: 5,223 Bytes
f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# app.py
import streamlit as st
from transformers import pipeline
from rouge_score import rouge_scorer
import torch
st.set_page_config(page_title="Multilingual Summarization Dashboard", layout="wide")
# -------------------------------
# Style
# -------------------------------
# Inject the local stylesheet into the page. A missing style.css should not
# crash the whole app with FileNotFoundError — fall back to default styling.
try:
    with open("style.css") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
except FileNotFoundError:
    st.warning("style.css not found — using default styling.")
# -------------------------------
# Hugging Face API Token
# -------------------------------
# Sidebar widget that collects the user's Hugging Face API token.
# The token gates all model inference further down the script.
st.sidebar.title("Hugging Face API Token")
_token_help = "Get your token from https://huggingface.co/settings/tokens"
api_token = st.sidebar.text_input(
    "Enter your Hugging Face API token:",
    help=_token_help,
    type="password",
)
# No token yet: surface a non-fatal reminder; the rest of the UI still renders.
if not api_token:
    st.warning("Please enter your Hugging Face API token to enable model inference.")
# -------------------------------
# Model Initialization
# -------------------------------
@st.cache_resource(show_spinner=True)
def load_models(token):
    """Build and cache all summarization and translation pipelines.

    Parameters
    ----------
    token : str
        Hugging Face API token used to authenticate model downloads.

    Returns
    -------
    dict[str, transformers.Pipeline]
        Pipelines keyed by the display names used throughout the UI
        ('urT5-base', 'mT5-small', 'mT5-base', 'en→ur', 'ur→en').
    """
    # Resolve the device once instead of re-querying CUDA for every pipeline.
    device = 0 if torch.cuda.is_available() else -1
    # (task, checkpoint) spec table — keeps the five pipeline constructions
    # in one place instead of five copy-pasted calls.
    specs = {
        'urT5-base': ("summarization", "mbshr/urt5-base-finetuned"),
        'mT5-small': ("summarization", "google/mt5-small"),
        'mT5-base': ("summarization", "google/mt5-base"),
        # Translation models
        'en→ur': ("translation", "Helsinki-NLP/opus-mt-en-ur"),
        'ur→en': ("translation", "Helsinki-NLP/opus-mt-ur-en"),
    }
    # NOTE(review): `use_auth_token` is deprecated in transformers >= 4.32 in
    # favour of `token=`; kept here for compatibility with older pins — switch
    # once the project's transformers version is confirmed.
    return {
        name: pipeline(task, model=checkpoint, device=device, use_auth_token=token)
        for name, (task, checkpoint) in specs.items()
    }
# Load (or fetch from cache) the model pipelines only once a token exists.
models = load_models(api_token) if api_token else {}
# -------------------------------
# Sidebar Settings
# -------------------------------
st.sidebar.title("Settings")
_summarizer_names = ["urT5-base", "mT5-small", "mT5-base"]
selected_model = st.sidebar.selectbox("Choose Summarization Model", _summarizer_names)
# Generation length bounds (min/max tokens of the produced summary).
max_length = st.sidebar.slider("Max summary length", 50, 500, 150)
min_length = st.sidebar.slider("Min summary length", 10, 300, 40)
# Optional post-processing toggles.
target_lang = st.sidebar.selectbox("Translate summary to", ["None", "English", "Urdu"])
show_comparison = st.sidebar.checkbox("Compare models")
show_rouge = st.sidebar.checkbox("Show ROUGE Score (requires reference)")
# -------------------------------
# Main Interface
# -------------------------------
# Main page: title, instructions, and the input/reference text areas.
st.title("🌐 Multilingual Summarization Dashboard (API Version)")
st.write("Enter text to summarize, optionally translate, compare models, and evaluate with ROUGE.")
text = st.text_area("Enter text to summarize:", height=200)
# The reference box is only shown when ROUGE evaluation was requested.
reference_text = (
    st.text_area("Reference summary for ROUGE evaluation:", height=100)
    if show_rouge
    else ""
)
# -------------------------------
# Generate Summary
# -------------------------------
def _summarize_chunks(model_name, text_chunks, max_len, min_len):
    """Summarize each chunk with the named pipeline and join the partial summaries."""
    pipe = models[model_name]
    parts = [
        pipe(chunk, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
        for chunk in text_chunks
    ]
    return " ".join(parts)

if st.button("Generate Summary"):
    if not api_token:
        st.error("Please provide Hugging Face API token.")
    elif not text.strip():
        st.error("Please enter some text!")
    else:
        # Character-based chunking to stay under model input limits.
        # NOTE(review): this can split words/sentences mid-chunk — consider
        # sentence-aware chunking.
        chunk_size = 500
        chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
        # The sliders allow min (up to 300) to exceed max (down to 50), which
        # is an invalid generation configuration — clamp min to max.
        effective_min = min(min_length, max_length)
        full_summary = _summarize_chunks(selected_model, chunks, max_length, effective_min)
        st.subheader(f"Summary ({selected_model}):")
        st.write(full_summary)
        # Translation of the generated summary (best-effort; failures are
        # reported but do not abort the rest of the page).
        if target_lang != "None":
            try:
                if target_lang == "English":
                    translated = models['ur→en'](full_summary)[0]['translation_text']
                else:  # Urdu
                    translated = models['en→ur'](full_summary)[0]['translation_text']
                st.subheader(f"Summary in {target_lang}:")
                st.write(translated)
            except Exception as e:
                st.warning(f"Translation failed: {str(e)}")
        # Side-by-side comparison against the non-selected summarizers.
        if show_comparison:
            st.subheader("Comparison with other models:")
            for model_name in ["urT5-base", "mT5-small", "mT5-base"]:
                if model_name == selected_model:
                    continue
                comp_summary = _summarize_chunks(model_name, chunks, max_length, effective_min)
                st.markdown(f"**{model_name} Summary:** {comp_summary}")
        # ROUGE evaluation against the user-supplied reference summary.
        if show_rouge and reference_text.strip():
            scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
            scores = scorer.score(reference_text, full_summary)
            st.subheader("ROUGE Scores:")
            for k, v in scores.items():
                st.write(f"{k}: Precision: {v.precision:.3f}, Recall: {v.recall:.3f}, F1: {v.fmeasure:.3f}")
|