# app.py
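# Streamlit dashboard for multilingual (English/Urdu) summarization with
# optional translation, model comparison, and ROUGE evaluation.
# Dependencies (per the imports below): streamlit, transformers, torch, rouge-score.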
import streamlit as st
from transformers import pipeline
from rouge_score import rouge_scorer
import torch

st.set_page_config(page_title="Multilingual Summarization Dashboard", layout="wide")

# -------------------------------
# Style
# -------------------------------
with open("style.css") as f:
    st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)

# -------------------------------
# Hugging Face API Token
# -------------------------------
st.sidebar.title("Hugging Face API Token")
api_token = st.sidebar.text_input(
    "Enter your Hugging Face API token:",
    type="password",
    help="Get your token from https://huggingface.co/settings/tokens"
)

if not api_token:
    st.warning("Please enter your Hugging Face API token to enable model inference.")

# -------------------------------
# Model Initialization
# -------------------------------
@st.cache_resource(show_spinner=True)
def load_models(token):
    """Build every summarization/translation pipeline once and cache them."""
    device = 0 if torch.cuda.is_available() else -1
    # Note: google/mt5-small and google/mt5-base are pretrained checkpoints with
    # no summarization fine-tuning, so expect weaker summaries from them than
    # from a fine-tuned model such as mbshr/urt5-base-finetuned.
    specs = {
        # Summarization models
        'urT5-base': ("summarization", "mbshr/urt5-base-finetuned"),
        'mT5-small': ("summarization", "google/mt5-small"),
        'mT5-base': ("summarization", "google/mt5-base"),
        # Translation models
        'en→ur': ("translation", "Helsinki-NLP/opus-mt-en-ur"),
        'ur→en': ("translation", "Helsinki-NLP/opus-mt-ur-en"),
    }
    # `token` supersedes the deprecated `use_auth_token` argument in recent
    # transformers releases.
    return {
        name: pipeline(task, model=model_id, device=device, token=token)
        for name, (task, model_id) in specs.items()
    }

models = load_models(api_token) if api_token else {}

# -------------------------------
# Sidebar Settings
# -------------------------------
st.sidebar.title("Settings")
selected_model = st.sidebar.selectbox("Choose Summarization Model", ["urT5-base", "mT5-small", "mT5-base"])
max_length = st.sidebar.slider("Max summary length", 50, 500, 150)
min_length = st.sidebar.slider("Min summary length", 10, 300, 40)
# Keep the pair consistent: generation raises an error if min_length exceeds max_length.
min_length = min(min_length, max_length)
target_lang = st.sidebar.selectbox("Translate summary to", ["None", "English", "Urdu"])
show_comparison = st.sidebar.checkbox("Compare models")
show_rouge = st.sidebar.checkbox("Show ROUGE Score (requires reference)")

# -------------------------------
# Main Interface
# -------------------------------
st.title("🌐 Multilingual Summarization Dashboard (API Version)")
st.write("Enter text to summarize, optionally translate, compare models, and evaluate with ROUGE.")

text = st.text_area("Enter text to summarize:", height=200)
reference_text = ""
if show_rouge:
    reference_text = st.text_area("Reference summary for ROUGE evaluation:", height=100)

# -------------------------------
# Generate Summary
# -------------------------------
if st.button("Generate Summary"):
    if not api_token:
        st.error("Please provide Hugging Face API token.")
    elif not text.strip():
        st.error("Please enter some text!")
    else:
        # Chunking
        chunk_size = 500
        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
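        # Note: this splits on raw characters, so chunks can break mid-word and
        # the 500-character window is unrelated to the model's token limit; a
        # token-aware splitter would be a more robust choice here.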
        full_summary = ""
        for chunk in chunks:
            summ = models[selected_model](chunk, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']
            full_summary += summ + " "

        st.subheader(f"Summary ({selected_model}):")
        st.write(full_summary)

        # Translation
        if target_lang != "None":
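            # Assumption baked into the two branches below: an "English" target
            # implies the summary is in Urdu, and vice versa. Translating text
            # that is already in the target language will degrade it.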
            try:
                if target_lang == "English":
                    translated = models['ur→en'](full_summary)[0]['translation_text']
                else:  # Urdu
                    translated = models['en→ur'](full_summary)[0]['translation_text']
                st.subheader(f"Summary in {target_lang}:")
                st.write(translated)
            except Exception as e:
                st.warning(f"Translation failed: {str(e)}")

        # Model comparison
        if show_comparison:
            st.subheader("Comparison with other models:")
            for model_name in ["urT5-base", "mT5-small", "mT5-base"]:
                if model_name != selected_model:
                    comp_summary = ""
                    for chunk in chunks:
                        comp_summary += models[model_name](chunk, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text'] + " "
                    st.markdown(f"**{model_name} Summary:** {comp_summary}")

        # ROUGE Evaluation
        if show_rouge and reference_text.strip():
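            # Caveat: use_stemmer applies an English (Porter) stemmer, which has
            # no effect on Urdu text; token-overlap ROUGE is still computed.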
            scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
            scores = scorer.score(reference_text, full_summary)
            st.subheader("ROUGE Scores:")
            for k, v in scores.items():
                st.write(f"{k}: Precision: {v.precision:.3f}, Recall: {v.recall:.3f}, F1: {v.fmeasure:.3f}")