File size: 5,223 Bytes
f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d 544ba03 f58a09d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# app.py
import streamlit as st
from transformers import pipeline
from rouge_score import rouge_scorer
import torch
st.set_page_config(page_title="Multilingual Summarization Dashboard", layout="wide")
# -------------------------------
# Style
# -------------------------------
# Inject the local stylesheet into the page. A missing style.css should not
# crash the whole app with FileNotFoundError — fall back to default styling.
try:
    with open("style.css") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
except FileNotFoundError:
    st.warning("style.css not found — using default styling.")
# -------------------------------
# Hugging Face API Token
# -------------------------------
# Sidebar widget that collects the user's Hugging Face API token.
# The token gates all model inference further down the script.
st.sidebar.title("Hugging Face API Token")
_token_help = "Get your token from https://huggingface.co/settings/tokens"
api_token = st.sidebar.text_input(
    "Enter your Hugging Face API token:",
    help=_token_help,
    type="password",
)
# No token yet: surface a non-fatal reminder; the rest of the UI still renders.
if not api_token:
    st.warning("Please enter your Hugging Face API token to enable model inference.")
# -------------------------------
# Model Initialization
# -------------------------------
@st.cache_resource(show_spinner=True)
def load_models(token):
    """Build and cache all summarization and translation pipelines.

    Parameters
    ----------
    token : str
        Hugging Face API token used to authenticate model downloads.

    Returns
    -------
    dict[str, transformers.Pipeline]
        Pipelines keyed by the display names used throughout the UI
        ('urT5-base', 'mT5-small', 'mT5-base', 'en→ur', 'ur→en').
    """
    # Resolve the device once instead of re-querying CUDA for every pipeline.
    device = 0 if torch.cuda.is_available() else -1
    # (task, checkpoint) spec table — keeps the five pipeline constructions
    # in one place instead of five copy-pasted calls.
    specs = {
        'urT5-base': ("summarization", "mbshr/urt5-base-finetuned"),
        'mT5-small': ("summarization", "google/mt5-small"),
        'mT5-base': ("summarization", "google/mt5-base"),
        # Translation models
        'en→ur': ("translation", "Helsinki-NLP/opus-mt-en-ur"),
        'ur→en': ("translation", "Helsinki-NLP/opus-mt-ur-en"),
    }
    # NOTE(review): `use_auth_token` is deprecated in transformers >= 4.32 in
    # favour of `token=`; kept here for compatibility with older pins — switch
    # once the project's transformers version is confirmed.
    return {
        name: pipeline(task, model=checkpoint, device=device, use_auth_token=token)
        for name, (task, checkpoint) in specs.items()
    }
# Load (or fetch from cache) the model pipelines only once a token exists.
models = load_models(api_token) if api_token else {}
# -------------------------------
# Sidebar Settings
# -------------------------------
st.sidebar.title("Settings")
_summarizer_names = ["urT5-base", "mT5-small", "mT5-base"]
selected_model = st.sidebar.selectbox("Choose Summarization Model", _summarizer_names)
# Generation length bounds (min/max tokens of the produced summary).
max_length = st.sidebar.slider("Max summary length", 50, 500, 150)
min_length = st.sidebar.slider("Min summary length", 10, 300, 40)
# Optional post-processing toggles.
target_lang = st.sidebar.selectbox("Translate summary to", ["None", "English", "Urdu"])
show_comparison = st.sidebar.checkbox("Compare models")
show_rouge = st.sidebar.checkbox("Show ROUGE Score (requires reference)")
# -------------------------------
# Main Interface
# -------------------------------
# Main page: title, instructions, and the input/reference text areas.
st.title("🌐 Multilingual Summarization Dashboard (API Version)")
st.write("Enter text to summarize, optionally translate, compare models, and evaluate with ROUGE.")
text = st.text_area("Enter text to summarize:", height=200)
# The reference box is only shown when ROUGE evaluation was requested.
reference_text = (
    st.text_area("Reference summary for ROUGE evaluation:", height=100)
    if show_rouge
    else ""
)
# -------------------------------
# Generate Summary
# -------------------------------
def _summarize_chunks(model_name, text_chunks, max_len, min_len):
    """Summarize each chunk with the named pipeline and join the partial summaries."""
    pipe = models[model_name]
    parts = [
        pipe(chunk, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
        for chunk in text_chunks
    ]
    return " ".join(parts)

if st.button("Generate Summary"):
    if not api_token:
        st.error("Please provide Hugging Face API token.")
    elif not text.strip():
        st.error("Please enter some text!")
    else:
        # Character-based chunking to stay under model input limits.
        # NOTE(review): this can split words/sentences mid-chunk — consider
        # sentence-aware chunking.
        chunk_size = 500
        chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
        # The sliders allow min (up to 300) to exceed max (down to 50), which
        # is an invalid generation configuration — clamp min to max.
        effective_min = min(min_length, max_length)
        full_summary = _summarize_chunks(selected_model, chunks, max_length, effective_min)
        st.subheader(f"Summary ({selected_model}):")
        st.write(full_summary)
        # Translation of the generated summary (best-effort; failures are
        # reported but do not abort the rest of the page).
        if target_lang != "None":
            try:
                if target_lang == "English":
                    translated = models['ur→en'](full_summary)[0]['translation_text']
                else:  # Urdu
                    translated = models['en→ur'](full_summary)[0]['translation_text']
                st.subheader(f"Summary in {target_lang}:")
                st.write(translated)
            except Exception as e:
                st.warning(f"Translation failed: {str(e)}")
        # Side-by-side comparison against the non-selected summarizers.
        if show_comparison:
            st.subheader("Comparison with other models:")
            for model_name in ["urT5-base", "mT5-small", "mT5-base"]:
                if model_name == selected_model:
                    continue
                comp_summary = _summarize_chunks(model_name, chunks, max_length, effective_min)
                st.markdown(f"**{model_name} Summary:** {comp_summary}")
        # ROUGE evaluation against the user-supplied reference summary.
        if show_rouge and reference_text.strip():
            scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
            scores = scorer.score(reference_text, full_summary)
            st.subheader("ROUGE Scores:")
            for k, v in scores.items():
                st.write(f"{k}: Precision: {v.precision:.3f}, Recall: {v.recall:.3f}, F1: {v.fmeasure:.3f}")
|