#!/usr/bin/env python3
"""
Vietnamese Sentiment Analysis - Hugging Face Spaces Gradio App
"""
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time
from datetime import datetime
import gc
import psutil
import os
import pandas as pd
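
# Assumed runtime dependencies for this Space (a minimal requirements.txt
# sketch; exact version pins are not specified in the source):
#   gradio
#   torch
#   transformers
#   psutil
#   pandas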

class SentimentGradioApp:
    def __init__(self, model_name="5CD-AI/Vietnamese-Sentiment-visobert", max_batch_size=10):
        self.model_name = model_name
        self.tokenizer = None
        self.model = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.sentiment_labels = ["Negative", "Neutral", "Positive"]
        self.sentiment_colors = {
            "Negative": "#ff4444",
            "Neutral": "#ffaa00",
            "Positive": "#44ff44"
        }
        self.model_loaded = False
        self.max_batch_size = max_batch_size
        self.max_memory_mb = 8192  # Hugging Face Spaces memory limit
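        # Note: the hard-coded label order assumes class indices 0/1/2 map to
        # Negative/Neutral/Positive. If unsure, read the mapping from the model
        # config after loading instead, e.g.:
        #   labels = [model.config.id2label[i] for i in range(model.config.num_labels)]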

    def get_memory_usage(self):
        """Get current memory usage in MB"""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    def check_memory_limit(self):
        """Check if memory usage is within limits"""
        current_memory = self.get_memory_usage()
        if current_memory > self.max_memory_mb:
            return False, f"Memory usage ({current_memory:.1f}MB) exceeds limit ({self.max_memory_mb}MB)"
        return True, f"Memory usage: {current_memory:.1f}MB"

    def cleanup_memory(self):
        """Clean up GPU and CPU memory"""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
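
    # torch.cuda.empty_cache() only returns cached allocator blocks to the
    # driver; it cannot free tensors that are still referenced. gc.collect()
    # forces a full Python garbage-collection pass so unreferenced tensors
    # are actually released as well.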

    def load_model(self):
        """Load the model from Hugging Face Hub"""
        if self.model_loaded:
            return True
        try:
            # Clean up any existing memory
            self.cleanup_memory()
            # Check memory before loading
            memory_ok, memory_msg = self.check_memory_limit()
            if not memory_ok:
                print(f"❌ {memory_msg}")
                return False
            print(f"📊 {memory_msg}")
            print(f"🤖 Loading model from Hugging Face Hub: {self.model_name}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
            self.model.to(self.device)
            self.model.eval()
            self.model_loaded = True
            # Check memory after loading
            memory_ok, memory_msg = self.check_memory_limit()
            print(f"✅ Model loaded successfully from {self.model_name}")
            print(f"📊 {memory_msg}")
            return True
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            self.model_loaded = False
            self.cleanup_memory()
            return False
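
    # from_pretrained() downloads the weights on first run and caches them
    # (by default under ~/.cache/huggingface), so later restarts of the Space
    # only pay the disk-load cost, not the download.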

    def predict_sentiment(self, text):
        """Predict sentiment for given text"""
        if not self.model_loaded:
            return None, "❌ Model not loaded. Please refresh the page."
        if not text.strip():
            return None, "❌ Please enter some text to analyze."
        try:
            # Check memory before prediction
            memory_ok, memory_msg = self.check_memory_limit()
            if not memory_ok:
                return None, f"❌ {memory_msg}"
            start_time = time.time()
            # Tokenize
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            )
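            # truncation=True silently clips inputs longer than max_length=512
            # tokens, so only the first ~512 tokens of a long review influence
            # the prediction.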
            # Move to device
            inputs = {k: v.to(self.device) for k, v in inputs.items()}
            # Predict
            with torch.no_grad():
                outputs = self.model(**inputs)
                logits = outputs.logits
                probabilities = torch.softmax(logits, dim=-1)
                predicted_class = torch.argmax(probabilities, dim=-1).item()
                confidence = torch.max(probabilities).item()
            inference_time = time.time() - start_time
            # Move to CPU and clean GPU memory
            probs = probabilities.cpu().numpy()[0].tolist()
            del probabilities, logits, outputs
            self.cleanup_memory()
            sentiment = self.sentiment_labels[predicted_class]
            # Create detailed results
            result = {
                "sentiment": sentiment,
                "confidence": confidence,
                "probabilities": {
                    "Negative": probs[0],
                    "Neutral": probs[1],
                    "Positive": probs[2]
                },
                "inference_time": inference_time,
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
            # Create formatted output
            output_text = f"""
## 🎯 Sentiment Analysis Result

**Sentiment:** {sentiment}
**Confidence:** {confidence:.2%}
**Processing Time:** {inference_time:.3f}s

### 📊 Probability Distribution:
- 😞 **Negative:** {probs[0]:.2%}
- 😐 **Neutral:** {probs[1]:.2%}
- 😊 **Positive:** {probs[2]:.2%}

### 📝 Input Text:
> "{text}"

---
*Analysis completed at {result['timestamp']}*
*{memory_msg}*
""".strip()
            return result, output_text
        except Exception as e:
            self.cleanup_memory()
            return None, f"❌ Error during prediction: {str(e)}"
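
    # Shape of the result dict returned above (values are illustrative, not
    # from a real run):
    #   {"sentiment": "Positive", "confidence": 0.97,
    #    "probabilities": {"Negative": 0.01, "Neutral": 0.02, "Positive": 0.97},
    #    "inference_time": 0.084, "timestamp": "2024-01-01 12:00:00"}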

    def batch_predict(self, texts):
        """Predict sentiment for multiple texts with memory management"""
        if not self.model_loaded:
            return [], "❌ Model not loaded. Please refresh the page."
        if not texts or not any(texts):
            return [], "❌ Please enter some texts to analyze."
        # Filter valid texts and apply batch size limit
        valid_texts = [text.strip() for text in texts if text.strip()]
        if len(valid_texts) > self.max_batch_size:
            return [], f"❌ Too many texts ({len(valid_texts)}). Maximum batch size is {self.max_batch_size} for memory efficiency."
        if not valid_texts:
            return [], "❌ No valid texts provided."
        # Check memory before batch processing
        memory_ok, memory_msg = self.check_memory_limit()
        if not memory_ok:
            return [], f"❌ {memory_msg}"
        results = []
        try:
            for i, text in enumerate(valid_texts):
                # Check memory every 5 predictions
                if i % 5 == 0:
                    memory_ok, memory_msg = self.check_memory_limit()
                    if not memory_ok:
                        break
                result, _ = self.predict_sentiment(text)
                if result:
                    results.append(result)
            if not results:
                return [], "❌ No valid predictions made."
            # Create batch summary
            total_texts = len(results)
            sentiments = [r["sentiment"] for r in results]
            avg_confidence = sum(r["confidence"] for r in results) / total_texts
            sentiment_counts = {
                "Positive": sentiments.count("Positive"),
                "Neutral": sentiments.count("Neutral"),
                "Negative": sentiments.count("Negative")
            }
            summary = f"""
## 📊 Batch Analysis Summary

**Total Texts Analyzed:** {total_texts}/{len(valid_texts)}
**Average Confidence:** {avg_confidence:.2%}
**Memory Used:** {self.get_memory_usage():.1f}MB

### 🎯 Sentiment Distribution:
- 😊 **Positive:** {sentiment_counts['Positive']} ({sentiment_counts['Positive']/total_texts:.1%})
- 😐 **Neutral:** {sentiment_counts['Neutral']} ({sentiment_counts['Neutral']/total_texts:.1%})
- 😞 **Negative:** {sentiment_counts['Negative']} ({sentiment_counts['Negative']/total_texts:.1%})

### 📋 Individual Results:
""".strip()
            for i, result in enumerate(results, 1):
                summary += f"\n**{i}.** {result['sentiment']} ({result['confidence']:.1%})"
            # Final memory cleanup
            self.cleanup_memory()
            return results, summary
        except Exception as e:
            self.cleanup_memory()
            return [], f"❌ Error during batch processing: {str(e)}"
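
    # Design note: texts are scored one at a time (one forward pass each) to
    # keep peak memory low on the shared Spaces hardware. If throughput
    # mattered more than memory, the same tokenizer could pad the whole list
    # into a single batched tensor instead — a sketch, not the method used
    # above:
    #   inputs = self.tokenizer(valid_texts, return_tensors="pt",
    #                           truncation=True, padding=True, max_length=512)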

def create_interface():
    """Create the Gradio interface for Hugging Face Spaces"""
    app = SentimentGradioApp()
    # Load model
    if not app.load_model():
        print("❌ Failed to load model. Please try again.")
        return None
    # Example texts
    examples = [
        "Giảng viên dạy rất hay và tâm huyết.",
        "Môn học này quá khó và nhàm chán.",
        "Lớp học ổn định, không có gì đặc biệt.",
        "Tôi rất thích cách giảng dạy của thầy cô.",
        "Chương trình học cần cải thiện nhiều."
    ]
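    # English glosses of the examples above, in order:
    #   "The lecturer teaches very well and with dedication."
    #   "This course is too hard and boring."
    #   "The class is steady; nothing special."
    #   "I really like the teachers' teaching style."
    #   "The curriculum needs a lot of improvement."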
    # Custom CSS
    css = """
    .gradio-container {
        max-width: 900px !important;
        margin: auto !important;
    }
    .sentiment-positive {
        color: #44ff44;
        font-weight: bold;
    }
    .sentiment-neutral {
        color: #ffaa00;
        font-weight: bold;
    }
    .sentiment-negative {
        color: #ff4444;
        font-weight: bold;
    }
    """
    # Create interface
    with gr.Blocks(
        title="Vietnamese Sentiment Analysis",
        theme=gr.themes.Soft(),
        css=css
    ) as interface:
        gr.Markdown("# 🎭 Vietnamese Sentiment Analysis")
        gr.Markdown("Enter Vietnamese text to analyze sentiment using a transformer model from Hugging Face.")
        with gr.Tabs():
            # Single Text Analysis Tab
            with gr.Tab("📝 Single Text Analysis"):
                with gr.Row():
                    with gr.Column(scale=3):
                        text_input = gr.Textbox(
                            label="Enter Vietnamese Text",
                            placeholder="Type or paste Vietnamese text here...",
                            lines=3
                        )
                        with gr.Row():
                            analyze_btn = gr.Button("🔍 Analyze Sentiment", variant="primary")
                            clear_btn = gr.Button("🗑️ Clear", variant="secondary")
                    with gr.Column(scale=2):
                        gr.Examples(
                            examples=examples,
                            inputs=[text_input],
                            label="💡 Example Texts"
                        )
                result_output = gr.Markdown(label="Analysis Result", visible=True)
                confidence_plot = gr.BarPlot(
                    title="Confidence Scores",
                    x="sentiment",
                    y="confidence",
                    visible=False
                )
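                # The BarPlot declares "sentiment"/"confidence" as its x/y
                # column names; the analyze_text handler below feeds it a
                # pandas DataFrame with exactly those columns and toggles
                # visibility via a component update.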
            # Batch Analysis Tab
            with gr.Tab("📊 Batch Analysis"):
                gr.Markdown("### 📊 Memory-Efficient Batch Processing")
                gr.Markdown(f"**Maximum batch size:** {app.max_batch_size} texts (for memory efficiency)")
                gr.Markdown(f"**Memory limit:** {app.max_memory_mb}MB")
                batch_input = gr.Textbox(
                    label="Enter Multiple Texts (one per line)",
                    placeholder=f"Enter up to {app.max_batch_size} Vietnamese texts, one per line...",
                    lines=8,
                    max_lines=20
                )
                with gr.Row():
                    batch_analyze_btn = gr.Button("🔍 Analyze All", variant="primary")
                    batch_clear_btn = gr.Button("🗑️ Clear", variant="secondary")
                    memory_cleanup_btn = gr.Button("🧹 Memory Cleanup", variant="secondary")
                batch_result_output = gr.Markdown(label="Batch Analysis Result")
                memory_info = gr.Textbox(
                    label="Memory Usage",
                    value=f"{app.get_memory_usage():.1f}MB used",
                    interactive=False
                )
            # Model Info Tab
            with gr.Tab("ℹ️ Model Information"):
                gr.Markdown(f"""
## 🤖 Model Details

**Model Architecture:** Transformer-based sequence classification
**Base Model:** {app.model_name}
**Languages:** Vietnamese (optimized)
**Labels:** Negative, Neutral, Positive
**Max Batch Size:** {app.max_batch_size} texts

## 📈 Performance Metrics
- **Processing Speed:** ~100ms per text (hardware-dependent)
- **Max Sequence Length:** 512 tokens
- **Memory Limit:** {app.max_memory_mb}MB

## 💡 Usage Tips
- Enter clear, grammatically correct Vietnamese text
- Longer texts (20-200 words) work best
- The model handles various Vietnamese dialects
- Confidence scores indicate prediction certainty

## 🛡️ Memory Management
- **Automatic Cleanup:** Memory is cleaned after each prediction
- **Batch Limits:** Maximum {app.max_batch_size} texts per batch to prevent overflow
- **Memory Monitoring:** Real-time memory usage tracking
- **GPU Optimization:** CUDA cache clearing when available

## ⚠️ Performance Notes
- If you encounter memory errors, try reducing the batch size
- Use the Memory Cleanup button if needed
- Monitor memory usage in the Batch Analysis tab
- The model is loaded directly from the Hugging Face Hub (no local training required)
""")
        # Event handlers
        def analyze_text(text):
            result, output = app.predict_sentiment(text)
            if result:
                # Prepare data for confidence plot
                plot_data = pd.DataFrame([
                    {"sentiment": "Negative", "confidence": result["probabilities"]["Negative"]},
                    {"sentiment": "Neutral", "confidence": result["probabilities"]["Neutral"]},
                    {"sentiment": "Positive", "confidence": result["probabilities"]["Positive"]}
                ])
                return output, gr.BarPlot(visible=True, value=plot_data)
            else:
                return output, gr.BarPlot(visible=False)

        def clear_inputs():
            return "", "", gr.BarPlot(visible=False)

        def analyze_batch(texts):
            if texts:
                text_list = [line.strip() for line in texts.split('\n') if line.strip()]
                results, summary = app.batch_predict(text_list)
                return summary
            return "❌ Please enter some texts to analyze."

        def clear_batch():
            return ""

        def update_memory_info():
            return f"{app.get_memory_usage():.1f}MB used"

        def manual_memory_cleanup():
            app.cleanup_memory()
            return f"Memory cleaned. Current usage: {app.get_memory_usage():.1f}MB"
        # Connect events
        analyze_btn.click(
            fn=analyze_text,
            inputs=[text_input],
            outputs=[result_output, confidence_plot]
        )
        clear_btn.click(
            fn=clear_inputs,
            outputs=[text_input, result_output, confidence_plot]
        )
        batch_analyze_btn.click(
            fn=analyze_batch,
            inputs=[batch_input],
            outputs=[batch_result_output]
        )
        batch_clear_btn.click(
            fn=clear_batch,
            outputs=[batch_input]
        )
        memory_cleanup_btn.click(
            fn=manual_memory_cleanup,
            outputs=[memory_info]
        )
        # Refresh the memory readout when the page loads
        # (interface.load fires once per page load, not on a timer)
        interface.load(
            fn=update_memory_info,
            outputs=[memory_info]
        )
    return interface

# Create and launch the interface
if __name__ == "__main__":
    print("🚀 Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")
    interface = create_interface()
    if interface is None:
        print("❌ Failed to create interface. Exiting.")
        exit(1)
    print("✅ Interface created successfully!")
    print("🌐 Launching web interface...")
    # Launch the interface
    interface.launch(
        share=True,
        show_error=True,
        quiet=False
    )
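
# Note on launch flags: share=True creates a temporary public gradio.live link
# when running locally; on Hugging Face Spaces it is unnecessary, since the
# Space already serves a public URL, and share links are not supported there.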