🎭 Vietnamese Sentiment Analysis
Analyze sentiment in Vietnamese text using transformer models from Hugging Face
Current Model: {model_name} | Device: {device}
""" Vietnamese Sentiment Analysis - Modular Hugging Face Spaces App Uses fine-tuned model and modular page structure """ import gradio as gr import torch from transformers import AutoTokenizer, AutoModelForSequenceClassification import time import gc import psutil import os import threading import subprocess import sys # Import modular pages from py.api_controller import create_api_controller from py.pages import ( create_single_analysis_page, create_batch_analysis_page, create_model_info_page ) # Global app instances app_instance = None api_controller = None api_server_thread = None class SentimentGradioApp: def __init__(self): # Always use the fine-tuned model self.finetuned_model = "./vietnamese_sentiment_finetuned" self.base_model = "5CD-AI/Vietnamese-Sentiment-visobert" # For initial fine-tuning self.tokenizer = None self.model = None self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.sentiment_labels = ["Negative", "Neutral", "Positive"] self.model_loaded = False self.max_memory_mb = 8192 self.current_model = None def get_memory_usage(self): """Get current memory usage in MB""" process = psutil.Process(os.getpid()) return process.memory_info().rss / 1024 / 1024 def cleanup_memory(self): """Clean up GPU and CPU memory""" if torch.cuda.is_available(): torch.cuda.empty_cache() gc.collect() def run_fine_tuning_if_needed(self): """Run fine-tuning if the fine-tuned model doesn't exist""" if os.path.exists(self.finetuned_model): print(f"✅ Fine-tuned model already exists at {self.finetuned_model}") return True print(f"🔧 Fine-tuned model not found at {self.finetuned_model}") print("🚀 Starting automatic fine-tuning process...") try: # Get the correct path to the fine-tuning script current_dir = os.path.dirname(os.path.abspath(__file__)) fine_tune_script = os.path.join(current_dir, "py", "fine_tune_sentiment.py") if not os.path.exists(fine_tune_script): print(f"❌ Fine-tuning script not found at: {fine_tune_script}") return False print("📋 Running fine_tune_sentiment.py...") print(f"📁 Script path: {fine_tune_script}") # Run the fine-tuning script as a subprocess result = subprocess.run([ sys.executable, fine_tune_script ], capture_output=True, text=True, cwd=current_dir) if result.returncode == 0: print("✅ Fine-tuning completed successfully!") # Show only the last few lines of output to avoid spam output_lines = result.stdout.strip().split('\n') if output_lines: print("📊 Final output:") for line in output_lines[-5:]: # Show last 5 lines print(f" {line}") return True else: print(f"❌ Fine-tuning failed with error:") print(result.stderr) return False except Exception as e: print(f"❌ Error running fine-tuning: {e}") return False def load_model(self): """Load the fine-tuned model, creating it if needed""" if self.model_loaded: return True print("🎯 Loading Vietnamese Sentiment Analysis Model") # Step 1: Check if fine-tuned model exists, if not, create it if not self.run_fine_tuning_if_needed(): print("❌ Failed to create fine-tuned model") return False # Step 2: Load the fine-tuned model try: self.cleanup_memory() print(f"🤖 Loading fine-tuned model from: {self.finetuned_model}") self.tokenizer = AutoTokenizer.from_pretrained(self.finetuned_model) self.model = AutoModelForSequenceClassification.from_pretrained(self.finetuned_model) self.model.to(self.device) self.model.eval() self.model_loaded = True print(f"✅ Fine-tuned model loaded successfully!") self.current_model = self.finetuned_model return True except Exception as e: print(f"❌ Error loading fine-tuned model: {e}") print("🔄 This should not happen if fine-tuning completed successfully") self.model_loaded = False return False def predict_sentiment(self, text): """Predict sentiment for given text""" if not self.model_loaded: return None, "❌ Model not loaded. Please refresh the page." if not text.strip(): return None, "❌ Please enter some text to analyze." try: self.cleanup_memory() start_time = time.time() # Tokenize input inputs = self.tokenizer( text.strip(), truncation=True, padding=True, max_length=512, return_tensors="pt" ).to(self.device) # Get prediction with torch.no_grad(): outputs = self.model(**inputs) probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1) predicted_class = torch.argmax(probabilities, dim=-1).item() confidence = torch.max(probabilities).item() inference_time = time.time() - start_time # Move to CPU and clean GPU memory probs = probabilities.cpu().numpy()[0].tolist() del probabilities, outputs, inputs self.cleanup_memory() sentiment = self.sentiment_labels[predicted_class] # Create formatted output output_text = f""" ## 🎯 Sentiment Analysis Result **Sentiment:** {sentiment} **Confidence:** {confidence:.2%} **Processing Time:** {inference_time:.3f}s ### 📊 Probability Distribution: - 😠 **Negative:** {probs[0]:.2%} - 😐 **Neutral:** {probs[1]:.2%} - 😊 **Positive:** {probs[2]:.2%} ### 📝 Input Text: > "{text}" --- *Analysis completed at {time.strftime('%Y-%m-%d %H:%M:%S')}* *Memory usage: {self.get_memory_usage():.1f}MB* """.strip() return sentiment, output_text except Exception as e: self.cleanup_memory() return None, f"❌ Error during prediction: {str(e)}" def batch_predict(self, texts): """Predict sentiment for multiple texts""" if not self.model_loaded: return [], "❌ Model not loaded. Please refresh the page." if not texts or not any(texts): return [], "❌ Please enter some texts to analyze." # Filter valid texts valid_texts = [text.strip() for text in texts if text.strip()] if len(valid_texts) > 10: return [], "❌ Too many texts. Maximum 10 texts per batch for memory efficiency." if not valid_texts: return [], "❌ No valid texts provided." try: results = [] total_start_time = time.time() for text in valid_texts: sentiment, _ = self.predict_sentiment(text) if sentiment: results.append({ "text": text, "sentiment": sentiment, "confidence": 0.0, # Would need to extract from full output "processing_time": 0.0 # Would need to extract from full output }) total_time = time.time() - total_start_time return results, None except Exception as e: self.cleanup_memory() return [], f"❌ Error during batch prediction: {str(e)}" def start_api_server(): """Start the API server in a separate thread""" global api_controller if app_instance and api_controller is None: try: api_controller = create_api_controller(app_instance) # Run API server on a different port to avoid conflicts api_server_thread = threading.Thread( target=api_controller.run, kwargs={"host": "0.0.0.0", "port": 7861}, daemon=True ) api_server_thread.start() print("🌐 API server started on port 7861") print("📚 API Documentation: http://localhost:7861/docs") except Exception as e: print(f"❌ Failed to start API server: {e}") def create_interface(): """Create the Gradio interface for Hugging Face Spaces""" global app_instance, api_controller app_instance = SentimentGradioApp() # Load model if not app_instance.load_model(): print("❌ Failed to load model. Please try again.") return None # Start API server start_api_server() # Create the interface with gr.Blocks( title="Vietnamese Sentiment Analysis", theme=gr.themes.Soft(), css=""" .gradio-container { max-width: 1200px !important; margin: 0 auto !important; } .main-header { text-align: center; margin-bottom: 2rem; } """ ) as interface: # Main title gr.HTML("""
Analyze sentiment in Vietnamese text using transformer models from Hugging Face
Current Model: {model_name} | Device: {device}