Spaces:
Runtime error
Runtime error
| """ | |
| Vietnamese Sentiment Analysis - Modular Hugging Face Spaces App | |
| Uses fine-tuned model and modular page structure | |
| """ | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import time | |
| import gc | |
| import psutil | |
| import os | |
| import threading | |
| import subprocess | |
| import sys | |
| # Import modular pages | |
| from py.api_controller import create_api_controller | |
| from py.pages import ( | |
| create_single_analysis_page, | |
| create_batch_analysis_page, | |
| create_model_info_page | |
| ) | |
# Module-level singletons shared by the UI builder and the API server.
# All three start out unset; the functions below rebind them with `global`.
#   app_instance      - the SentimentGradioApp used by every page
#   api_controller    - object returned by create_api_controller()
#   api_server_thread - background thread running the API server
app_instance = api_controller = api_server_thread = None
class SentimentGradioApp:
    """Sentiment classifier wrapper used by the Gradio pages and the API.

    The instance owns the tokenizer/model pair loaded from
    ``self.finetuned_model`` and exposes single-text and batch prediction.
    Nothing is loaded at construction time; call ``load_model()`` first.
    """

    def __init__(self):
        """Set default configuration; the model itself is loaded lazily."""
        # Always use the fine-tuned model
        self.finetuned_model = "./vietnamese_sentiment_finetuned"
        self.base_model = "5CD-AI/Vietnamese-Sentiment-visobert"  # For initial fine-tuning
        self.tokenizer = None
        self.model = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Index i of this list is the label for class id i of the model output.
        self.sentiment_labels = ["Negative", "Neutral", "Positive"]
        self.model_loaded = False
        self.max_memory_mb = 8192
        self.current_model = None

    def get_memory_usage(self):
        """Return the resident set size of the current process in MB."""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    def cleanup_memory(self):
        """Release cached CUDA memory (when CUDA is available) and run the GC."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    def run_fine_tuning_if_needed(self):
        """Run the fine-tuning script when the fine-tuned model path is missing.

        Returns:
            True when ``self.finetuned_model`` already exists or the script
            exits with return code 0; False when the script is missing, exits
            with a non-zero code, or launching it raises.
        """
        if os.path.exists(self.finetuned_model):
            print(f"β Fine-tuned model already exists at {self.finetuned_model}")
            return True

        print(f"π§ Fine-tuned model not found at {self.finetuned_model}")
        print("π Starting automatic fine-tuning process...")
        try:
            # The script is expected next to this file, under ./py/
            current_dir = os.path.dirname(os.path.abspath(__file__))
            fine_tune_script = os.path.join(current_dir, "py", "fine_tune_sentiment.py")
            if not os.path.exists(fine_tune_script):
                print(f"β Fine-tuning script not found at: {fine_tune_script}")
                return False

            print("π Running fine_tune_sentiment.py...")
            print(f"π Script path: {fine_tune_script}")
            # Run the fine-tuning script as a subprocess of the same interpreter
            result = subprocess.run(
                [sys.executable, fine_tune_script],
                capture_output=True,
                text=True,
                cwd=current_dir,
            )
            if result.returncode != 0:
                print("β Fine-tuning failed with error:")
                print(result.stderr)
                return False

            print("β Fine-tuning completed successfully!")
            # splitlines() yields [] for empty output, so the header below is
            # only printed when there is something to show.
            output_lines = result.stdout.strip().splitlines()
            if output_lines:
                print("π Final output:")
                for line in output_lines[-5:]:  # Show last 5 lines to avoid spam
                    print(f" {line}")
            return True
        except Exception as e:
            print(f"β Error running fine-tuning: {e}")
            return False

    def load_model(self):
        """Load tokenizer and model from ``self.finetuned_model``.

        Fine-tuning is triggered first when the model path does not exist.

        Returns:
            True when the model is (or already was) loaded, False otherwise.
        """
        if self.model_loaded:
            return True

        print("π― Loading Vietnamese Sentiment Analysis Model")
        # Step 1: Check if fine-tuned model exists, if not, create it
        if not self.run_fine_tuning_if_needed():
            print("β Failed to create fine-tuned model")
            return False

        # Step 2: Load the fine-tuned model
        try:
            self.cleanup_memory()
            print(f"π€ Loading fine-tuned model from: {self.finetuned_model}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.finetuned_model)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.finetuned_model)
            self.model.to(self.device)
            self.model.eval()
            self.model_loaded = True
            print("β Fine-tuned model loaded successfully!")
            self.current_model = self.finetuned_model
            return True
        except Exception as e:
            print(f"β Error loading fine-tuned model: {e}")
            print("π This should not happen if fine-tuning completed successfully")
            self.model_loaded = False
            return False

    def _classify(self, text):
        """Run the model on one already-stripped text.

        Returns:
            Tuple ``(sentiment, confidence, probs, seconds)`` where ``probs``
            is the softmax output for the single input as a list of floats
            and ``seconds`` is the wall-clock time of tokenize + forward pass.

        Raises:
            Whatever the tokenizer/model raise; callers decide how to report.
        """
        self.cleanup_memory()
        start_time = time.time()

        # Tokenize input
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt",
        ).to(self.device)

        # Get prediction
        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(probabilities, dim=-1).item()
        confidence = torch.max(probabilities).item()
        inference_time = time.time() - start_time

        # Copy the probabilities to plain Python floats, then free tensors
        probs = probabilities[0].cpu().tolist()
        del probabilities, outputs, inputs
        self.cleanup_memory()

        return self.sentiment_labels[predicted_class], confidence, probs, inference_time

    def predict_sentiment(self, text):
        """Predict the sentiment of one text and format a Markdown report.

        Args:
            text: Text to analyze. ``None``, non-string and blank values are
                rejected with a message instead of raising.

        Returns:
            ``(sentiment, markdown_report)`` on success, or
            ``(None, error_message)`` when the model is not loaded, the input
            is empty, or prediction fails.
        """
        if not self.model_loaded:
            return None, "β Model not loaded. Please refresh the page."
        if not isinstance(text, str) or not text.strip():
            return None, "β Please enter some text to analyze."

        try:
            sentiment, confidence, probs, inference_time = self._classify(text.strip())

            # Create formatted output
            output_text = "\n".join([
                "## π― Sentiment Analysis Result",
                f"**Sentiment:** {sentiment}",
                f"**Confidence:** {confidence:.2%}",
                f"**Processing Time:** {inference_time:.3f}s",
                "### π Probability Distribution:",
                f"- π **Negative:** {probs[0]:.2%}",
                f"- π **Neutral:** {probs[1]:.2%}",
                f"- π **Positive:** {probs[2]:.2%}",
                "### π Input Text:",
                f'> "{text}"',
                "---",
                f"*Analysis completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*",
                f"*Memory usage: {self.get_memory_usage():.1f}MB*",
            ]).strip()
            return sentiment, output_text
        except Exception as e:
            self.cleanup_memory()
            return None, f"β Error during prediction: {str(e)}"

    def batch_predict(self, texts):
        """Predict sentiment for up to 10 texts.

        Args:
            texts: Iterable of texts. ``None``, non-string and blank entries
                are ignored.

        Returns:
            ``(results, None)`` on success, where each result is a dict with
            keys ``text``, ``sentiment``, ``confidence`` and
            ``processing_time`` (seconds); or ``([], error_message)`` when the
            request is rejected. Texts whose prediction fails are skipped.
        """
        if not self.model_loaded:
            return [], "β Model not loaded. Please refresh the page."
        if not texts or not any(texts):
            return [], "β Please enter some texts to analyze."

        # Filter valid texts; non-string entries (e.g. None) are dropped
        # instead of crashing on .strip()
        valid_texts = [
            text.strip() for text in texts
            if isinstance(text, str) and text.strip()
        ]
        if len(valid_texts) > 10:
            return [], "β Too many texts. Maximum 10 texts per batch for memory efficiency."
        if not valid_texts:
            return [], "β No valid texts provided."

        try:
            results = []
            for text in valid_texts:
                try:
                    sentiment, confidence, _, elapsed = self._classify(text)
                except Exception as e:
                    # Best effort: one failing text must not abort the batch
                    self.cleanup_memory()
                    print(f"Skipping text after prediction error: {e}")
                    continue
                results.append({
                    "text": text,
                    "sentiment": sentiment,
                    "confidence": confidence,
                    "processing_time": elapsed,
                })
            return results, None
        except Exception as e:
            self.cleanup_memory()
            return [], f"β Error during batch prediction: {str(e)}"
def start_api_server():
    """Start the API server in a background daemon thread.

    Does nothing when no app instance exists yet or when a controller has
    already been created. On success the module-level ``api_controller`` and
    ``api_server_thread`` are set; on failure both are left untouched so a
    later call can retry.
    """
    # Both names must be declared global: without this, the thread handle
    # would be bound to a local and the module-level variable would stay None.
    global api_controller, api_server_thread

    if not app_instance or api_controller is not None:
        return

    try:
        controller = create_api_controller(app_instance)
        # Run API server on a different port to avoid conflicts
        server_thread = threading.Thread(
            target=controller.run,
            kwargs={"host": "0.0.0.0", "port": 7861},
            daemon=True,
        )
        server_thread.start()
    except Exception as e:
        print(f"β Failed to start API server: {e}")
        return

    # Publish the handles only once the thread has actually been started.
    api_controller = controller
    api_server_thread = server_thread
    print("π API server started on port 7861")
    print("π API Documentation: http://localhost:7861/docs")
def create_interface():
    """Build the Gradio Blocks UI for Hugging Face Spaces.

    Creates the shared ``SentimentGradioApp``, loads its model, starts the
    API server and assembles the tabbed interface.

    Returns:
        The ``gr.Blocks`` interface, or ``None`` when the model fails to load.
    """
    global app_instance, api_controller

    app_instance = SentimentGradioApp()

    # Without a model there is nothing to serve
    model_ready = app_instance.load_model()
    if not model_ready:
        print("β Failed to load model. Please try again.")
        return None

    # Bring up the API server alongside the UI
    start_api_server()

    page_css = """
    .gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    }
    .main-header {
    text-align: center;
    margin-bottom: 2rem;
    }
    """
    header_template = """
    <div class="main-header">
    <h1>π Vietnamese Sentiment Analysis</h1>
    <p>Analyze sentiment in Vietnamese text using transformer models from Hugging Face</p>
    <p><strong>Current Model:</strong> {model_name} | <strong>Device:</strong> {device}</p>
    </div>
    """
    shown_model = getattr(app_instance, 'current_model', app_instance.finetuned_model)
    shown_device = str(app_instance.device).upper()

    blocks = gr.Blocks(
        title="Vietnamese Sentiment Analysis",
        theme=gr.themes.Soft(),
        css=page_css,
    )
    with blocks as interface:
        # Page header with the active model and device
        gr.HTML(header_template.format(model_name=shown_model, device=shown_device))

        # One tab per modular page
        with gr.Tabs():
            for build_page in (
                create_single_analysis_page,
                create_batch_analysis_page,
                create_model_info_page,
            ):
                build_page(app_instance)

    return interface
# Script entry point: build the interface and launch the web UI.
if __name__ == "__main__":
    print("π Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")
    interface = create_interface()
    if interface is None:
        print("β Failed to create interface. Exiting.")
        # sys.exit instead of the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist in programs.
        sys.exit(1)
    print("β Interface created successfully!")
    print("π Launching web interface...")
    # Launch the interface
    interface.launch(
        share=False,
        show_error=True,
        quiet=False,
    )