shegga's picture
πŸ—‘οΈ Remove API Endpoints page to simplify the application
9522fcb
"""
Vietnamese Sentiment Analysis - Modular Hugging Face Spaces App
Uses fine-tuned model and modular page structure
"""
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import time
import gc
import psutil
import os
import threading
import subprocess
import sys
# Import modular pages
from py.api_controller import create_api_controller
from py.pages import (
create_single_analysis_page,
create_batch_analysis_page,
create_model_info_page
)
# Module-wide singletons. All start out unset and are filled in lazily
# while the interface is being built.
app_instance: "SentimentGradioApp | None" = None  # assigned by create_interface()
api_controller = None  # assigned by start_api_server()
api_server_thread: "threading.Thread | None" = None  # meant to hold the API server thread
class SentimentGradioApp:
    """Vietnamese sentiment classifier used by the Gradio pages.

    The class makes sure a fine-tuned checkpoint exists (running
    ``py/fine_tune_sentiment.py`` when it does not), loads it onto the
    selected device, and offers single-text and batch prediction helpers.
    """

    def __init__(self):
        # Always use the fine-tuned model
        self.finetuned_model = "./vietnamese_sentiment_finetuned"
        self.base_model = "5CD-AI/Vietnamese-Sentiment-visobert"  # For initial fine-tuning
        self.tokenizer = None
        self.model = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Class index i predicted by the model is reported as sentiment_labels[i].
        self.sentiment_labels = ["Negative", "Neutral", "Positive"]
        self.model_loaded = False
        self.max_memory_mb = 8192  # NOTE(review): not read anywhere in this class
        self.current_model = None

    def get_memory_usage(self):
        """Return the resident set size of the current process in MB."""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    def cleanup_memory(self):
        """Release cached CUDA memory (when CUDA is available) and run the GC."""
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    def run_fine_tuning_if_needed(self):
        """Run the fine-tuning script unless the fine-tuned model path exists.

        Returns:
            True when the model path already exists or the script exited with
            return code 0; False when the script is missing, exits non-zero,
            or launching it raised.
        """
        if os.path.exists(self.finetuned_model):
            print(f"βœ… Fine-tuned model already exists at {self.finetuned_model}")
            return True

        print(f"πŸ”§ Fine-tuned model not found at {self.finetuned_model}")
        print("πŸš€ Starting automatic fine-tuning process...")
        try:
            # Get the correct path to the fine-tuning script
            current_dir = os.path.dirname(os.path.abspath(__file__))
            fine_tune_script = os.path.join(current_dir, "py", "fine_tune_sentiment.py")
            if not os.path.exists(fine_tune_script):
                print(f"❌ Fine-tuning script not found at: {fine_tune_script}")
                return False

            print("πŸ“‹ Running fine_tune_sentiment.py...")
            print(f"πŸ“ Script path: {fine_tune_script}")
            # Run the script with the same interpreter, from the app directory.
            result = subprocess.run(
                [sys.executable, fine_tune_script],
                capture_output=True,
                text=True,
                cwd=current_dir,
            )
            if result.returncode != 0:
                print("❌ Fine-tuning failed with error:")
                print(result.stderr)
                return False

            print("βœ… Fine-tuning completed successfully!")
            # splitlines() yields [] for empty output, so the "Final output"
            # header is only printed when there is something to show.
            output_lines = result.stdout.strip().splitlines()
            if output_lines:
                print("πŸ“Š Final output:")
                for line in output_lines[-5:]:  # Show last 5 lines to avoid spam
                    print(f" {line}")
            return True
        except Exception as e:
            print(f"❌ Error running fine-tuning: {e}")
            return False

    def load_model(self):
        """Load the fine-tuned model, creating it first if needed.

        Returns:
            True when the model is (or already was) loaded, False otherwise.
        """
        if self.model_loaded:
            return True

        print("🎯 Loading Vietnamese Sentiment Analysis Model")
        # Step 1: Check if fine-tuned model exists, if not, create it
        if not self.run_fine_tuning_if_needed():
            print("❌ Failed to create fine-tuned model")
            return False

        # Step 2: Load the fine-tuned model
        try:
            self.cleanup_memory()
            print(f"πŸ€– Loading fine-tuned model from: {self.finetuned_model}")
            self.tokenizer = AutoTokenizer.from_pretrained(self.finetuned_model)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.finetuned_model)
            self.model.to(self.device)
            self.model.eval()
            self.model_loaded = True
            print("βœ… Fine-tuned model loaded successfully!")
            self.current_model = self.finetuned_model
            return True
        except Exception as e:
            print(f"❌ Error loading fine-tuned model: {e}")
            print("πŸ”„ This should not happen if fine-tuning completed successfully")
            self.model_loaded = False
            return False

    def _run_inference(self, text):
        """Classify one non-empty, already-stripped text.

        Shared by predict_sentiment() and batch_predict() so both report the
        same numbers. Exceptions are left to the caller.

        Returns:
            Tuple ``(sentiment, confidence, probs, inference_time)`` where
            ``sentiment`` is an entry of ``self.sentiment_labels``,
            ``confidence`` is the highest softmax probability, ``probs`` is
            the list of per-class probabilities and ``inference_time`` is the
            elapsed wall-clock time in seconds.
        """
        self.cleanup_memory()
        start_time = time.time()
        # Tokenize input
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt",
        ).to(self.device)
        # Get prediction
        with torch.no_grad():
            outputs = self.model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            predicted_class = torch.argmax(probabilities, dim=-1).item()
            confidence = torch.max(probabilities).item()
        inference_time = time.time() - start_time
        # Move to CPU and clean GPU memory
        probs = probabilities.cpu().numpy()[0].tolist()
        del probabilities, outputs, inputs
        self.cleanup_memory()
        return self.sentiment_labels[predicted_class], confidence, probs, inference_time

    def predict_sentiment(self, text):
        """Predict the sentiment of one text and format a markdown report.

        Args:
            text: Text to analyze. ``None``, non-string and blank input are
                rejected with a user-facing message instead of raising.

        Returns:
            ``(sentiment, markdown_report)`` on success, or
            ``(None, error_message)`` when the model is not loaded, the input
            is empty, or inference raised.
        """
        if not self.model_loaded:
            return None, "❌ Model not loaded. Please refresh the page."
        if not isinstance(text, str) or not text.strip():
            return None, "❌ Please enter some text to analyze."
        try:
            sentiment, confidence, probs, inference_time = self._run_inference(text.strip())
            # Create formatted output
            output_text = f"""
## 🎯 Sentiment Analysis Result
**Sentiment:** {sentiment}
**Confidence:** {confidence:.2%}
**Processing Time:** {inference_time:.3f}s
### πŸ“Š Probability Distribution:
- 😠 **Negative:** {probs[0]:.2%}
- 😐 **Neutral:** {probs[1]:.2%}
- 😊 **Positive:** {probs[2]:.2%}
### πŸ“ Input Text:
> "{text}"
---
*Analysis completed at {time.strftime('%Y-%m-%d %H:%M:%S')}*
*Memory usage: {self.get_memory_usage():.1f}MB*
""".strip()
            return sentiment, output_text
        except Exception as e:
            self.cleanup_memory()
            return None, f"❌ Error during prediction: {str(e)}"

    def batch_predict(self, texts):
        """Predict sentiment for up to 10 texts.

        Args:
            texts: Iterable of candidate texts. Blank and non-string entries
                are ignored.

        Returns:
            ``(results, None)`` on success, where each result is a dict with
            the keys ``text``, ``sentiment``, ``confidence`` and
            ``processing_time`` (seconds), or ``([], error_message)`` when
            the request is rejected. A text whose inference raises is logged
            and left out of ``results``.
        """
        if not self.model_loaded:
            return [], "❌ Model not loaded. Please refresh the page."
        if not texts or not any(texts):
            return [], "❌ Please enter some texts to analyze."

        # Filter valid texts
        valid_texts = [
            text.strip() for text in texts if isinstance(text, str) and text.strip()
        ]
        if len(valid_texts) > 10:
            return [], "❌ Too many texts. Maximum 10 texts per batch for memory efficiency."
        if not valid_texts:
            return [], "❌ No valid texts provided."

        results = []
        for text in valid_texts:
            try:
                sentiment, confidence, _, inference_time = self._run_inference(text)
            except Exception as e:
                # Best effort: one failing text must not discard the rest,
                # but the failure is reported instead of silently dropped.
                self.cleanup_memory()
                print(f"❌ Skipping text after prediction error: {e}")
                continue
            results.append({
                "text": text,
                "sentiment": sentiment,
                "confidence": confidence,
                "processing_time": inference_time,
            })
        return results, None
def start_api_server():
    """Start the API server in a background daemon thread, at most once.

    Does nothing when ``app_instance`` is unset or a controller already
    exists. On success the module-level ``api_controller`` and
    ``api_server_thread`` reference the running controller and its thread;
    on failure both are reset to ``None`` so a later call can retry.
    """
    # Both names are rebound below; without the declaration the thread would
    # only ever be stored in a local and the module global would stay None.
    global api_controller, api_server_thread

    if not app_instance or api_controller is not None:
        return

    try:
        api_controller = create_api_controller(app_instance)
        # Run API server on a different port to avoid conflicts
        api_server_thread = threading.Thread(
            target=api_controller.run,
            kwargs={"host": "0.0.0.0", "port": 7861},
            daemon=True,
        )
        api_server_thread.start()
        print("🌐 API server started on port 7861")
        print("πŸ“š API Documentation: http://localhost:7861/docs")
    except Exception as e:
        # A half-initialised controller would make the guard above skip
        # every future attempt, so clear the state before reporting.
        api_controller = None
        api_server_thread = None
        print(f"❌ Failed to start API server: {e}")
def create_interface():
    """Build the Gradio Blocks UI for Hugging Face Spaces.

    Creates the module-level ``app_instance``, loads its model, starts the
    API server and assembles the tabbed interface.

    Returns:
        The ``gr.Blocks`` interface, or ``None`` when the model fails to load.
    """
    global app_instance, api_controller

    app_instance = SentimentGradioApp()

    # Nothing can be served without a model, so bail out early.
    model_ready = app_instance.load_model()
    if not model_ready:
        print("❌ Failed to load model. Please try again.")
        return None

    start_api_server()

    page_css = """
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
}
.main-header {
text-align: center;
margin-bottom: 2rem;
}
"""
    header_template = """
<div class="main-header">
<h1>🎭 Vietnamese Sentiment Analysis</h1>
<p>Analyze sentiment in Vietnamese text using transformer models from Hugging Face</p>
<p><strong>Current Model:</strong> {model_name} | <strong>Device:</strong> {device}</p>
</div>
"""
    # Page builders, in the order their tabs are created.
    page_builders = (
        create_single_analysis_page,
        create_batch_analysis_page,
        create_model_info_page,
    )

    with gr.Blocks(
        title="Vietnamese Sentiment Analysis",
        theme=gr.themes.Soft(),
        css=page_css,
    ) as interface:
        # Main title, showing which model and device are in use
        shown_model = getattr(app_instance, 'current_model', app_instance.finetuned_model)
        shown_device = str(app_instance.device).upper()
        gr.HTML(header_template.format(model_name=shown_model, device=shown_device))

        with gr.Tabs():
            for build_page in page_builders:
                build_page(app_instance)

    return interface
# Create and launch the interface
if __name__ == "__main__":
    print("πŸš€ Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")
    interface = create_interface()
    if interface is None:
        print("❌ Failed to create interface. Exiting.")
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module for interactive use and may be absent.
        sys.exit(1)
    print("βœ… Interface created successfully!")
    print("🌐 Launching web interface...")
    # Launch the interface
    interface.launch(
        share=False,
        show_error=True,
        quiet=False,
    )