Spaces:

shegga
/

SentimentAnalysisForNMTTNT

Runtime error

App Files Files Community

SentimentAnalysisForNMTTNT / app.py

shegga

🗑️ Remove API Endpoints page to simplify the application

9522fcb 21 days ago

raw

history blame contribute delete

10.8 kB

	"""
	Vietnamese Sentiment Analysis - Modular Hugging Face Spaces App
	Uses fine-tuned model and modular page structure
	"""

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import time
	import gc
	import psutil
	import os
	import threading
	import subprocess
	import sys

	# Import modular pages
	from py.api_controller import create_api_controller
	from py.pages import (
	create_single_analysis_page,
	create_batch_analysis_page,
	create_model_info_page
	)

	# Module-level singletons shared by the functions below.
	# All three start unset; create_interface() assigns app_instance and
	# start_api_server() assigns api_controller once the API controller is built.
	app_instance = api_controller = api_server_thread = None

	class SentimentGradioApp:
	    """Wrapper around the fine-tuned Vietnamese sentiment model.

	    Holds the tokenizer/model pair, the torch device they run on and the
	    label names used to translate the predicted class index. The model is
	    loaded lazily through :meth:`load_model`, which first triggers the
	    fine-tuning script when no fine-tuned checkpoint exists on disk.
	    """

	    def __init__(self):
	        """Set default paths and labels; nothing is loaded here."""
	        # Always use the fine-tuned model
	        self.finetuned_model = "./vietnamese_sentiment_finetuned"
	        self.base_model = "5CD-AI/Vietnamese-Sentiment-visobert"  # For initial fine-tuning
	        self.tokenizer = None
	        self.model = None
	        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        # Index i of the model output is reported as sentiment_labels[i].
	        self.sentiment_labels = ["Negative", "Neutral", "Positive"]
	        self.model_loaded = False
	        self.max_memory_mb = 8192
	        self.current_model = None

	    def get_memory_usage(self):
	        """Return the resident set size of the current process in MB."""
	        process = psutil.Process(os.getpid())
	        return process.memory_info().rss / 1024 / 1024

	    def cleanup_memory(self):
	        """Release cached CUDA memory (when CUDA is available) and run the GC."""
	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()
	        gc.collect()

	    def run_fine_tuning_if_needed(self):
	        """Run the fine-tuning script when the fine-tuned model is missing.

	        Returns:
	            True when the model path already exists or the script exited
	            with return code 0; False when the script is missing, exits
	            with a non-zero code, or launching it raises.
	        """
	        if os.path.exists(self.finetuned_model):
	            print(f"✅ Fine-tuned model already exists at {self.finetuned_model}")
	            return True

	        print(f"🔧 Fine-tuned model not found at {self.finetuned_model}")
	        print("🚀 Starting automatic fine-tuning process...")

	        try:
	            # Get the correct path to the fine-tuning script
	            current_dir = os.path.dirname(os.path.abspath(__file__))
	            fine_tune_script = os.path.join(current_dir, "py", "fine_tune_sentiment.py")

	            if not os.path.exists(fine_tune_script):
	                print(f"❌ Fine-tuning script not found at: {fine_tune_script}")
	                return False

	            print("📋 Running fine_tune_sentiment.py...")
	            print(f"📁 Script path: {fine_tune_script}")

	            # Run the fine-tuning script as a subprocess
	            result = subprocess.run(
	                [sys.executable, fine_tune_script],
	                capture_output=True,
	                text=True,
	                cwd=current_dir,
	            )

	            if result.returncode != 0:
	                print("❌ Fine-tuning failed with error:")
	                print(result.stderr)
	                return False

	            print("✅ Fine-tuning completed successfully!")
	            # Show only the last few lines of output to avoid spam.
	            # ''.split('\n') yields [''], so test the text itself rather
	            # than the resulting list to skip the header on empty output.
	            stdout_text = result.stdout.strip()
	            if stdout_text:
	                print("📊 Final output:")
	                for line in stdout_text.split('\n')[-5:]:  # Show last 5 lines
	                    print(f" {line}")
	            return True

	        except Exception as e:
	            print(f"❌ Error running fine-tuning: {e}")
	            return False

	    def load_model(self):
	        """Load the fine-tuned model, creating it first if needed.

	        Returns:
	            True when the model is (or already was) loaded, False when
	            fine-tuning or loading failed.
	        """
	        if self.model_loaded:
	            return True

	        print("🎯 Loading Vietnamese Sentiment Analysis Model")

	        # Step 1: Check if fine-tuned model exists, if not, create it
	        if not self.run_fine_tuning_if_needed():
	            print("❌ Failed to create fine-tuned model")
	            return False

	        # Step 2: Load the fine-tuned model
	        try:
	            self.cleanup_memory()
	            print(f"🤖 Loading fine-tuned model from: {self.finetuned_model}")

	            self.tokenizer = AutoTokenizer.from_pretrained(self.finetuned_model)
	            self.model = AutoModelForSequenceClassification.from_pretrained(self.finetuned_model)

	            self.model.to(self.device)
	            self.model.eval()
	            self.model_loaded = True

	            print("✅ Fine-tuned model loaded successfully!")
	            self.current_model = self.finetuned_model
	            return True

	        except Exception as e:
	            print(f"❌ Error loading fine-tuned model: {e}")
	            print("🔄 This should not happen if fine-tuning completed successfully")
	            self.model_loaded = False
	            return False

	    def _run_inference(self, text):
	        """Classify one text and return the raw prediction details.

	        Shared by predict_sentiment() and batch_predict() so both report
	        the same confidence and timing. Exceptions propagate to the caller.

	        Args:
	            text: Non-empty string; it is stripped before tokenization.

	        Returns:
	            Tuple ``(sentiment, confidence, probs, inference_time)`` where
	            ``sentiment`` is an entry of ``self.sentiment_labels``,
	            ``confidence`` is the highest softmax probability, ``probs`` is
	            the list of per-class probabilities and ``inference_time`` is
	            the elapsed wall-clock time in seconds.
	        """
	        self.cleanup_memory()
	        start_time = time.time()

	        # Tokenize input
	        inputs = self.tokenizer(
	            text.strip(),
	            truncation=True,
	            padding=True,
	            max_length=512,
	            return_tensors="pt"
	        ).to(self.device)

	        # Get prediction
	        with torch.no_grad():
	            outputs = self.model(**inputs)
	            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
	            predicted_class = torch.argmax(probabilities, dim=-1).item()
	            confidence = torch.max(probabilities).item()

	        inference_time = time.time() - start_time

	        # Move to CPU and clean GPU memory
	        probs = probabilities.cpu().numpy()[0].tolist()
	        del probabilities, outputs, inputs
	        self.cleanup_memory()

	        return self.sentiment_labels[predicted_class], confidence, probs, inference_time

	    def predict_sentiment(self, text):
	        """Predict sentiment for a single text.

	        Args:
	            text: Text to classify. ``None``, non-string and blank values
	                are rejected with an error message instead of raising.

	        Returns:
	            Tuple ``(sentiment, message)``. On success ``sentiment`` is the
	            label and ``message`` a markdown report; on failure
	            ``sentiment`` is None and ``message`` describes the problem.
	        """
	        if not self.model_loaded:
	            return None, "❌ Model not loaded. Please refresh the page."

	        if not isinstance(text, str) or not text.strip():
	            return None, "❌ Please enter some text to analyze."

	        try:
	            sentiment, confidence, probs, inference_time = self._run_inference(text)

	            # Create formatted output
	            output_text = f"""
	## 🎯 Sentiment Analysis Result

	Sentiment: {sentiment}
	Confidence: {confidence:.2%}
	Processing Time: {inference_time:.3f}s

	### 📊 Probability Distribution:
	- 😠 Negative: {probs[0]:.2%}
	- 😐 Neutral: {probs[1]:.2%}
	- 😊 Positive: {probs[2]:.2%}

	### 📝 Input Text:
	> "{text}"

	---
	Analysis completed at {time.strftime('%Y-%m-%d %H:%M:%S')}
	Memory usage: {self.get_memory_usage():.1f}MB
	""".strip()

	            return sentiment, output_text

	        except Exception as e:
	            self.cleanup_memory()
	            return None, f"❌ Error during prediction: {str(e)}"

	    def batch_predict(self, texts):
	        """Predict sentiment for multiple texts.

	        Args:
	            texts: Iterable of texts. Entries that are not strings or are
	                blank are ignored; at most 10 valid texts are accepted.

	        Returns:
	            Tuple ``(results, error)``. ``results`` is a list of dicts with
	            keys ``text``, ``sentiment``, ``confidence`` and
	            ``processing_time`` (seconds); ``error`` is None on success or
	            a message (with an empty list) when the request is rejected.
	            A text whose inference fails is skipped, not reported.
	        """
	        if not self.model_loaded:
	            return [], "❌ Model not loaded. Please refresh the page."

	        if not texts or not any(texts):
	            return [], "❌ Please enter some texts to analyze."

	        # Filter valid texts
	        valid_texts = [
	            text.strip()
	            for text in texts
	            if isinstance(text, str) and text.strip()
	        ]

	        if len(valid_texts) > 10:
	            return [], "❌ Too many texts. Maximum 10 texts per batch for memory efficiency."

	        if not valid_texts:
	            return [], "❌ No valid texts provided."

	        results = []
	        for text in valid_texts:
	            try:
	                sentiment, confidence, _probs, elapsed = self._run_inference(text)
	            except Exception as e:
	                # Best effort: one failing text must not abort the batch.
	                self.cleanup_memory()
	                print(f"❌ Error during batch prediction: {str(e)}")
	                continue
	            results.append({
	                "text": text,
	                "sentiment": sentiment,
	                "confidence": confidence,
	                "processing_time": elapsed,
	            })

	        return results, None


	def start_api_server():
	    """Start the API server in a daemon thread, at most once.

	    Does nothing when no app instance exists yet or when an API controller
	    has already been created. On success the module-level
	    ``api_controller`` and ``api_server_thread`` are both set; on failure
	    both are reset to None so a later call can retry, and the error is
	    printed rather than raised.
	    """
	    # Both names must be declared global: without it the thread handle
	    # would be bound to a local and the module-level variable stay None.
	    global api_controller, api_server_thread

	    if not app_instance or api_controller is not None:
	        return

	    try:
	        api_controller = create_api_controller(app_instance)
	        # Run API server on a different port to avoid conflicts
	        api_server_thread = threading.Thread(
	            target=api_controller.run,
	            kwargs={"host": "0.0.0.0", "port": 7861},
	            daemon=True,
	        )
	        api_server_thread.start()
	        print("🌐 API server started on port 7861")
	        print("📚 API Documentation: http://localhost:7861/docs")
	    except Exception as e:
	        # Roll back partial state so the "already started" guard above
	        # does not block a retry after a failed start.
	        api_controller = None
	        api_server_thread = None
	        print(f"❌ Failed to start API server: {e}")


	def create_interface():
	    """Create the Gradio interface for Hugging Face Spaces.

	    Builds the global ``SentimentGradioApp``, loads its model, starts the
	    API server and assembles the tabbed Gradio UI.

	    Returns:
	        The ``gr.Blocks`` interface, or None when the model fails to load.
	    """
	    global app_instance

	    app_instance = SentimentGradioApp()

	    # Load model
	    if not app_instance.load_model():
	        print("❌ Failed to load model. Please try again.")
	        return None

	    # Start API server
	    start_api_server()

	    # Create the interface
	    with gr.Blocks(
	        title="Vietnamese Sentiment Analysis",
	        theme=gr.themes.Soft(),
	        css="""
	.gradio-container {
	max-width: 1200px !important;
	margin: 0 auto !important;
	}
	.main-header {
	text-align: center;
	margin-bottom: 2rem;
	}
	"""
	    ) as interface:
	        # Main title. The separator is a plain "|": a backslash before it
	        # is an invalid escape sequence and would be rendered literally.
	        gr.HTML("""
	<div class="main-header">
	<h1>🎭 Vietnamese Sentiment Analysis</h1>
	<p>Analyze sentiment in Vietnamese text using transformer models from Hugging Face</p>
	<p><strong>Current Model:</strong> {model_name} | <strong>Device:</strong> {device}</p>
	</div>
	""".format(
	            model_name=getattr(app_instance, 'current_model', app_instance.finetuned_model),
	            device=str(app_instance.device).upper()
	        ))

	        # Create tabs
	        with gr.Tabs():
	            # Each page builder adds its own tab for the shared app instance
	            create_single_analysis_page(app_instance)
	            create_batch_analysis_page(app_instance)
	            create_model_info_page(app_instance)

	    return interface


	# Create and launch the interface
	if __name__ == "__main__":
	    print("🚀 Starting Vietnamese Sentiment Analysis for Hugging Face Spaces...")

	    interface = create_interface()
	    if interface is None:
	        print("❌ Failed to create interface. Exiting.")
	        # sys.exit is always available; the bare exit() helper is only
	        # injected by the site module and is meant for interactive use.
	        sys.exit(1)

	    print("✅ Interface created successfully!")
	    print("🌐 Launching web interface...")

	    # Launch the interface
	    interface.launch(
	        share=False,
	        show_error=True,
	        quiet=False,
	    )