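"""Just Comment: a Streamlit demo app.

The app classifies the emotion of a user comment, drafts a short empathetic
reply with Qwen1.5-0.5B, and reads the reply aloud with Microsoft SpeechT5
plus the HiFi-GAN vocoder.

Runtime dependencies (inferred from the imports below; pin versions as needed):
streamlit, transformers, datasets, torch, soundfile.
"""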
##########################################
# Step 0: Import required libraries
##########################################
import streamlit as st  # For the web interface
from transformers import (
    pipeline,                 # For loading pre-trained models
    SpeechT5Processor,        # For text-to-speech preprocessing
    SpeechT5ForTextToSpeech,  # TTS model
    SpeechT5HifiGan,          # Vocoder for generating audio waveforms
    AutoModelForCausalLM,     # For text generation
    AutoTokenizer             # For tokenizing input text
)  # AI model components
from datasets import load_dataset  # To load speaker (voice) embeddings
import torch  # For tensor computations
import soundfile as sf  # For handling audio files
import re  # For regular expressions in text processing
##########################################
# Initial configuration
##########################################
st.set_page_config(
    page_title="Just Comment",          # Title of the web app
    page_icon="💬",                     # Icon displayed in the browser tab
    layout="centered",                  # Center the layout of the app
    initial_sidebar_state="collapsed"   # Start with the sidebar collapsed
)
##########################################
# Global model loading with caching
##########################################
@st.cache_resource  # Cache the models so they are loaded only once
def _load_models():
    """Load and cache all ML models with optimized settings."""
    return {
        # Emotion classification pipeline
        'emotion': pipeline(
            "text-classification",                         # Specify task type
            model="Thea231/jhartmann_emotion_finetuning",  # Fine-tuned emotion model
            truncation=True                                # Truncate overly long inputs
        ),
        # Text generation components
        'textgen_tokenizer': AutoTokenizer.from_pretrained(
            "Qwen/Qwen1.5-0.5B",  # Load tokenizer
            use_fast=True         # Enable fast tokenization
        ),
        'textgen_model': AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen1.5-0.5B",        # Load text generation model
            torch_dtype=torch.float16   # Half precision for faster inference
        ),
        # Text-to-speech components
        'tts_processor': SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),    # TTS processor
        'tts_model': SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),  # TTS model
        'tts_vocoder': SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),    # Vocoder
        # Preloaded speaker embeddings
        'speaker_embeddings': torch.tensor(
            load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")[7306]["xvector"]  # x-vector for one speaker
        ).unsqueeze(0)  # Add a batch dimension
    }
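# Note: with st.cache_resource the dictionary above is built once per process
# and reused across Streamlit reruns, so repeated user interactions do not
# trigger a reload of the model weights.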
##########################################
# UI Components
##########################################
def _display_interface():
    """Render the user interface elements."""
    st.title("Just Comment")  # Set the main title of the app
    st.markdown("### I'm listening to you, my friend～")  # Subheading for user interaction
    return st.text_area(
        "📝 Enter your comment:",                  # Label for the text area
        placeholder="Type your message here...",  # Placeholder text
        height=150,                                # Height of the text area
        key="user_input"                           # Unique key for the text area
    )
##########################################
# Core Processing Functions
##########################################
def _analyze_emotion(text, classifier):
    """Identify the dominant emotion among the supported labels."""
    results = classifier(text, return_all_scores=True)[0]  # Scores for every emotion label
    valid_emotions = {'sadness', 'joy', 'love', 'anger', 'fear', 'surprise'}  # Emotions we respond to
    filtered = [e for e in results if e['label'].lower() in valid_emotions]   # Keep only supported emotions
    return max(filtered, key=lambda x: x['score'])  # Return the highest-scoring emotion
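# Illustrative shape of the classifier output consumed above (labels come from
# the fine-tuned model; the scores here are made up):
#   [{'label': 'anger', 'score': 0.91}, {'label': 'joy', 'score': 0.04}, ...]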
def _generate_prompt(text, emotion):
    """Create a structured prompt for each supported emotion type."""
    prompt_templates = {
        "sadness": (
            "Sadness detected: {input}\n"
            "Required response structure:\n"
            "1. Empathetic acknowledgment\n2. Support offer\n3. Solution proposal\n"
            "Response:"
        ),
        "joy": (
            "Joy detected: {input}\n"
            "Required response structure:\n"
            "1. Enthusiastic thanks\n2. Positive reinforcement\n3. Future engagement\n"
            "Response:"
        ),
        "love": (
            "Affection detected: {input}\n"
            "Required response structure:\n"
            "1. Warm appreciation\n2. Community focus\n3. Exclusive benefit\n"
            "Response:"
        ),
        "anger": (
            "Anger detected: {input}\n"
            "Required response structure:\n"
            "1. Sincere apology\n2. Action steps\n3. Compensation\n"
            "Response:"
        ),
        "fear": (
            "Concern detected: {input}\n"
            "Required response structure:\n"
            "1. Reassurance\n2. Safety measures\n3. Support options\n"
            "Response:"
        ),
        "surprise": (
            "Surprise detected: {input}\n"
            "Required response structure:\n"
            "1. Acknowledge uniqueness\n2. Creative solution\n3. Follow-up\n"
            "Response:"
        )
    }
    return prompt_templates.get(emotion.lower(), "").format(input=text)  # Fill in and return the matching template
def _process_response(raw_text):
    """Clean and format the generated response."""
    # Keep only the text after the last "Response:" marker
    processed = raw_text.split("Response:")[-1].strip()
    # Drop a trailing incomplete sentence
    if '.' in processed:
        processed = processed.rsplit('.', 1)[0] + '.'  # Ensure the response ends with a period
    # Keep the response between roughly 50 and 200 characters, with a fallback reply
    return processed[:200].strip() if len(processed) > 50 else "Thank you for your feedback. We value your input and will respond shortly."
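# Illustrative cleanup with hypothetical strings: a raw generation ending in
# "... Response: We are sorry for the delay and will ship a replacement today. Also we"
# is trimmed to "We are sorry for the delay and will ship a replacement today."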
def _generate_text_response(input_text, models):
    """Generate an optimized text response for the detected emotion."""
    # Emotion analysis
    emotion = _analyze_emotion(input_text, models['emotion'])  # Detect the emotion of the user input
    # Prompt engineering
    prompt = _generate_prompt(input_text, emotion['label'])  # Build a prompt for the detected emotion
    # Text generation with optimized parameters
    inputs = models['textgen_tokenizer'](prompt, return_tensors="pt").to('cpu')  # Tokenize the prompt
    outputs = models['textgen_model'].generate(
        inputs.input_ids,      # Input token IDs
        max_new_tokens=100,    # Strict token limit on response length
        temperature=0.7,       # Control randomness in text generation
        top_p=0.9,             # Control diversity via nucleus sampling
        do_sample=True,        # Enable sampling for varied responses
        pad_token_id=models['textgen_tokenizer'].eos_token_id  # Use the EOS token for padding
    )
    return _process_response(
        models['textgen_tokenizer'].decode(outputs[0], skip_special_tokens=True)  # Decode and clean the output
    )
def _generate_audio_response(text, models):
    """Convert text to speech with performance optimizations."""
    # Process text input for TTS
    inputs = models['tts_processor'](text=text, return_tensors="pt")  # Tokenize input text for TTS
    # Generate spectrogram
    spectrogram = models['tts_model'].generate_speech(
        inputs["input_ids"],          # Input token IDs for TTS
        models['speaker_embeddings']  # Use the preloaded speaker embeddings
    )
    # Generate waveform
    with torch.no_grad():  # Disable gradient tracking during inference
        waveform = models['tts_vocoder'](spectrogram)  # Convert the spectrogram to an audio waveform
    # Save the audio file
    sf.write("response.wav", waveform.numpy(), samplerate=16000)  # SpeechT5 produces 16 kHz audio
    return "response.wav"  # Return the path to the saved audio file
##########################################
# Main Application Flow
##########################################
def main():
    """Primary execution flow."""
    # Load models once (cached across reruns)
    ml_models = _load_models()
    # Display the input interface
    user_input = _display_interface()
    if user_input:  # Proceed only when the user has entered text
        # Text generation stage
        with st.spinner("🔍 Analyzing emotions and generating response..."):
            text_response = _generate_text_response(user_input, ml_models)
        # Display results
        st.subheader("📄 Generated Response")
        st.markdown(f"```\n{text_response}\n```")  # Show the generated response as a code block
        # Audio generation stage
        with st.spinner("🔊 Converting to speech..."):
            audio_file = _generate_audio_response(text_response, ml_models)
        st.audio(audio_file, format="audio/wav")  # Play the audio file in the app

if __name__ == "__main__":
    main()  # Execute the main function when the script is run