import gradio as gr
from fastapi import FastAPI
import librosa
import openai
from transformers import pipeline
import requests
import os
from pydantic import BaseModel
import numpy as np

# Initialize FastAPI (note: this app object is not used below; the interface is
# served directly by demo.launch())
app = FastAPI()

# Initialize the text emotion classifier (CPU inference with device=-1)
text_emotion_classifier = pipeline("text-classification",
                                   model="bhadresh-savani/distilbert-base-uncased-emotion",
                                   device=-1)

# Environment variables (e.g. configured as Space secrets)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
VOICE_ID = os.getenv("VOICE_ID", "9BWtsMINqrJLrRacOk9x")
def analyze_text_emotion(text):
    """Classify the dominant emotion in a piece of text."""
    try:
        emotion_result = text_emotion_classifier(text)
        emotion_data = emotion_result[0]
        return f"Emotion: {emotion_data['label']}\nConfidence: {emotion_data['score']:.2f}"
    except Exception as e:
        return f"Error: {str(e)}"
def analyze_voice_emotion(audio):
    """Estimate an emotion label from simple acoustic features of the recording."""
    try:
        if audio is None:
            return "Please upload an audio file"
        # Gradio's numpy audio format is a (sample_rate, samples) tuple
        sr, y = audio
        # Normalize integer PCM (e.g. int16) to [-1, 1] floats; otherwise the
        # intensity thresholds below would never match
        if np.issubdtype(y.dtype, np.integer):
            y = y.astype('float32') / np.iinfo(y.dtype).max
        else:
            y = y.astype('float32')
        # Downmix stereo to mono so the librosa features operate on a 1-D signal
        if y.ndim > 1:
            y = y.mean(axis=1)
        # Calculate features and convert numpy values to Python scalars
        pitch = float(librosa.feature.spectral_centroid(y=y, sr=sr).mean())  # brightness proxy for pitch
        intensity = float(librosa.feature.rms(y=y).mean())
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        # Convert tempo to a Python float to avoid numpy formatting issues
        tempo = float(tempo)
        # Map the features to an emotion with a simple threshold heuristic
        if pitch < 150 and intensity < 0.02:
            emotion = "sadness"
        elif pitch > 200 and intensity > 0.05:
            emotion = "anger"
        elif pitch > 150 and intensity < 0.03:
            emotion = "joy"
        else:
            emotion = "anxiety"
        # Format the output using Python floats instead of numpy values
        return "Emotion: {}\nPitch: {:.2f}\nIntensity: {:.2f}\nTempo: {:.2f}".format(
            emotion, pitch, intensity, tempo
        )
    except Exception as e:
        return f"Error analyzing audio: {str(e)}"
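
# A quick local sanity check (illustrative only, not called by the app): builds a
# short synthetic 220 Hz tone and prints the label the threshold heuristic above
# assigns to it, which is handy when tuning the pitch/intensity cut-offs.
def _demo_voice_heuristic():
    sr = 22050
    t = np.linspace(0, 2.0, int(sr * 2.0), endpoint=False)
    y = (0.02 * np.sin(2 * np.pi * 220.0 * t)).astype('float32')  # quiet pure tone
    print(analyze_voice_emotion((sr, y)))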
def chat_and_tts(message):
    """Generate a chat reply with OpenAI and synthesize it with ElevenLabs."""
    try:
        if not OPENAI_API_KEY or not ELEVEN_LABS_API_KEY:
            return "API keys not configured", None
        # Legacy ChatCompletion interface; requires the openai<1.0 SDK
        openai.api_key = OPENAI_API_KEY
        chat_response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message},
            ]
        )
        response_text = chat_response['choices'][0]['message']['content'].strip()
        # Send the reply to the ElevenLabs text-to-speech endpoint
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}"
        headers = {
            "xi-api-key": ELEVEN_LABS_API_KEY,
            "Content-Type": "application/json"
        }
        data = {
            "text": response_text,
            "voice_settings": {
                "stability": 0.75,
                "similarity_boost": 0.75
            }
        }
        response = requests.post(url, json=data, headers=headers)
        if response.status_code != 200:
            # Fall back to text-only output if TTS fails
            return response_text, None
        audio_path = "response.mp3"
        with open(audio_path, "wb") as f:
            f.write(response.content)
        return response_text, audio_path
    except Exception as e:
        return f"Error: {str(e)}", None
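
# Note: the legacy openai.ChatCompletion call above requires the openai<1.0 SDK.
# If the environment ships openai>=1.0 instead, the equivalent request would look
# like this sketch (the import is deferred so the module still loads either way):
def chat_with_openai_v1(message):
    from openai import OpenAI  # only available in openai>=1.0
    client = OpenAI(api_key=OPENAI_API_KEY)
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": message},
        ],
    )
    return completion.choices[0].message.content.strip()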
# Create Gradio interface
demo = gr.Blocks(title="AI Therapist")
with demo:
    gr.Markdown("# AI Virtual Therapist")
    with gr.Tab("Text Emotion Analysis"):
        text_input = gr.Textbox(label="Enter text")
        text_button = gr.Button("Analyze Text Emotion")
        text_output = gr.Textbox(label="Emotion Analysis Result")
        text_button.click(analyze_text_emotion, inputs=text_input, outputs=text_output)
    with gr.Tab("Voice Emotion Analysis"):
        audio_input = gr.Audio(label="Upload Audio", type="numpy")
        audio_button = gr.Button("Analyze Voice Emotion")
        audio_output = gr.Textbox(label="Voice Analysis Result")
        audio_button.click(analyze_voice_emotion, inputs=audio_input, outputs=audio_output)
    with gr.Tab("Chat with TTS"):
        chat_input = gr.Textbox(label="Enter your message")
        chat_button = gr.Button("Send Message")
        chat_output = gr.Textbox(label="Assistant Response")
        tts_audio_output = gr.Audio(label="Voice Response")  # renamed so it no longer shadows the voice tab's output
        chat_button.click(chat_and_tts, inputs=chat_input, outputs=[chat_output, tts_audio_output])
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
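
# Example usage from another process (assumptions: the Space is reachable at
# http://localhost:7860 and Gradio exposed the click handler under its default
# API name "/analyze_text_emotion"):
#
#   from gradio_client import Client
#   client = Client("http://localhost:7860")
#   print(client.predict("I finally finished the project!", api_name="/analyze_text_emotion"))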