ReadRight / app.py
ParulPandey's picture
Update app.py
d88f855 verified
raw
history blame
56.5 kB
from dotenv import load_dotenv
load_dotenv()
import gradio as gr
import os
import difflib
from gradio_client import Client
import time
import google.generativeai as genai
# --- Configuration & Clients ---
def configure_llm_api():
    """Configure the Gemini client using the ``GOOGLE_API_KEY`` credential.

    The key is first requested from ``gr.Secrets``; if that lookup raises
    ``AttributeError``/``FileNotFoundError`` or yields nothing, the
    ``GOOGLE_API_KEY`` environment variable is used instead.

    Returns:
        bool: True when a key was found and ``genai.configure`` accepted it,
        False when no key is available or configuration raised.
    """
    api_key = None
    try:
        api_key = gr.Secrets.get("GOOGLE_API_KEY")
    except (AttributeError, FileNotFoundError):
        # Secrets lookup is unavailable here; the environment is tried below.
        pass

    # Fall back to the environment whenever the secrets lookup produced no key,
    # not only when it raised.
    if not api_key:
        api_key = os.environ.get("GOOGLE_API_KEY")

    if not api_key:
        print("WARN: LLM API Key (GOOGLE_API_KEY) not found.")
        return False

    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        print(f"Error configuring LLM (Gemini) API: {e}")
        return False
    return True
# Result of the one-time Gemini configuration; checked before story generation.
LLM_API_CONFIGURED = configure_llm_api()

# Initialize new TTS client
try:
    tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
    print("✅ Connected to advanced TTS service (Text-To-Speech-Unlimited)")
except Exception as e:
    print(f"❌ Failed to connect to TTS service: {e}")
    tts_client = None

# Initialize the speech-to-text client; a failure is logged (mirroring the TTS
# client above) and leaves the client as None so callers can report it.
try:
    whisper_stt_client = Client("abidlabs/whisper-large-v2")
except Exception as e:
    print(f"❌ Failed to connect to speech-to-text service: {e}")
    whisper_stt_client = None
# --- Helper Functions ---
def generate_story_from_llm(name, grade_str, topic):
    """Ask Gemini for a short reading passage tailored to a student.

    Args:
        name: Student name inserted into the prompt.
        grade_str: Grade selection, either ``"Grade N"`` or a bare number
            (a string or anything whose ``str()`` parses as an integer).
        topic: Story topic inserted into the prompt.

    Returns:
        str: The generated story, or a human-readable message when the API is
        not configured, the grade cannot be parsed, the request was blocked,
        the model returned nothing, or the call raised.
    """
    if not LLM_API_CONFIGURED:
        return "LLM API key not configured..."

    # Coerce to str first so a non-string grade (e.g. an int) is parsed rather
    # than failing on ``.startswith``.
    try:
        grade_text = str(grade_str).strip()
        if grade_text.startswith("Grade "):
            grade_text = grade_text.replace("Grade ", "")
        grade = int(grade_text)
    except (TypeError, ValueError):
        return "Invalid grade level selected."

    # (highest grade in tier, word target for the prompt, output token cap)
    length_tiers = (
        (2, "around 40-60 words", 100),
        (5, "around 80-100 words", 200),
        (8, "around 100-120 words", 250),
    )
    word_target, max_llm_tokens = "around 120-150 words", 300
    for tier_max_grade, tier_words, tier_tokens in length_tiers:
        if grade <= tier_max_grade:
            word_target, max_llm_tokens = tier_words, tier_tokens
            break

    try:
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
        prompt = (
            f"You are an AI assistant that creates engaging short reading passages. "
            f"Generate a story of {word_target} suitable for a student named {name} in Grade {grade}. "
            f"The story topic is: '{topic}'. Use age-appropriate vocabulary for Grade {grade}. Ensure the story is interesting and easy to read aloud. "
            f"Do not include any introductory or concluding phrases like 'Here is a story'."
        )
        safety_settings = [
            {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
            for category in (
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            )
        ]
        generation_config = genai.types.GenerationConfig(
            candidate_count=1,
            max_output_tokens=max_llm_tokens,
            temperature=0.7,
        )
        response = model.generate_content(
            prompt,
            generation_config=generation_config,
            safety_settings=safety_settings,
        )

        if response.candidates and response.candidates[0].content.parts:
            # Read the text before inspecting feedback, as any error raised
            # here is reported by the handler below.
            story = response.text
            if response.prompt_feedback and response.prompt_feedback.block_reason:
                return f"Story idea for '{topic}' blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic. 😊"
            if not story.strip():
                return f"The LLM couldn't generate a story for '{topic}'. Try another topic or rephrase. ✨"
            return story.strip()

        # No usable candidate came back.
        if response.prompt_feedback and response.prompt_feedback.block_reason:
            return f"Story idea for '{topic}' got blocked (Reason: {response.prompt_feedback.block_reason}). Try a different topic. 😊"
        return "Hmm, LLM had trouble with that topic. Maybe try another one? 🤔"
    except Exception as e:
        return f"Oh no! 😟 Error generating story. Details: {e}"
def _extract_audio_path(audio_result):
    """Return the audio file path held in a TTS ``predict`` result, or None.

    A non-empty tuple or list yields its first element; a string is accepted
    only when it ends with a known audio extension.
    """
    if isinstance(audio_result, (tuple, list)):
        return audio_result[0] if len(audio_result) > 0 else None
    if isinstance(audio_result, str) and audio_result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
        return audio_result
    return None


def text_to_speech_using_space_simple(text):
    """Simplified TTS function - Gradio will show its default loading indicator.

    Tries several voice/emotion/endpoint combinations on the shared
    ``tts_client``; if none yields audio, reconnects once and retries with the
    most basic parameters.

    Args:
        text: Text to synthesize. Empty or whitespace-only text is rejected.

    Returns:
        The audio file path reported by the TTS service, or None when the text
        is empty, the service cannot be reached, or every attempt fails.
    """
    global tts_client
    if not text or not text.strip():
        return None

    space_name = "NihalGazi/Text-To-Speech-Unlimited"

    # Reconnect to TTS client if needed
    if not tts_client:
        try:
            tts_client = Client(space_name)
        except Exception as e:
            print(f"Failed to connect to TTS service: {e}")
            return None
        if not tts_client:
            return None

    # Candidate call shapes, tried in order until one produces an audio path.
    api_methods = [
        {"params": [text, "alloy", "happy"], "api_name": "/text_to_speech_app"},
        {"params": [text, "alloy", "neutral"], "api_name": "/text_to_speech_app"},
        {"params": [text, "nova", "neutral"], "api_name": "/text_to_speech_app"},
        {"params": [text], "api_name": "/predict"},
    ]
    for method in api_methods:
        try:
            print(f"Trying TTS with params: {method['params']} and api_name: {method['api_name']}")
            audio_result = tts_client.predict(
                *method["params"],
                api_name=method["api_name"]
            )
            print(f"TTS result type: {type(audio_result)}, content: {audio_result}")
            audio_filepath = _extract_audio_path(audio_result)
        except Exception as method_error:
            print(f"TTS method failed: {method_error}")
            continue
        if audio_filepath:
            print(f"Successfully generated audio: {audio_filepath}")
            print(f"FINAL: Returning audio file path: {audio_filepath}")
            return audio_filepath

    print("All TTS methods failed, trying to reconnect...")
    print("TTS error: All API methods failed")

    # Last resort: fresh connection, most basic voice/emotion parameters.
    try:
        tts_client = Client(space_name)
        audio_result = tts_client.predict(
            text,
            "alloy",  # voice
            "neutral",  # emotion
            api_name="/text_to_speech_app"
        )
        print(f"Retry result: {type(audio_result)}, {audio_result}")
        audio_filepath = _extract_audio_path(audio_result)
        if audio_filepath:
            print(f"RETRY SUCCESS: Returning audio file path: {audio_filepath}")
            return audio_filepath
    except Exception as retry_error:
        print(f"TTS retry failed: {retry_error}")

    print("TTS failed completely - returning None")
    return None
def speech_to_text_whisper_space(audio_filepath, max_retries=3):
    """Transcribe a recording through the shared Whisper client.

    Args:
        audio_filepath: Path of the recorded audio to transcribe.
        max_retries: Number of attempts made before giving up.

    Returns:
        str: The transcription, or a user-facing message when the client is
        unavailable, no recording was given, the service answered in an
        unexpected shape, nothing was transcribed, or every attempt raised.
    """
    if not whisper_stt_client:
        return "Speech-to-text service is not available. 🛠️"
    if not audio_filepath:
        return "No recording received for transcription. 🎤"

    for attempt in range(max_retries):
        try:
            result = whisper_stt_client.predict(audio_filepath, api_name="/predict")
        except Exception as e:
            # Log the failure instead of dropping it, then try again.
            print(f"STT attempt {attempt + 1}/{max_retries} failed: {e}")
            continue

        if isinstance(result, (tuple, list)) and len(result) > 0:
            transcribed_text = result[0] if result[0] else ""
        elif isinstance(result, str):
            transcribed_text = result
        else:
            return "Hmm, STT service returned unexpected format. 🤔"
        return transcribed_text if transcribed_text else "No speech detected in the recording. 🤫"

    return "Unexpected error during transcription. Please try again! 🔄"
def clean_text_for_comparison(text):
    """Normalize text into a list of lowercase words for word-level diffing.

    ASCII punctuation is deleted (the apostrophe is not in the removed set, so
    contractions stay single tokens). Typographic punctuation is normalized as
    well so it cannot cause false mismatches: curly apostrophes become ``'``,
    curly double quotes and the ellipsis character are deleted, and en/em
    dashes become spaces.

    Args:
        text: Text to normalize.

    Returns:
        list[str]: Whitespace-separated words, or ``[]`` when ``text`` is not
        a string.
    """
    if not isinstance(text, str):
        return []

    removed = "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~" + "\u201c\u201d\u2026"
    table = {ord(ch): None for ch in removed}
    table.update({
        ord("\u2018"): "'",  # left single quote
        ord("\u2019"): "'",  # right single quote / curly apostrophe
        ord("\u2013"): " ",  # en dash
        ord("\u2014"): " ",  # em dash
    })
    return text.lower().translate(table).split()
def compare_texts_for_feedback(original_text, student_text):
    """Compare a student's transcript with the story and build HTML feedback.

    Both texts are normalized with ``clean_text_for_comparison`` and aligned
    word-by-word with ``difflib.SequenceMatcher``. Matching, substituted,
    skipped and added words are counted and rendered as color-coded spans.

    Args:
        original_text: The passage the student was asked to read.
        student_text: The transcription of the student's recording.

    Returns:
        tuple[str, str]: ``(feedback_html, highlighted_passage_html)``. When
        the student text contains no words, the first element is a plain
        message and the second is ``""``.
    """
    # Imported locally so the block does not depend on the file-level imports.
    from html import escape

    original_words = clean_text_for_comparison(original_text)
    student_words = clean_text_for_comparison(student_text)
    if not student_words:
        return "It sounds like you didn't record or it was very quiet! 🤫 Try recording again nice and clear!", ""

    total_original_words = len(original_words)
    matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
    highlighted_parts = []

    # Detailed tracking
    correct_words = 0
    substituted_words = 0
    missed_words = 0
    extra_words = 0

    # Enhanced pronunciation guide with more words
    pronunciation_guide = {
        'the': 'thuh or thee', 'through': 'threw', 'though': 'thoh', 'thought': 'thawt',
        'knight': 'night', 'know': 'noh', 'write': 'right', 'wrong': 'rawng', 'what': 'wot',
        'where': 'wair', 'when': 'wen', 'why': 'wy', 'who': 'hoo', 'laugh': 'laff',
        'enough': 'ee-nuff', 'cough': 'koff', 'rough': 'ruff', 'tough': 'tuff', 'magic': 'maj-ik',
        'school': 'skool', 'friend': 'frend', 'said': 'sed', 'says': 'sez', 'once': 'wunts',
        'was': 'wuz', 'were': 'wur', 'you': 'yoo', 'your': 'yor', 'there': 'thair', 'their': 'thair', 'they': 'thay',
        'because': 'bee-koz', 'beautiful': 'byoo-ti-ful', 'different': 'dif-er-ent', 'important': 'im-por-tant',
        'people': 'pee-pul', 'together': 'too-geth-er', 'water': 'waw-ter', 'favorite': 'fay-vor-it',
        'journey': 'jur-nee', 'treasure': 'trezh-er', 'adventure': 'ad-ven-cher', 'mysterious': 'mis-teer-ee-us'
    }

    def identify_challenging_words(text_words):
        """Identify potentially difficult words from the story for proactive help"""
        challenging = []
        for word in text_words:
            word_lower = word.lower()
            # Add words that are commonly mispronounced or complex
            if (len(word) > 6 or  # Long words
                    word_lower in pronunciation_guide or  # Known difficult words
                    'tion' in word_lower or 'ough' in word_lower or  # Tricky endings
                    word_lower.startswith(('wr', 'kn', 'ph')) or  # Silent letters
                    'gh' in word_lower or 'th' in word_lower):  # Difficult sounds
                if word_lower not in challenging:
                    challenging.append(word_lower)
        return challenging[:5]  # Only the first 5 distinct matches are shown

    def get_pronunciation_tip(word):
        """Return the guide entry for a word, or a rough syllable split for long words."""
        word_lower = word.lower()
        if word_lower in pronunciation_guide:
            return pronunciation_guide[word_lower]
        if len(word) > 6:
            # Simple syllable breakdown: cut after a vowel followed by a consonant
            vowels = 'aeiou'
            syllables = []
            current_syllable = ''
            for i, char in enumerate(word_lower):
                current_syllable += char
                if char in vowels and i < len(word_lower) - 1:
                    if word_lower[i + 1] not in vowels:
                        syllables.append(current_syllable)
                        current_syllable = ''
            if current_syllable:
                syllables.append(current_syllable)
            if len(syllables) > 1:
                return '-'.join(syllables)
        return word_lower

    # Process each operation in the diff for highlighting
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        orig_seg_words, stud_seg_words = original_words[i1:i2], student_words[j1:j2]
        # Escape before interpolating so the markup stays well-formed whatever
        # the words contain.
        orig_seg_text = escape(" ".join(orig_seg_words), quote=False)
        stud_seg_text = escape(" ".join(stud_seg_words), quote=False)
        if tag == 'equal':
            correct_words += len(orig_seg_words)
            highlighted_parts.append(f'<span style="background: #22c55e; color: white; padding: 3px 6px; border-radius: 6px; margin: 2px; font-weight: 500;">{orig_seg_text}</span>')
        elif tag == 'replace':
            substituted_words += len(orig_seg_words)
            highlighted_parts.append(f'<span style="background: #f59e0b; color: white; padding: 3px 6px; border-radius: 6px; margin: 2px; font-weight: 500; text-decoration: line-through;">{orig_seg_text}</span> <span style="background: #ef4444; color: white; padding: 3px 6px; border-radius: 6px; margin: 2px; font-weight: 500;">→{stud_seg_text}</span>')
        elif tag == 'delete':
            missed_words += len(orig_seg_words)
            highlighted_parts.append(f'<span style="background: #ef4444; color: white; padding: 3px 6px; border-radius: 6px; margin: 2px; font-weight: 500; text-decoration: line-through;">{orig_seg_text}</span> <span style="font-style: italic; color: #9ca3af; font-size: 0.9em;">(skipped)</span>')
        elif tag == 'insert':
            extra_words += len(stud_seg_words)
            highlighted_parts.append(f'<span style="background: #8b5cf6; color: white; padding: 3px 6px; border-radius: 6px; margin: 2px; font-weight: 500; font-style: italic;">+{stud_seg_text}</span>')

    final_text = " ".join(highlighted_parts)

    # Calculate comprehensive metrics
    accuracy_percentage = round((correct_words / total_original_words) * 100, 1) if total_original_words > 0 else 0

    # Perfect reading case
    if accuracy_percentage == 100:
        feedback_html = f"""
<div style="background: linear-gradient(135deg, #10b981, #059669); padding: 24px; border-radius: 16px; color: white; text-align: center; margin-bottom: 20px;">
<h2 style="margin: 0 0 8px 0; font-size: 1.8rem;">🎉 PERFECT READING! 🎉</h2>
<p style="margin: 0; font-size: 1.1rem; opacity: 0.9;">Amazing! You read every single word correctly!</p>
</div>
<div style="background: #f0fdf4; border: 2px solid #22c55e; border-radius: 12px; padding: 20px; margin-bottom: 16px;">
<h3 style="color: #15803d; margin: 0 0 12px 0;">📊 Your Reading Score</h3>
<div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 16px;">
<div style="text-align: center;">
<div style="font-size: 2rem; font-weight: bold; color: #15803d;">100%</div>
<div style="font-size: 0.9rem; color: #166534;">Word Accuracy</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem; font-weight: bold; color: #15803d;">{total_original_words}/{total_original_words}</div>
<div style="font-size: 0.9rem; color: #166534;">Words Correct</div>
</div>
</div>
<div style="text-align: center; padding: 12px; background: #dcfce7; border-radius: 8px;">
<strong style="color: #15803d;">🏆 Reading Champion Level!</strong>
</div>
</div>
<div style="background: #fffbeb; border-radius: 12px; padding: 16px;">
<h4 style="color: #92400e; margin: 0 0 8px 0;">🎯 What's Next?</h4>
<ul style="margin: 8px 0; padding-left: 20px; color: #78350f;">
<li>Try a more challenging story topic</li>
<li>Practice reading faster while staying accurate</li>
<li>Help a friend or family member practice reading</li>
<li>Celebrate your excellent reading skills! 🎊</li>
</ul>
</div>
"""
        return feedback_html, final_text

    # Determine performance level: (minimum accuracy, label, color, message)
    performance_tiers = (
        (95, "🏆 Excellent", "#10b981", "Outstanding reading! You're reading like a champion!"),
        (85, "🌟 Very Good", "#3b82f6", "Great job! You're doing really well with your reading."),
        (70, "💪 Good Progress", "#f59e0b", "Nice work! Keep practicing to improve even more."),
        (50, "📚 Keep Practicing", "#ef4444", "You're learning! More practice will help you improve."),
    )
    performance_level = "🚀 Just Getting Started"
    performance_color = "#8b5cf6"
    performance_message = "Every reader starts somewhere! Keep trying and you'll get better."
    for minimum, level, color, message in performance_tiers:
        if accuracy_percentage >= minimum:
            performance_level, performance_color, performance_message = level, color, message
            break

    # Generate challenging words for proactive help
    challenging_words = identify_challenging_words(original_words)

    # Skill scores shown in the progress panel
    accuracy_score = accuracy_percentage
    fluency_score = max(0, 100 - (missed_words * 10))
    pronunciation_score = max(0, 100 - (substituted_words * 15))

    sections = []

    # Improved analysis with non-repetitive, skill-focused feedback
    sections.append(f"""
<div style="background: linear-gradient(135deg, {performance_color}, {performance_color}dd); padding: 20px; border-radius: 16px; color: white; text-align: center; margin-bottom: 20px;">
<h2 style="margin: 0 0 8px 0; font-size: 1.6rem;">{performance_level}</h2>
<p style="margin: 0; font-size: 1rem; opacity: 0.9;">{performance_message}</p>
</div>
<div style="background: #f8fafc; border-radius: 12px; padding: 20px; margin-bottom: 20px;">
<h3 style="color: #1e293b; margin: 0 0 16px 0;">📊 Reading Dashboard</h3>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); gap: 16px; margin-bottom: 16px;">
<div style="text-align: center; background: white; padding: 12px; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
<div style="font-size: 1.8rem; font-weight: bold; color: {performance_color};">{accuracy_percentage}%</div>
<div style="font-size: 0.85rem; color: #64748b;">Accuracy</div>
</div>
<div style="text-align: center; background: white; padding: 12px; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
<div style="font-size: 1.8rem; font-weight: bold; color: #22c55e;">{correct_words}</div>
<div style="font-size: 0.85rem; color: #64748b;">Words Correct</div>
</div>
<div style="text-align: center; background: white; padding: 12px; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
<div style="font-size: 1.8rem; font-weight: bold; color: #ef4444;">{missed_words}</div>
<div style="font-size: 0.85rem; color: #64748b;">Missed</div>
</div>
<div style="text-align: center; background: white; padding: 12px; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);">
<div style="font-size: 1.8rem; font-weight: bold; color: #f59e0b;">{substituted_words}</div>
<div style="font-size: 0.85rem; color: #64748b;">Changed</div>
</div>
</div>
<!-- Color Legend -->
<div style="background: #ffffff; border-radius: 8px; padding: 12px; margin-top: 16px;">
<h4 style="color: #374151; margin: 0 0 8px 0; font-size: 0.9rem;">📖 Word Color Guide:</h4>
<div style="display: flex; flex-wrap: wrap; gap: 8px; font-size: 0.8rem;">
<span style="background: #22c55e; color: white; padding: 2px 8px; border-radius: 4px;">✓ Correct</span>
<span style="background: #ef4444; color: white; padding: 2px 8px; border-radius: 4px;">✗ Missed</span>
<span style="background: #f59e0b; color: white; padding: 2px 8px; border-radius: 4px;">~ Changed</span>
<span style="background: #8b5cf6; color: white; padding: 2px 8px; border-radius: 4px;">+ Added</span>
</div>
</div>
</div>
""")

    # Smart Focus Areas - skill-based instead of error repetition.
    # Emphasis uses <strong> because this text is embedded in an HTML fragment,
    # where Markdown asterisks would be shown literally.
    improvement_areas = []
    if accuracy_percentage < 85:
        improvement_areas.append("🎯 <strong>Reading Accuracy</strong>: Focus on reading each word carefully")
    if missed_words > 2:
        improvement_areas.append("📖 <strong>Reading Fluency</strong>: Practice reading without skipping words")
    if substituted_words > 2:
        improvement_areas.append("🗣️ <strong>Pronunciation</strong>: Work on saying words clearly")
    if extra_words > 1:
        improvement_areas.append("👁️ <strong>Focus & Attention</strong>: Follow the text closely while reading")

    if improvement_areas:
        sections.append("""
<div style="background: #fef9c3; border-left: 4px solid #eab308; padding: 16px; border-radius: 8px; margin-bottom: 16px;">
<h4 style="color: #a16207; margin: 0 0 12px 0;">🎯 Smart Focus Areas</h4>
<div style="color: #a16207;">
""")
        for area in improvement_areas[:3]:  # Limit to 3 most important areas
            sections.append(f" • {area}<br>")
        sections.append("""
</div>
</div>
""")

    # Proactive Pronunciation Helper - based on story words, not just errors
    if challenging_words:
        sections.append("""
<div style="background: #e0f2fe; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
<h4 style="color: #0277bd; margin: 0 0 12px 0;">🗣️ Story Word Pronunciation Guide</h4>
<p style="color: #0277bd; font-size: 0.9rem; margin: 0 0 12px 0;">Here are some words from your story that might be tricky:</p>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 12px;">
""")
        for word in challenging_words:
            pronunciation = escape(get_pronunciation_tip(word), quote=False)
            shown_word = escape(word.upper(), quote=False)
            sections.append(f"""
<div style="background: #b3e5fc; padding: 8px 12px; border-radius: 6px;">
<strong style="color: #01579b;">{shown_word}</strong><br>
<span style="color: #0277bd; font-size: 0.85rem;">"say: {pronunciation}"</span>
</div>""")
        sections.append("""
</div>
<div style="margin-top: 12px; padding: 8px; background: #b3e5fc; border-radius: 6px; font-size: 0.9rem;">
💡 <strong>Practice tip:</strong> Listen to the AI reading these words and repeat them slowly!
</div>
</div>
""")

    # Progress Insights for Parents
    sections.append(f"""
<div style="background: #f0f9ff; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
<h4 style="color: #0369a1; margin: 0 0 12px 0;">📈 Reading Skills Progress</h4>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 12px;">
<div style="background: white; padding: 10px; border-radius: 6px; text-align: center;">
<div style="font-size: 1.2rem; font-weight: bold; color: #0369a1;">{accuracy_score:.0f}%</div>
<div style="font-size: 0.8rem; color: #64748b;">Word Accuracy</div>
</div>
<div style="background: white; padding: 10px; border-radius: 6px; text-align: center;">
<div style="font-size: 1.2rem; font-weight: bold; color: #0369a1;">{fluency_score:.0f}%</div>
<div style="font-size: 0.8rem; color: #64748b;">Reading Flow</div>
</div>
<div style="background: white; padding: 10px; border-radius: 6px; text-align: center;">
<div style="font-size: 1.2rem; font-weight: bold; color: #0369a1;">{pronunciation_score:.0f}%</div>
<div style="font-size: 0.8rem; color: #64748b;">Pronunciation</div>
</div>
</div>
</div>
""")

    # Personalized Next Steps
    if accuracy_percentage >= 85:
        next_steps = [
            "🎧 Practice reading along with the audio for better timing",
            "📚 Try a slightly more challenging story topic",
            "🗣️ Focus on reading with expression and emotion"
        ]
    elif accuracy_percentage >= 70:
        next_steps = [
            "🎧 Listen to the AI reading first, then read yourself",
            "🔤 Practice the tricky words from above separately",
            "📱 Record yourself multiple times and compare"
        ]
    else:
        next_steps = [
            "🎧 Listen to the audio several times before recording",
            "👁️ Follow along with the text while listening",
            "⏰ Take your time - read slowly and clearly"
        ]

    # Rounded so float addition cannot leak digits like 48.300000000000004.
    next_goal = round(min(accuracy_percentage + 15, 100), 1)

    sections.append("""
<div style="background: #f0f9ff; border-radius: 12px; padding: 16px;">
<h4 style="color: #0369a1; margin: 0 0 12px 0;">🎮 Your Reading Quest - Next Steps!</h4>
<div style="color: #0369a1;">
""")
    for step in next_steps:
        sections.append(f" • {step}<br>")
    sections.append(f"""
</div>
<div style="margin-top: 16px; padding: 12px; background: #dbeafe; border-radius: 8px; text-align: center;">
<strong style="color: #1e40af;">🎯 Next Goal: Reach {next_goal}% accuracy!</strong>
</div>
</div>
""")

    return "".join(sections), final_text
def assess_student_reading_ui(original_passage_state, student_audio_path):
    """Transcribe the student's recording and score it against the story.

    Args:
        original_passage_state: The story text the student was asked to read.
        student_audio_path: File path of the student's recording.

    Returns:
        tuple[str, str]: ``(feedback, highlighted_passage)``. When input is
        missing or transcription reports a problem, the first element is the
        message to show and the second is ``""``.
    """
    if not student_audio_path:
        return "🎤 Please record your reading first!", ""
    if not original_passage_state:
        return "Hmm, the original story is missing. 😟 Please generate a story first.", ""

    transcribed_text = speech_to_text_whisper_space(student_audio_path)

    stt_errors = (
        # Markers kept from the previous list.
        "couldn't understand", "had trouble", "service isn't working", "service is busy",
        "didn't get any recording", "filepath type issue",
        # Markers for the status messages speech_to_text_whisper_space returns,
        # so a service failure is shown as-is instead of being graded as a reading.
        "speech-to-text service is not available",
        "no recording received for transcription",
        "stt service returned unexpected format",
        "no speech detected in the recording",
        "unexpected error during transcription",
    )
    lowered = (transcribed_text or "").lower()
    if any(marker in lowered for marker in stt_errors):
        return transcribed_text, ""

    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
    return feedback, highlighted_passage
# Custom stylesheet passed to gr.Blocks(css=...). It styles the page
# background and font, the gradient .main-header banner, panels and section
# headers, primary/secondary button states (hover, active, processing), form
# inputs, the audio widget, the feedback container, and the spin/pulse
# keyframe animations.
css = """
body, .gradio-container {
background: #f9fafb !important;
font-family: -apple-system, BlinkMacSystemFont, 'San Francisco', 'Segoe UI', 'Roboto', Arial, sans-serif !important;
}
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border-radius: 0 !important;
box-shadow: 0 8px 32px 0 rgba(102, 126, 234, 0.3) !important;
padding: 32px 20px 28px 20px !important;
margin: -20px -20px 28px -20px !important;
width: calc(100% + 40px) !important;
text-align: center;
border: none !important;
position: relative;
overflow: hidden;
}
.main-header::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: linear-gradient(45deg, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0.05) 100%);
pointer-events: none;
}
.main-header h1 {
font-size: 2.4rem !important;
font-weight: 800 !important;
color: white !important;
margin: 0 0 8px 0 !important;
text-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
letter-spacing: -0.5px !important;
position: relative;
z-index: 1;
}
.main-header p {
color: rgba(255,255,255,0.9) !important;
font-size: 1.1rem !important;
margin: 0 !important;
font-weight: 400 !important;
position: relative;
z-index: 1;
}
.tech-badge {
background: rgba(255,255,255,0.2) !important;
color: white !important;
border-radius: 12px !important;
padding: 4px 12px !important;
font-size: 12px !important;
font-weight: 600 !important;
backdrop-filter: blur(10px) !important;
}
.gr-block, .gr-panel {background: white !important; border-radius: 18px !important; box-shadow: 0 2px 8px 0 rgba(60,60,90,0.07) !important; border: none !important; padding: 28px 22px !important;}
.section-header {background: transparent !important; border: none !important; padding: 0 !important; margin-bottom: 16px !important;}
.section-header h3 {color: #1e293b !important; font-size: 1.14rem !important; font-weight: 600 !important;}
.section-header p {color: #8691a2 !important; font-size: 13px !important;}
/* Enhanced button styles with click feedback */
.gr-button {
background: linear-gradient(90deg, #007AFF, #2689ff) !important;
color: white !important;
border-radius: 18px !important;
font-weight: 600 !important;
border: none !important;
box-shadow: 0 1px 4px rgba(0, 123, 255, 0.04) !important;
padding: 9px 22px !important;
font-size: 16px !important;
transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
transform: translateY(0) !important;
}
.gr-button:hover {
background: linear-gradient(90deg, #2689ff, #007AFF) !important;
box-shadow: 0 4px 12px rgba(0, 123, 255, 0.15) !important;
transform: translateY(-1px) !important;
}
.gr-button:active {
background: linear-gradient(90deg, #0056CC, #1F5FFF) !important;
box-shadow: 0 1px 3px rgba(0, 123, 255, 0.25) !important;
transform: translateY(1px) !important;
transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}
.gr-button[variant="secondary"] {
background: linear-gradient(90deg, #e0e7ef, #dde5f2) !important;
color: #2a3140 !important;
transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
transform: translateY(0) !important
}
.gr-button[variant="secondary"]:hover {
background: linear-gradient(90deg, #dde5f2, #e0e7ef) !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08) !important;
transform: translateY(-1px) !important;
}
.gr-button[variant="secondary"]:active {
background: linear-gradient(90deg, #d1d9e0, #c9d1db) !important;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15) !important;
transform: translateY(1px) !important;
transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}
/* Processing state for buttons */
.gr-button.processing {
background: linear-gradient(90deg, #94a3b8, #cbd5e1) !important;
color: #64748b !important;
cursor: wait !important;
transform: translateY(0) !important;
box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}
label {color: #374151 !important; font-weight: 600 !important; font-size: 15px !important;}
.gr-textbox, .gr-dropdown {border-radius: 12px !important; border: 1.5px solid #dbeafe !important; background: #f6f8fb !important; font-size: 16px !important; padding: 10px 14px !important;}
.gr-textbox:focus, .gr-dropdown:focus {border-color: #007AFF !important; box-shadow: 0 0 0 2px rgba(0, 122, 255, 0.10) !important; outline: none !important;}
.gr-audio {background: #f9fafb !important; border-radius: 16px !important; border: 1.5px solid #e5e7eb !important; padding: 18px !important;}
.feedback-container {background: #f4f7fa !important; border-radius: 18px !important; padding: 18px 24px !important;}
/* Spinner animation for progress indicators */
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Pulse animation for loading states */
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.7; }
}
.loading-pulse {
animation: pulse 1.5s ease-in-out infinite;
}
"""
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="ReadRight") as app:
gr.Markdown("""
<div class="main-header">
<h1>📚 ReadRight</h1>
<p>AI-powered reading practice and pronunciation feedback for students</p>
</div>
""")
original_passage_state = gr.State("")
with gr.Tabs():
with gr.TabItem("📖 Practice & Generate", elem_id="main_tab"):
with gr.Row(equal_height=True):
with gr.Column(scale=1, variant="panel"):
gr.Markdown("""
<div class="section-header">
<h3>📝 Story & Reading</h3>
<p>Enter details, get your story, generate audio, and record yourself—all in one flow.</p>
</div>
""")
s_name = gr.Textbox(label="👤 Your Name", placeholder="Enter your name")
s_grade = gr.Dropdown(label="🎓 Grade Level", choices=[f"Grade {i}" for i in range(1, 11)], value="Grade 3")
s_topic = gr.Textbox(label="💡 Story Topic", placeholder="E.g., space, animals, friendship")
gen_btn = gr.Button("✨ Generate Story", variant="primary")
passage_out = gr.Textbox(label="📖 Story", lines=8, interactive=False, placeholder="Your story appears here...")
audio_out = gr.Audio(label="🎵 Story Audio", type="filepath", visible=True, autoplay=False)
gr.Markdown("""
<div style="margin: 20px 0 0 0; padding: 10px 20px; background: #f4f7fa; border-radius: 16px;">
<b>➡️ Next:</b> Record yourself reading below, then check the "Analysis & Feedback" tab for results.
</div>
""")
stud_audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Your Recording")
record_again_btn = gr.Button("🔄 Record Again", variant="secondary", size="sm", visible=False)
clear_recording_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm", visible=False)
assess_btn = gr.Button("🔍 Analyze Reading", variant="primary", size="lg", interactive=False)
recording_status = gr.Markdown("", elem_id="recording_status")
analysis_status = gr.Markdown("", elem_id="analysis_status")
with gr.TabItem("📊 Analysis & Feedback", elem_id="analysis_tab"):
with gr.Row():
with gr.Column(scale=1, variant="panel"):
gr.Markdown("""
<div class="section-header">
<h3>🔍 Word-by-Word Analysis</h3>
<p>See exactly which words you read correctly</p>
</div>
""")
highlighted_out = gr.HTML(
value="""
<div style="text-align: center; color: #6b7280; padding: 20px;">
<h4>🎯 Detailed Word Analysis</h4>
<p>Color-coded word analysis will appear here.</p>
<div style="margin-top: 15px; padding: 15px; background: #f8fafc; border-radius: 12px;">
<div style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; font-size: 0.8rem; margin-bottom: 10px;">
<span style="background: #22c55e; color: white; padding: 2px 8px; border-radius: 4px;">✓ Correct</span>
<span style="background: #ef4444; color: white; padding: 2px 8px; border-radius: 4px;">✗ Missed</span>
<span style="background: #f59e0b; color: white; padding: 2px 8px; border-radius: 4px;">~ Changed</span>
<span style="background: #8b5cf6; color: white; padding: 2px 8px; border-radius: 4px;">+ Added</span>
</div>
<p style="margin: 0; font-size: 14px;">🎤 Complete a reading practice session to see your word analysis!</p>
</div>
</div>
""",
elem_id="highlighted_passage_output"
)
with gr.Row():
with gr.Column(scale=1, variant="panel"):
gr.Markdown("""
<div class="section-header">
<h3>📊 Reading Performance</h3>
<p>Your detailed feedback and scores</p>
</div>
""")
feedback_out = gr.HTML(
value="""
<div style="text-align: center; color: #6b7280; padding: 20px;">
<h4>📈 Performance Analysis</h4>
<p>Your detailed feedback will appear here after recording.</p>
<div style="margin-top: 15px; padding: 15px; background: #f8fafc; border-radius: 12px;">
<p style="margin: 0; font-size: 14px;">💡 <strong>Tip:</strong> Go to the "Practice & Generate" tab to record yourself reading!</p>
</div>
</div>
""",
elem_id="feedback_output"
)
# "About & How It Works" tab: static, read-only content only (header, workflow
# diagram, usage steps, component list, footer). Nothing here is wired to an
# event handler in the visible code.
with gr.TabItem("ℹ️ About & How It Works", elem_id="about_tab"):
# Tab header.
gr.Markdown("""
<div class="section-header">
<h3>🔧 How ReadRight Works</h3>
<p>Understanding the technology behind your ReadRight</p>
</div>
""")
# Inline SVG workflow diagram. Boxes are laid out on a fixed 1400-wide canvas
# and scaled down via `max-width: 100%; height: auto`.
# NOTE(review): the <svg> declares height="700" but the background <rect> is
# only 600 tall, so the bottom strip shows the element's white CSS background
# instead of #fafafa — confirm which height is intended.
# NOTE(review): several <text> nodes contain a bare "&" ("Grade Level & Topic",
# "& improvement areas", "analysis & feedback"); "&amp;" would be the strictly
# valid spelling.
# The `arrowhead` marker referenced by every <path> is defined in the <defs>
# element at the end of the SVG.
gr.HTML("""
<div style="text-align: center; margin: 20px 0;">
<h3 style="color: #1e293b; margin-bottom: 20px;">📊 Application Workflow</h3>
<svg width="1400" height="700" xmlns="http://www.w3.org/2000/svg" style="max-width: 100%; height: auto; border: 2px solid #e5e7eb; border-radius: 12px; background: white;">
<!-- Background -->
<rect width="1400" height="600" fill="#fafafa"/>
<!-- Title -->
<text x="700" y="30" text-anchor="middle" font-size="24" font-weight="bold" fill="#1f2937">Reading Practice Application Workflow</text>
<!-- Top Row - Input to Audio -->
<rect x="100" y="80" width="200" height="100" rx="20" fill="#dbeafe" stroke="#2563eb" stroke-width="3"/>
<text x="200" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#1e40af">User Input</text>
<text x="200" y="140" text-anchor="middle" font-size="14" fill="#3730a3">Student Name</text>
<text x="200" y="160" text-anchor="middle" font-size="14" fill="#3730a3">Grade Level & Topic</text>
<!-- Arrow 1 -->
<path d="M300 130 L380 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="340" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">INPUT</text>
<rect x="380" y="80" width="200" height="100" rx="20" fill="#dcfce7" stroke="#16a34a" stroke-width="3"/>
<text x="480" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#15803d">Story Generator</text>
<text x="480" y="140" text-anchor="middle" font-size="14" fill="#166534">AI creates personalized</text>
<text x="480" y="160" text-anchor="middle" font-size="14" fill="#166534">reading story</text>
<!-- Arrow 2 -->
<path d="M580 130 L660 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="620" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STORY</text>
<rect x="660" y="80" width="200" height="100" rx="20" fill="#fef3c7" stroke="#d97706" stroke-width="3"/>
<text x="760" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#b45309">Audio Synthesis</text>
<text x="760" y="140" text-anchor="middle" font-size="14" fill="#92400e">Text-to-Speech</text>
<text x="760" y="160" text-anchor="middle" font-size="14" fill="#92400e">Audio Generation</text>
<!-- Arrow 3 -->
<path d="M860 130 L960 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="910" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">AUDIO</text>
<rect x="960" y="80" width="200" height="100" rx="20" fill="#f3e8ff" stroke="#9333ea" stroke-width="3"/>
<text x="1060" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#7c3aed">Text Comparison</text>
<text x="1060" y="140" text-anchor="middle" font-size="14" fill="#6b21a8">Analysis Engine</text>
<text x="1060" y="160" text-anchor="middle" font-size="14" fill="#6b21a8">Accuracy Detection</text>
<!-- Vertical Flow Arrow (Audio to Student Recording) -->
<path d="M760 180 L760 250" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="790" y="220" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STUDENT LISTENS</text>
<!-- Bottom Row - Student Practice to Feedback -->
<rect x="660" y="250" width="200" height="100" rx="20" fill="#fce7f3" stroke="#ec4899" stroke-width="3"/>
<text x="760" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#be185d">Student Recording</text>
<text x="760" y="310" text-anchor="middle" font-size="14" fill="#9d174d">Student reads</text>
<text x="760" y="330" text-anchor="middle" font-size="14" fill="#9d174d">story aloud</text>
<!-- Arrow 4 (Student Recording to Speech Recognition) -->
<path d="M660 300 L580 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="620" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RECORDING</text>
<rect x="380" y="250" width="200" height="100" rx="20" fill="#e0e7ff" stroke="#6366f1" stroke-width="3"/>
<text x="480" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#4338ca">Speech Recognition</text>
<text x="480" y="310" text-anchor="middle" font-size="14" fill="#3730a3">Speech-to-Text</text>
<text x="480" y="330" text-anchor="middle" font-size="14" fill="#3730a3">Transcription</text>
<!-- Arrow 5 (Speech Recognition to Feedback) -->
<path d="M380 300 L300 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="340" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">TRANSCRIPT</text>
<rect x="100" y="250" width="200" height="100" rx="20" fill="#fef2f2" stroke="#ef4444" stroke-width="3"/>
<text x="200" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#dc2626">Feedback System</text>
<text x="200" y="310" text-anchor="middle" font-size="14" fill="#b91c1c">Performance Analysis</text>
<text x="200" y="330" text-anchor="middle" font-size="14" fill="#b91c1c">Improvement Tips</text>
<!-- Arrow from Feedback to Report -->
<path d="M200 350 L200 450" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="230" y="400" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RESULTS</text>
<!-- Output Box -->
<rect x="100" y="450" width="200" height="80" rx="20" fill="#f0fdf4" stroke="#22c55e" stroke-width="3"/>
<text x="200" y="480" text-anchor="middle" font-size="18" font-weight="bold" fill="#16a34a">Student Report</text>
<text x="200" y="505" text-anchor="middle" font-size="14" fill="#15803d">Reading accuracy</text>
<text x="200" y="520" text-anchor="middle" font-size="14" fill="#15803d">& improvement areas</text>
<!-- Process Flow Indicators -->
<circle cx="760" cy="400" r="8" fill="#3b82f6"/>
<text x="780" y="370" font-size="12" font-weight="bold" fill="#3b82f6">ACTIVE LEARNING</text>
<text x="780" y="385" font-size="10" fill="#3b82f6">Student practices reading</text>
<text x="780" y="415" font-size="12" font-weight="bold" fill="#3b82f6">AI ASSESSMENT</text>
<text x="780" y="430" font-size="10" fill="#3b82f6">Real-time analysis & feedback</text>
<!-- Arrowhead Definition -->
<defs>
<marker id="arrowhead" markerWidth="12" markerHeight="7" refX="10" refY="3.5" orient="auto">
<polygon points="0 0, 12 3.5, 0 7" fill="#6b7280"/>
</marker>
</defs>
</svg>
</div>
""")
# Usage steps followed by a description of each stage shown in the diagram.
gr.Markdown("""
## 🚀 How to Use the App
• **Enter details** → Name, grade, and story topic
• **Generate story** → Click "✨ Generate Story" button
• **Listen to AI** → Play the audio to hear correct pronunciation
• **Record yourself** → Use microphone to read the story aloud
• **Get feedback** → Click "🔍 Analyze Reading" for results
• **Practice more** → Try new topics and improve your reading!
---
## 🔧 Key Components
- **User Input (UI Agent)**: Collects student details (name, grade, topic) via an intuitive interface.
- **Story Generator (LLM Agent)**: Utilizes advanced language models to craft personalized, engaging stories.
- **Audio Synthesis (TTS Agent)**: Converts text stories into natural-sounding speech for accurate pronunciation guidance.
- **Student Recording (Recording Agent)**: Captures student readings for analysis.
- **Speech Recognition (STT Agent)**: Transcribes recorded readings into text for comparison.
- **Text Comparison (Analysis Agent)**: Analyzes transcription accuracy, comparing student readings to the original text.
- **Feedback Generation (Feedback Agent)**: Creates detailed feedback reports, highlighting strengths and areas for improvement.
""")
# Footer strip.
gr.Markdown("""
<div style="text-align: center; margin-top: 30px; padding: 20px; background: white; border-radius: 12px; font-size: 0.96em; color: #6b7280;">
Built for reading practice with modern AI tools.
</div>
""")
def generate_story_and_setup_ui(name, grade, topic):
    """Generate a story and build the UI updates that go with it.

    Args:
        name: Student name, forwarded to ``generate_story_from_llm``.
        grade: Grade selection, forwarded to ``generate_story_from_llm``.
        topic: Story topic, forwarded to ``generate_story_from_llm``.

    Returns:
        A 4-tuple ``(story_text, audio_btn_update, audio_player_update,
        passage_state)``. The two audio updates default to hidden/disabled
        components. ``passage_state`` equals ``story_text`` only when the
        text is non-empty and contains none of the error markers; otherwise
        it is ``""``.
    """
    error_markers = ("error", "blocked", "couldn't", "api key not configured")

    story_text = ""
    audio_btn_update = gr.update(interactive=False, visible=False)
    audio_player_update = gr.update(value=None, visible=False)

    res = generate_story_from_llm(name, grade, topic)
    if isinstance(res, str):
        # The generator returns a plain string (story or error message).
        # Unpacking a string into three names raises ValueError unless it is
        # exactly three characters long, so take it as the story text as-is.
        story_text = res
    elif res:
        # Still accept a (story, button_update, player_update) triple.
        story_text, audio_btn_update, audio_player_update = res

    passage_state = ""
    if story_text:
        lowered = story_text.lower()
        if not any(marker in lowered for marker in error_markers):
            passage_state = story_text

    return story_text, audio_btn_update, audio_player_update, passage_state
def assess_reading_with_analysis(original_passage_state, student_audio_path):
    """Transcribe the student's recording and compare it with the passage.

    Args:
        original_passage_state: The stored story text to compare against.
        student_audio_path: Path of the student's recording; falsy when
            nothing has been recorded.

    Returns:
        A 3-tuple ``(status_html, feedback, highlighted_passage)``. For the
        guard and transcription-error cases the third element is ``""`` and
        the second carries a short message (or the raw transcription result).
    """
    # Guard: nothing recorded yet.
    if not student_audio_path:
        missing_audio_html = """
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">🎤 Please record your reading first!</p>
</div>
"""
        return missing_audio_html, "🎤 Please record your reading first!", ""

    # Guard: no story to compare against.
    if not original_passage_state:
        missing_story_html = """
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">📚 Please generate a story first in the Story Creator tab.</p>
</div>
"""
        return (
            missing_story_html,
            "Please generate a story first in the Story Creator tab.",
            "",
        )

    # Start transcription
    transcript = speech_to_text_whisper_space(student_audio_path)

    # Substrings that mark the transcript as an error message from the
    # speech-to-text helper rather than real speech.
    failure_markers = [
        "couldn't understand",
        "had trouble",
        "service isn't working",
        "service is busy",
        "didn't get any recording",
        "filepath type issue",
    ]
    transcript_lower = (transcript or "").lower()
    for marker in failure_markers:
        if marker in transcript_lower:
            transcription_error_html = """
<div class="status-indicator status-error">
<p style="margin: 0; font-weight: 500;">❌ Transcription Error</p>
<p style="margin: 5px 0 0 0; font-size: 13px;">Please try recording again</p>
</div>
"""
            return transcription_error_html, transcript, ""

    comparison = compare_texts_for_feedback(original_passage_state, transcript)
    feedback, highlighted_passage = comparison
    done_html = """
<div class="status-indicator status-success">
<p style="margin: 0; font-weight: 500;">✅ Analysis Complete!</p>
<p style="margin: 5px 0 0 0; font-size: 13px;">Head over to the "Analysis & Feedback" tab to see your results! 🎯</p>
</div>
"""
    return (done_html, feedback, highlighted_passage)
def generate_story_and_audio_complete(name, grade, topic):
    """Generate story and audio in one function - Gradio will show default loading indicators

    Returns a 3-tuple ``(story_text, audio_filepath_or_None, story_text)``;
    all-empty (``"", None, ""``) when the generator yields nothing.
    """
    failure_markers = ["error", "blocked", "couldn't", "api key not configured"]

    # Step 1: story text.
    passage = generate_story_from_llm(name, grade, topic)
    if not passage:
        return "", None, ""

    # Step 2: stop before audio when the text looks like an error message.
    for marker in failure_markers:
        if marker in passage.lower():
            return passage, None, passage

    # Step 3: narration audio (Gradio shows its own loading indicator).
    narration_path = text_to_speech_using_space_simple(passage)
    if narration_path:
        print("COMPLETE: Story and audio generated successfully")
    else:
        print("COMPLETE: Story generated, but audio failed")
        narration_path = None
    return passage, narration_path, passage
def update_recording_status(audio_file):
    """Build the four UI updates that reflect whether a recording exists.

    Args:
        audio_file: Current value of the recording input; ``None`` means no
            recording.

    Returns:
        A 4-tuple of ``gr.update`` objects: status banner HTML, two
        visibility toggles, and one interactivity toggle. The toggles are
        all ``True`` when a recording exists and all ``False`` otherwise.
    """
    has_recording = audio_file is not None

    if has_recording:
        banner_html = """
<div class="status-indicator status-success">
<p style="margin: 0; font-weight: 500;">🎉 Recording Complete!</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Ready for analysis</p>
</div>
"""
    else:
        banner_html = """
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">🎤 Ready to Record</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
</div>
"""

    return (
        gr.update(value=banner_html),
        gr.update(visible=has_recording),
        gr.update(visible=has_recording),
        gr.update(interactive=has_recording),
    )
def clear_recording():
    """Return the seven values that reset the recording and results panes.

    Returns:
        A 7-tuple: ``None`` (clears the recording), a status-banner update,
        two ``visible=False`` updates, one ``interactive=False`` update, and
        the placeholder HTML strings for the feedback and word-analysis panes.
    """
    ready_banner = gr.update(value="""
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">🎤 Ready to Record</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
</div>
""")
    hide_first = gr.update(visible=False)
    hide_second = gr.update(visible=False)
    disable_control = gr.update(interactive=False)

    feedback_placeholder = """
<div style="text-align: center; color: #6b7280;">
<h4>Analysis Results</h4>
<p>Your feedback will appear here.</p>
<div class="status-indicator">
<p style="margin: 0; font-size: 14px;">💡 Record yourself reading to get started!</p>
</div>
</div>
"""
    word_analysis_placeholder = """
<div style="text-align: center; color: #6b7280;">
<h4>Word-by-Word Analysis</h4>
<p>Get color-coded feedback below.</p>
<div class="status-indicator">
<p style="margin: 0; font-size: 14px;">🎤 Complete a reading practice session to see your analysis!</p>
</div>
</div>
"""

    return (
        None,
        ready_banner,
        hide_first,
        hide_second,
        disable_control,
        feedback_placeholder,
        word_analysis_placeholder,
    )
def record_again_action():
    """Return the five values that reset the UI for another recording take.

    Returns:
        A 5-tuple: ``None`` (clears the recording), a "Take 2" status-banner
        update, two ``visible=False`` updates, and one ``interactive=False``
        update.
    """
    retry_banner = gr.update(value="""
<div class="status-indicator status-warning">
<p style="margin: 0; font-weight: 500;">🔄 Ready for Take 2!</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to record again</p>
</div>
""")
    hide_first = gr.update(visible=False)
    hide_second = gr.update(visible=False)
    disable_control = gr.update(interactive=False)
    return (None, retry_banner, hide_first, hide_second, disable_control)
# Event handlers - Generate story first, then audio separately
# Step 1: the generate button calls the LLM helper directly and writes its
# return value into `passage_out`.
gen_btn.click(
fn=generate_story_from_llm,
inputs=[s_name, s_grade, s_topic],
outputs=[passage_out]
).then(
# Step 2 (chained): copy whatever is now in `passage_out` into the state.
# NOTE(review): this is an identity copy, so an error message returned by the
# generator is stored as the passage too; `assess_reading_with_analysis`
# would then compare the reading against that message. Confirm whether the
# error markers should be filtered here.
fn=lambda story_text: story_text, # Store story in state immediately
inputs=[passage_out],
outputs=[original_passage_state]
)
# Separate audio generation triggered by story output change
# The lambda returns None (no audio) when the text is empty or contains one of
# the error markers; otherwise it returns the TTS helper's result.
passage_out.change(
fn=lambda story_text: text_to_speech_using_space_simple(story_text) if story_text and not any(err in story_text.lower() for err in ["error", "blocked", "couldn't", "api key not configured"]) else None,
inputs=[passage_out],
outputs=[audio_out]
)
# Analyse the recording against the stored passage; the handler's 3-tuple maps
# onto (analysis_status, feedback_out, highlighted_out) in that order.
assess_btn.click(
fn=assess_reading_with_analysis,
inputs=[original_passage_state, stud_audio_in],
outputs=[analysis_status, feedback_out, highlighted_out]
)
# Whenever the recording input changes, refresh the status banner and toggle
# the record-again / clear buttons and the analyse button.
stud_audio_in.change(
fn=update_recording_status,
inputs=[stud_audio_in],
outputs=[recording_status, record_again_btn, clear_recording_btn, assess_btn]
)
# "Record again": clears the recording and resets the same four controls
# (5 outputs, matching the handler's 5-tuple).
record_again_btn.click(
fn=record_again_action,
outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn]
)
# "Clear": same reset, plus both results panes go back to placeholder HTML
# (7 outputs, matching the handler's 7-tuple).
clear_recording_btn.click(
fn=clear_recording,
outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn, feedback_out, highlighted_out]
)
# Launch the application
# Runs only when this file is executed as a script (not when imported);
# calls `app.launch` with debug=True and share=False.
if __name__ == "__main__":
app.launch(debug=True, share=False)