ParulPandey committed on
Commit
839431d
·
verified ·
1 Parent(s): ba5c563

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -1094
app.py DELETED
@@ -1,1094 +0,0 @@
1
- from dotenv import load_dotenv
2
- load_dotenv()
3
-
4
- import gradio as gr
5
- import os
6
- import difflib
7
- from gradio_client import Client
8
- import time
9
- import google.generativeai as genai
10
-
11
- # --- Configuration & Clients ---
12
-
13
def configure_llm_api():
    """Configure the Gemini client from the GOOGLE_API_KEY secret.

    The key is looked up through ``gr.Secrets`` first; when that lookup is
    unavailable or yields nothing, the ``GOOGLE_API_KEY`` environment
    variable is used instead.

    Returns:
        bool: True when ``genai.configure`` succeeded; False when no key was
        found or configuration raised (a message is printed in both cases).
    """
    api_key = None
    try:
        api_key = gr.Secrets.get("GOOGLE_API_KEY")
    except (AttributeError, FileNotFoundError):
        # Secrets lookup is not usable here; fall through to the environment.
        pass

    if not api_key:
        # Also covers a secrets lookup that succeeded but returned nothing,
        # which previously skipped the environment fallback entirely.
        api_key = os.environ.get("GOOGLE_API_KEY")

    if not api_key:
        print("WARN: LLM API Key (GOOGLE_API_KEY) not found.")
        return False

    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        print(f"Error configuring LLM (Gemini) API: {e}")
        return False
    return True
27
LLM_API_CONFIGURED = configure_llm_api()

# Connect to the hosted text-to-speech Space. A failure is not fatal: the
# client is left as None and the failure is reported on stdout.
try:
    tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
    print("✅ Connected to advanced TTS service (Text-To-Speech-Unlimited)")
except Exception as e:
    print(f"❌ Failed to connect to TTS service: {e}")
    tts_client = None

# Connect to the hosted Whisper speech-to-text Space. The failure is logged
# (it used to be swallowed silently) so a missing STT service can be diagnosed.
try:
    whisper_stt_client = Client("abidlabs/whisper-large-v2")
except Exception as e:
    print(f"❌ Failed to connect to STT service: {e}")
    whisper_stt_client = None
40
-
41
- # --- Helper Functions ---
42
def generate_story_from_llm(name, grade_str, topic):
    """Generate a short, grade-appropriate reading passage with Gemini.

    Args:
        name: Student name that is woven into the prompt.
        grade_str: Grade level, e.g. ``"Grade 3"``, ``"3"`` or ``3``.
        topic: Topic the story should be about.

    Returns:
        str: The generated story, or a user-facing message when the API is
        not configured, the grade cannot be parsed, the prompt was blocked,
        the model returned nothing, or the request raised.
    """
    if not LLM_API_CONFIGURED:
        return "LLM API key not configured..."

    # Accept "Grade N", a bare number string, or an int. Going through str()
    # keeps a None/int value from raising AttributeError on .startswith().
    try:
        grade_text = str(grade_str).strip()
        if grade_text.startswith("Grade "):
            grade_text = grade_text.replace("Grade ", "")
        grade = int(grade_text)
    except (TypeError, ValueError):
        return "Invalid grade level selected."

    # Story length (and the matching output-token budget) grows with grade.
    if grade <= 2:
        word_target, max_llm_tokens = "around 40-60 words", 100
    elif grade <= 5:
        word_target, max_llm_tokens = "around 80-100 words", 200
    elif grade <= 8:
        word_target, max_llm_tokens = "around 100-120 words", 250
    else:
        word_target, max_llm_tokens = "around 120-150 words", 300

    try:
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")
        prompt = (
            f"You are an AI assistant that creates engaging short reading passages. "
            f"Generate a story of {word_target} suitable for a student named {name} in Grade {grade}. "
            f"The story topic is: '{topic}'. Use age-appropriate vocabulary for Grade {grade}. Ensure the story is interesting and easy to read aloud. "
            f"Do not include any introductory or concluding phrases like 'Here is a story'."
        )
        safety_settings = [
            {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
            for category in (
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            )
        ]
        generation_config = genai.types.GenerationConfig(
            candidate_count=1,
            max_output_tokens=max_llm_tokens,
            temperature=0.7,
        )
        response = model.generate_content(
            prompt,
            generation_config=generation_config,
            safety_settings=safety_settings,
        )

        feedback = response.prompt_feedback
        block_reason = feedback.block_reason if feedback else None
        has_content = bool(response.candidates and response.candidates[0].content.parts)

        if not has_content:
            if block_reason:
                return f"Story idea for '{topic}' got blocked (Reason: {block_reason}). Try a different topic. 😊"
            return "Hmm, LLM had trouble with that topic. Maybe try another one? 🤔"

        # Check the block reason before touching response.text so a blocked
        # prompt produces the friendly message rather than an error.
        if block_reason:
            return f"Story idea for '{topic}' blocked (Reason: {block_reason}). Try a different topic. 😊"

        story = response.text.strip()
        if not story:
            return f"The LLM couldn't generate a story for '{topic}'. Try another topic or rephrase. ✨"
        return story
    except Exception as e:
        return f"Oh no! 😟 Error generating story. Details: {e}"
91
-
92
def _extract_tts_audio_path(result):
    """Return the audio file path carried by a TTS prediction result, or None."""
    if isinstance(result, (tuple, list)) and len(result) > 0:
        return result[0]
    if isinstance(result, str) and result.endswith(('.wav', '.mp3', '.flac', '.m4a')):
        return result
    return None


def text_to_speech_using_space_simple(text):
    """Convert text to speech through the hosted TTS Space.

    Several endpoint/parameter combinations are tried in order. If none of
    them yields an audio path, the client is re-created and one final
    request is made.

    Args:
        text: Text to synthesise. Empty, whitespace-only or None yields None.

    Returns:
        The audio file path reported by the service, or None when the text
        is empty, the service cannot be reached, or every attempt fails.
    """
    global tts_client

    if not text or not text.strip():
        return None

    # Reconnect lazily if the client is missing.
    if not tts_client:
        try:
            tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
        except Exception as e:
            print(f"Failed to connect to TTS service: {e}")
            return None
        if not tts_client:
            return None

    # Candidate calls, most preferred first (voice + emotion, then bare text).
    api_methods = [
        {"params": [text, "alloy", "happy"], "api_name": "/text_to_speech_app"},
        {"params": [text, "alloy", "neutral"], "api_name": "/text_to_speech_app"},
        {"params": [text, "nova", "neutral"], "api_name": "/text_to_speech_app"},
        {"params": [text], "api_name": "/predict"},
    ]

    for method in api_methods:
        try:
            print(f"Trying TTS with params: {method['params']} and api_name: {method['api_name']}")
            audio_result = tts_client.predict(*method["params"], api_name=method["api_name"])
            print(f"TTS result type: {type(audio_result)}, content: {audio_result}")
        except Exception as method_error:
            print(f"TTS method failed: {method_error}")
            continue

        audio_filepath = _extract_tts_audio_path(audio_result)
        if audio_filepath:
            print(f"Successfully generated audio: {audio_filepath}")
            print(f"FINAL: Returning audio file path: {audio_filepath}")
            return audio_filepath

    print("All TTS methods failed, trying to reconnect...")
    print("TTS error: All API methods failed")

    # Last resort: fresh client, single request with the most basic settings.
    try:
        tts_client = Client("NihalGazi/Text-To-Speech-Unlimited")
        if tts_client:
            audio_result = tts_client.predict(
                text,
                "alloy",  # voice
                "neutral",  # emotion
                api_name="/text_to_speech_app",
            )
            print(f"Retry result: {type(audio_result)}, {audio_result}")

            audio_filepath = _extract_tts_audio_path(audio_result)
            if audio_filepath:
                print(f"RETRY SUCCESS: Returning audio file path: {audio_filepath}")
                return audio_filepath
    except Exception as retry_error:
        print(f"TTS retry failed: {retry_error}")

    print("TTS failed completely - returning None")
    return None
185
-
186
def speech_to_text_whisper_space(audio_filepath, max_retries=3):
    """Transcribe a recording through the hosted Whisper Space.

    Args:
        audio_filepath: Path of the recording to transcribe.
        max_retries: Number of requests to attempt before giving up.

    Returns:
        str: The transcript, or a user-facing message when the service is
        unavailable, no recording was supplied, the response has an
        unexpected shape, nothing was transcribed, or every attempt raised.
    """
    if not whisper_stt_client:
        return "Speech-to-text service is not available. 🛠️"
    if not audio_filepath:
        return "No recording received for transcription. 🎤"

    for attempt in range(max_retries):
        try:
            result = whisper_stt_client.predict(audio_filepath, api_name="/predict")
        except Exception as e:
            # Report why the attempt failed instead of discarding the error.
            print(f"STT attempt {attempt + 1}/{max_retries} failed: {e}")
            continue

        # The response may be a bare string or a sequence whose first item
        # is the transcript.
        if isinstance(result, (tuple, list)) and len(result) > 0:
            transcribed_text = result[0] if result[0] else ""
        elif isinstance(result, str):
            transcribed_text = result
        else:
            return "Hmm, STT service returned unexpected format. 🤔"
        return transcribed_text if transcribed_text else "No speech detected in the recording. 🤫"

    return "Unexpected error during transcription. Please try again! 🔄"
206
-
207
# Character table for clean_text_for_comparison: deletes ASCII punctuation
# (the apostrophe is kept so contractions survive) plus typographic double
# quotes, guillemets and the ellipsis; maps curly apostrophes to "'" and
# en/em dashes to a space so the words on either side stay separate.
_COMPARISON_TABLE = str.maketrans(
    '', '',
    "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~" "\u201c\u201d\u201e\u00ab\u00bb\u2026",
)
_COMPARISON_TABLE.update(
    str.maketrans({"\u2018": "'", "\u2019": "'", "\u2013": " ", "\u2014": " "})
)


def clean_text_for_comparison(text):
    """Normalise text into a list of lowercase words for diffing.

    Args:
        text: Text to normalise; any non-string value yields an empty list.

    Returns:
        list[str]: Lowercased, punctuation-free words in their original
        order. Typographic quotes and dashes are normalised so that, for
        example, "don\u2019t" and "don't" produce the same word.
    """
    if not isinstance(text, str):
        return []
    return text.lower().translate(_COMPARISON_TABLE).split()
211
-
212
def compare_texts_for_feedback(original_text, student_text):
    """Compare a student's transcribed reading with the original passage.

    Both texts are normalised with ``clean_text_for_comparison`` and diffed
    word by word with ``difflib.SequenceMatcher``.

    Args:
        original_text: The passage the student was asked to read.
        student_text: The transcript of the student's recording.

    Returns:
        tuple[str, str]: ``(feedback_html, highlighted_passage_html)``. When
        the transcript contains no words, the first item is a plain message
        and the second is an empty string.
    """
    original_words = clean_text_for_comparison(original_text)
    student_words = clean_text_for_comparison(student_text)
    if not student_words:
        return "It sounds like you didn't record or it was very quiet! 🤫 Try recording again nice and clear!", ""

    total_original_words = len(original_words)

    # Phonetic respellings for words that are commonly read incorrectly.
    pronunciation_guide = {
        'the': 'thuh or thee', 'through': 'threw', 'though': 'thoh', 'thought': 'thawt',
        'knight': 'night', 'know': 'noh', 'write': 'right', 'wrong': 'rawng', 'what': 'wot',
        'where': 'wair', 'when': 'wen', 'why': 'wy', 'who': 'hoo', 'laugh': 'laff',
        'enough': 'ee-nuff', 'cough': 'koff', 'rough': 'ruff', 'tough': 'tuff', 'magic': 'maj-ik',
        'school': 'skool', 'friend': 'frend', 'said': 'sed', 'says': 'sez', 'once': 'wunts',
        'was': 'wuz', 'were': 'wur', 'you': 'yoo', 'your': 'yor', 'there': 'thair', 'their': 'thair', 'they': 'thay',
        'because': 'bee-koz', 'beautiful': 'byoo-ti-ful', 'different': 'dif-er-ent', 'important': 'im-por-tant',
        'people': 'pee-pul', 'together': 'too-geth-er', 'water': 'waw-ter', 'favorite': 'fay-vor-it',
        'journey': 'jur-nee', 'treasure': 'trezh-er', 'adventure': 'ad-ven-cher', 'mysterious': 'mis-teer-ee-us',
    }

    def identify_challenging_words(text_words):
        """Return up to five distinct story words that may be hard to read aloud."""
        challenging = []
        for word in text_words:
            word_lower = word.lower()
            if word_lower in challenging:
                continue
            if (len(word) > 6  # Long words
                    or word_lower in pronunciation_guide  # Known difficult words
                    or 'tion' in word_lower or 'ough' in word_lower  # Tricky endings
                    or word_lower.startswith(('wr', 'kn', 'ph'))  # Silent letters
                    or 'gh' in word_lower or 'th' in word_lower):  # Difficult sounds
                challenging.append(word_lower)
                if len(challenging) == 5:  # Only the first five are shown
                    break
        return challenging

    def get_pronunciation_tip(word):
        """Return a respelling from the guide, a rough syllable split, or the word itself."""
        word_lower = word.lower()
        if word_lower in pronunciation_guide:
            return pronunciation_guide[word_lower]
        if len(word) > 6:
            # Simple syllable breakdown: cut after a vowel that is followed
            # by a consonant.
            vowels = 'aeiou'
            syllables = []
            current_syllable = ''
            for i, char in enumerate(word_lower):
                current_syllable += char
                if char in vowels and i < len(word_lower) - 1 and word_lower[i + 1] not in vowels:
                    syllables.append(current_syllable)
                    current_syllable = ''
            if current_syllable:
                syllables.append(current_syllable)
            if len(syllables) > 1:
                return '-'.join(syllables)
        return word_lower

    # --- Word-level diff and highlighted passage -------------------------
    chip = "color: white; padding: 3px 6px; border-radius: 6px; margin: 2px; font-weight: 500;"
    correct_words = substituted_words = missed_words = extra_words = 0
    highlighted_parts = []

    matcher = difflib.SequenceMatcher(None, original_words, student_words, autojunk=False)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        orig_seg_words = original_words[i1:i2]
        stud_seg_words = student_words[j1:j2]
        orig_seg_text = " ".join(orig_seg_words)
        stud_seg_text = " ".join(stud_seg_words)

        if tag == 'equal':
            correct_words += len(orig_seg_words)
            highlighted_parts.append(f'<span style="background: #22c55e; {chip}">{orig_seg_text}</span>')
        elif tag == 'replace':
            substituted_words += len(orig_seg_words)
            highlighted_parts.append(
                f'<span style="background: #f59e0b; {chip} text-decoration: line-through;">{orig_seg_text}</span> '
                f'<span style="background: #ef4444; {chip}">→{stud_seg_text}</span>'
            )
        elif tag == 'delete':
            missed_words += len(orig_seg_words)
            highlighted_parts.append(
                f'<span style="background: #ef4444; {chip} text-decoration: line-through;">{orig_seg_text}</span> '
                f'<span style="font-style: italic; color: #9ca3af; font-size: 0.9em;">(skipped)</span>'
            )
        elif tag == 'insert':
            extra_words += len(stud_seg_words)
            highlighted_parts.append(f'<span style="background: #8b5cf6; {chip} font-style: italic;">+{stud_seg_text}</span>')

    final_text = " ".join(highlighted_parts)

    # --- Perfect reading -------------------------------------------------
    # Compare the counts themselves: the rounded percentage can show 100.0
    # for a very long passage even though a word was missed.
    if total_original_words > 0 and correct_words == total_original_words:
        feedback_html = f"""
        <div style="background: linear-gradient(135deg, #10b981, #059669); padding: 24px; border-radius: 16px; color: white; text-align: center; margin-bottom: 20px;">
            <h2 style="margin: 0 0 8px 0; font-size: 1.8rem;">🎉 PERFECT READING! 🎉</h2>
            <p style="margin: 0; font-size: 1.1rem; opacity: 0.9;">Amazing! You read every single word correctly!</p>
        </div>

        <div style="background: #f0fdf4; border: 2px solid #22c55e; border-radius: 12px; padding: 20px; margin-bottom: 16px;">
            <h3 style="color: #15803d; margin: 0 0 12px 0;">📊 Your Reading Score</h3>
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 16px;">
                <div style="text-align: center;">
                    <div style="font-size: 2rem; font-weight: bold; color: #15803d;">100%</div>
                    <div style="font-size: 0.9rem; color: #166534;">Word Accuracy</div>
                </div>
                <div style="text-align: center;">
                    <div style="font-size: 2rem; font-weight: bold; color: #15803d;">{total_original_words}/{total_original_words}</div>
                    <div style="font-size: 0.9rem; color: #166534;">Words Correct</div>
                </div>
            </div>
            <div style="text-align: center; padding: 12px; background: #dcfce7; border-radius: 8px;">
                <strong style="color: #15803d;">🏆 Reading Champion Level!</strong>
            </div>
        </div>

        <div style="background: #fffbeb; border-radius: 12px; padding: 16px;">
            <h4 style="color: #92400e; margin: 0 0 8px 0;">🎯 What's Next?</h4>
            <ul style="margin: 8px 0; padding-left: 20px; color: #78350f;">
                <li>Try a more challenging story topic</li>
                <li>Practice reading faster while staying accurate</li>
                <li>Help a friend or family member practice reading</li>
                <li>Celebrate your excellent reading skills! 🎊</li>
            </ul>
        </div>
        """
        return feedback_html, final_text

    # --- Metrics ---------------------------------------------------------
    accuracy_percentage = round((correct_words / total_original_words) * 100, 1) if total_original_words > 0 else 0

    if accuracy_percentage >= 95:
        performance_level = "🏆 Excellent"
        performance_color = "#10b981"
        performance_message = "Outstanding reading! You're reading like a champion!"
    elif accuracy_percentage >= 85:
        performance_level = "🌟 Very Good"
        performance_color = "#3b82f6"
        performance_message = "Great job! You're doing really well with your reading."
    elif accuracy_percentage >= 70:
        performance_level = "💪 Good Progress"
        performance_color = "#f59e0b"
        performance_message = "Nice work! Keep practicing to improve even more."
    elif accuracy_percentage >= 50:
        performance_level = "📚 Keep Practicing"
        performance_color = "#ef4444"
        performance_message = "You're learning! More practice will help you improve."
    else:
        performance_level = "🚀 Just Getting Started"
        performance_color = "#8b5cf6"
        performance_message = "Every reader starts somewhere! Keep trying and you'll get better."

    # Skill scores: each skipped word costs 10 fluency points and each
    # substituted word costs 15 pronunciation points, floored at zero.
    accuracy_score = accuracy_percentage
    fluency_score = max(0, 100 - (missed_words * 10))
    pronunciation_score = max(0, 100 - (substituted_words * 15))

    challenging_words = identify_challenging_words(original_words)

    stat_card = "text-align: center; background: white; padding: 12px; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);"
    sections = []

    # --- Header and dashboard --------------------------------------------
    sections.append(f"""
    <div style="background: linear-gradient(135deg, {performance_color}, {performance_color}dd); padding: 20px; border-radius: 16px; color: white; text-align: center; margin-bottom: 20px;">
        <h2 style="margin: 0 0 8px 0; font-size: 1.6rem;">{performance_level}</h2>
        <p style="margin: 0; font-size: 1rem; opacity: 0.9;">{performance_message}</p>
    </div>

    <div style="background: #f8fafc; border-radius: 12px; padding: 20px; margin-bottom: 20px;">
        <h3 style="color: #1e293b; margin: 0 0 16px 0;">📊 Reading Dashboard</h3>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); gap: 16px; margin-bottom: 16px;">
            <div style="{stat_card}">
                <div style="font-size: 1.8rem; font-weight: bold; color: {performance_color};">{accuracy_percentage}%</div>
                <div style="font-size: 0.85rem; color: #64748b;">Accuracy</div>
            </div>
            <div style="{stat_card}">
                <div style="font-size: 1.8rem; font-weight: bold; color: #22c55e;">{correct_words}</div>
                <div style="font-size: 0.85rem; color: #64748b;">Words Correct</div>
            </div>
            <div style="{stat_card}">
                <div style="font-size: 1.8rem; font-weight: bold; color: #ef4444;">{missed_words}</div>
                <div style="font-size: 0.85rem; color: #64748b;">Missed</div>
            </div>
            <div style="{stat_card}">
                <div style="font-size: 1.8rem; font-weight: bold; color: #f59e0b;">{substituted_words}</div>
                <div style="font-size: 0.85rem; color: #64748b;">Changed</div>
            </div>
        </div>

        <!-- Color Legend -->
        <div style="background: #ffffff; border-radius: 8px; padding: 12px; margin-top: 16px;">
            <h4 style="color: #374151; margin: 0 0 8px 0; font-size: 0.9rem;">📖 Word Color Guide:</h4>
            <div style="display: flex; flex-wrap: wrap; gap: 8px; font-size: 0.8rem;">
                <span style="background: #22c55e; color: white; padding: 2px 8px; border-radius: 4px;">✓ Correct</span>
                <span style="background: #ef4444; color: white; padding: 2px 8px; border-radius: 4px;">✗ Missed</span>
                <span style="background: #f59e0b; color: white; padding: 2px 8px; border-radius: 4px;">~ Changed</span>
                <span style="background: #8b5cf6; color: white; padding: 2px 8px; border-radius: 4px;">+ Added</span>
            </div>
        </div>
    </div>
    """)

    # --- Smart focus areas (skill-based, not a list of every error) -------
    improvement_areas = []
    if accuracy_percentage < 85:
        improvement_areas.append("🎯 **Reading Accuracy**: Focus on reading each word carefully")
    if missed_words > 2:
        improvement_areas.append("📖 **Reading Fluency**: Practice reading without skipping words")
    if substituted_words > 2:
        improvement_areas.append("🗣️ **Pronunciation**: Work on saying words clearly")
    if extra_words > 1:
        improvement_areas.append("👁️ **Focus & Attention**: Follow the text closely while reading")

    if improvement_areas:
        sections.append("""
    <div style="background: #fef9c3; border-left: 4px solid #eab308; padding: 16px; border-radius: 8px; margin-bottom: 16px;">
        <h4 style="color: #a16207; margin: 0 0 12px 0;">🎯 Smart Focus Areas</h4>
        <div style="color: #a16207;">
    """)
        # Limit to the three most important areas.
        sections.extend(f"        • {area}<br>" for area in improvement_areas[:3])
        sections.append("""
        </div>
    </div>
    """)

    # --- Proactive pronunciation helper (story words, not just errors) ----
    if challenging_words:
        sections.append("""
    <div style="background: #e0f2fe; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
        <h4 style="color: #0277bd; margin: 0 0 12px 0;">🗣️ Story Word Pronunciation Guide</h4>
        <p style="color: #0277bd; font-size: 0.9rem; margin: 0 0 12px 0;">Here are some words from your story that might be tricky:</p>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 12px;">
    """)
        for word in challenging_words:
            pronunciation = get_pronunciation_tip(word)
            sections.append(f"""
            <div style="background: #b3e5fc; padding: 8px 12px; border-radius: 6px;">
                <strong style="color: #01579b;">{word.upper()}</strong><br>
                <span style="color: #0277bd; font-size: 0.85rem;">"say: {pronunciation}"</span>
            </div>""")
        sections.append("""
        </div>
        <div style="margin-top: 12px; padding: 8px; background: #b3e5fc; border-radius: 6px; font-size: 0.9rem;">
            💡 <strong>Practice tip:</strong> Listen to the AI reading these words and repeat them slowly!
        </div>
    </div>
    """)

    # --- Progress insights -------------------------------------------------
    skill_card = "background: white; padding: 10px; border-radius: 6px; text-align: center;"
    sections.append(f"""
    <div style="background: #f0f9ff; border-radius: 12px; padding: 16px; margin-bottom: 16px;">
        <h4 style="color: #0369a1; margin: 0 0 12px 0;">📈 Reading Skills Progress</h4>
        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 12px;">
            <div style="{skill_card}">
                <div style="font-size: 1.2rem; font-weight: bold; color: #0369a1;">{accuracy_score:.0f}%</div>
                <div style="font-size: 0.8rem; color: #64748b;">Word Accuracy</div>
            </div>
            <div style="{skill_card}">
                <div style="font-size: 1.2rem; font-weight: bold; color: #0369a1;">{fluency_score:.0f}%</div>
                <div style="font-size: 0.8rem; color: #64748b;">Reading Flow</div>
            </div>
            <div style="{skill_card}">
                <div style="font-size: 1.2rem; font-weight: bold; color: #0369a1;">{pronunciation_score:.0f}%</div>
                <div style="font-size: 0.8rem; color: #64748b;">Pronunciation</div>
            </div>
        </div>
    </div>
    """)

    # --- Personalised next steps -------------------------------------------
    if accuracy_percentage >= 85:
        next_steps = [
            "🎧 Practice reading along with the audio for better timing",
            "📚 Try a slightly more challenging story topic",
            "🗣️ Focus on reading with expression and emotion",
        ]
    elif accuracy_percentage >= 70:
        next_steps = [
            "🎧 Listen to the AI reading first, then read yourself",
            "🔤 Practice the tricky words from above separately",
            "📱 Record yourself multiple times and compare",
        ]
    else:
        next_steps = [
            "🎧 Listen to the audio several times before recording",
            "👁️ Follow along with the text while listening",
            "⏰ Take your time - read slowly and clearly",
        ]

    sections.append("""
    <div style="background: #f0f9ff; border-radius: 12px; padding: 16px;">
        <h4 style="color: #0369a1; margin: 0 0 12px 0;">🎮 Your Reading Quest - Next Steps!</h4>
        <div style="color: #0369a1;">
    """)
    sections.extend(f"        • {step}<br>" for step in next_steps)
    sections.append(f"""
        </div>
        <div style="margin-top: 16px; padding: 12px; background: #dbeafe; border-radius: 8px; text-align: center;">
            <strong style="color: #1e40af;">🎯 Next Goal: Reach {min(accuracy_percentage + 15, 100)}% accuracy!</strong>
        </div>
    </div>
    """)

    return "".join(sections), final_text
530
-
531
def assess_student_reading_ui(original_passage_state, student_audio_path):
    """Transcribe the student's recording and score it against the story.

    Args:
        original_passage_state: The story text the student was asked to read.
        student_audio_path: File path of the student's recording.

    Returns:
        tuple[str, str]: ``(feedback, highlighted_passage)``. When the
        recording or story is missing, or transcription produced an error
        message, the first item is that message and the second is "".
    """
    if not student_audio_path:
        return "🎤 Please record your reading first!", ""
    if not original_passage_state:
        return "Hmm, the original story is missing. 😟 Please generate a story first.", ""

    transcribed_text = speech_to_text_whisper_space(student_audio_path)

    # Lower-cased fragments that identify a transcription failure message.
    # The second group matches the messages speech_to_text_whisper_space
    # actually returns; without it those messages were scored as if they
    # were the student's reading.
    stt_errors = (
        "couldn't understand", "had trouble", "service isn't working", "service is busy",
        "didn't get any recording", "filepath type issue",
        "speech-to-text service is not available",
        "no recording received for transcription",
        "stt service returned unexpected format",
        "no speech detected in the recording",
        "unexpected error during transcription",
    )
    lowered = (transcribed_text or "").lower()
    if any(err in lowered for err in stt_errors):
        return transcribed_text, ""

    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
    return feedback, highlighted_passage
539
-
540
# Custom stylesheet for the app; it is passed to gr.Blocks through its css
# argument. It styles the page background and font, the .main-header banner,
# panels, section headers, primary/secondary buttons (hover, active and
# processing states), form inputs, the audio widget and the feedback
# container, and defines the spin/pulse keyframe animations.
css = """
body, .gradio-container {
background: #f9fafb !important;
font-family: -apple-system, BlinkMacSystemFont, 'San Francisco', 'Segoe UI', 'Roboto', Arial, sans-serif !important;
}
.main-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border-radius: 0 !important;
box-shadow: 0 8px 32px 0 rgba(102, 126, 234, 0.3) !important;
padding: 32px 20px 28px 20px !important;
margin: -20px -20px 28px -20px !important;
width: calc(100% + 40px) !important;
text-align: center;
border: none !important;
position: relative;
overflow: hidden;
}
.main-header::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background: linear-gradient(45deg, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0.05) 100%);
pointer-events: none;
}
.main-header h1 {
font-size: 2.4rem !important;
font-weight: 800 !important;
color: white !important;
margin: 0 0 8px 0 !important;
text-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
letter-spacing: -0.5px !important;
position: relative;
z-index: 1;
}
.main-header p {
color: rgba(255,255,255,0.9) !important;
font-size: 1.1rem !important;
margin: 0 !important;
font-weight: 400 !important;
position: relative;
z-index: 1;
}
.tech-badge {
background: rgba(255,255,255,0.2) !important;
color: white !important;
border-radius: 12px !important;
padding: 4px 12px !important;
font-size: 12px !important;
font-weight: 600 !important;
backdrop-filter: blur(10px) !important;
}
.gr-block, .gr-panel {background: white !important; border-radius: 18px !important; box-shadow: 0 2px 8px 0 rgba(60,60,90,0.07) !important; border: none !important; padding: 28px 22px !important;}
.section-header {background: transparent !important; border: none !important; padding: 0 !important; margin-bottom: 16px !important;}
.section-header h3 {color: #1e293b !important; font-size: 1.14rem !important; font-weight: 600 !important;}
.section-header p {color: #8691a2 !important; font-size: 13px !important;}

/* Enhanced button styles with click feedback */
.gr-button {
background: linear-gradient(90deg, #007AFF, #2689ff) !important;
color: white !important;
border-radius: 18px !important;
font-weight: 600 !important;
border: none !important;
box-shadow: 0 1px 4px rgba(0, 123, 255, 0.04) !important;
padding: 9px 22px !important;
font-size: 16px !important;
transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
transform: translateY(0) !important;
}

.gr-button:hover {
background: linear-gradient(90deg, #2689ff, #007AFF) !important;
box-shadow: 0 4px 12px rgba(0, 123, 255, 0.15) !important;
transform: translateY(-1px) !important;
}

.gr-button:active {
background: linear-gradient(90deg, #0056CC, #1F5FFF) !important;
box-shadow: 0 1px 3px rgba(0, 123, 255, 0.25) !important;
transform: translateY(1px) !important;
transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

.gr-button[variant="secondary"] {
background: linear-gradient(90deg, #e0e7ef, #dde5f2) !important;
color: #2a3140 !important;
transition: all 0.15s cubic-bezier(0.4,0.0,0.2,1) !important;
transform: translateY(0) !important
}

.gr-button[variant="secondary"]:hover {
background: linear-gradient(90deg, #dde5f2, #e0e7ef) !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08) !important;
transform: translateY(-1px) !important;
}

.gr-button[variant="secondary"]:active {
background: linear-gradient(90deg, #d1d9e0, #c9d1db) !important;
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.15) !important;
transform: translateY(1px) !important;
transition: all 0.1s cubic-bezier(0.4,0.0,0.2,1) !important;
}

/* Processing state for buttons */
.gr-button.processing {
background: linear-gradient(90deg, #94a3b8, #cbd5e1) !important;
color: #64748b !important;
cursor: wait !important;
transform: translateY(0) !important;
box-shadow: inset 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}

label {color: #374151 !important; font-weight: 600 !important; font-size: 15px !important;}
.gr-textbox, .gr-dropdown {border-radius: 12px !important; border: 1.5px solid #dbeafe !important; background: #f6f8fb !important; font-size: 16px !important; padding: 10px 14px !important;}
.gr-textbox:focus, .gr-dropdown:focus {border-color: #007AFF !important; box-shadow: 0 0 0 2px rgba(0, 122, 255, 0.10) !important; outline: none !important;}
.gr-audio {background: #f9fafb !important; border-radius: 16px !important; border: 1.5px solid #e5e7eb !important; padding: 18px !important;}
.feedback-container {background: #f4f7fa !important; border-radius: 18px !important; padding: 18px 24px !important;}

/* Spinner animation for progress indicators */
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}

/* Pulse animation for loading states */
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.7; }
}

.loading-pulse {
animation: pulse 1.5s ease-in-out infinite;
}
"""
677
-
678
# Root container of the ReadRight interface: Soft theme plus the stylesheet passed via `css`.
# NOTE(review): indentation was not visible in the text under review, so the nesting of the
# `with` blocks below is reconstructed from statement order — confirm against the real layout.
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="ReadRight") as app:
    # Page banner.
    gr.Markdown("""
<div class="main-header">
<h1>📚 ReadRight</h1>
<p>AI-powered reading practice and pronunciation feedback for students</p>
</div>
""")

    # Copy of the generated passage; passed to the analysis handler as an input.
    original_passage_state = gr.State("")

    with gr.Tabs():
        # Tab 1: story generation, reference audio and the student's recording.
        with gr.TabItem("📖 Practice & Generate", elem_id="main_tab"):
            with gr.Row(equal_height=True):
                with gr.Column(scale=1, variant="panel"):
                    gr.Markdown("""
<div class="section-header">
<h3>📝 Story & Reading</h3>
<p>Enter details, get your story, generate audio, and record yourself—all in one flow.</p>
</div>
""")
                    # Inputs forwarded to the story generator when gen_btn is clicked.
                    s_name = gr.Textbox(label="👤 Your Name", placeholder="Enter your name")
                    s_grade = gr.Dropdown(label="🎓 Grade Level", choices=[f"Grade {i}" for i in range(1, 11)], value="Grade 3")
                    s_topic = gr.Textbox(label="💡 Story Topic", placeholder="E.g., space, animals, friendship")
                    gen_btn = gr.Button("✨ Generate Story", variant="primary")
                    # Read-only story display; its change event drives audio generation.
                    passage_out = gr.Textbox(label="📖 Story", lines=8, interactive=False, placeholder="Your story appears here...")
                    audio_out = gr.Audio(label="🎵 Story Audio", type="filepath", visible=True, autoplay=False)
                    gr.Markdown("""
<div style="margin: 20px 0 0 0; padding: 10px 20px; background: #f4f7fa; border-radius: 16px;">
<b>➡️ Next:</b> Record yourself reading below, then check the "Analysis & Feedback" tab for results.
</div>
""")
                    # Microphone-only recorder; the handler receives a file path.
                    stud_audio_in = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Your Recording")
                    # These two buttons start hidden and the analyze button starts disabled;
                    # the recording-status handler toggles them once a recording exists.
                    record_again_btn = gr.Button("🔄 Record Again", variant="secondary", size="sm", visible=False)
                    clear_recording_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm", visible=False)
                    assess_btn = gr.Button("🔍 Analyze Reading", variant="primary", size="lg", interactive=False)
                    # Status banners filled in by the recording and analysis handlers.
                    recording_status = gr.Markdown("", elem_id="recording_status")
                    analysis_status = gr.Markdown("", elem_id="analysis_status")
715
-
716
# Tab 2: results of the reading analysis.
# NOTE(review): indentation was not visible in the text under review; the nesting below is
# reconstructed from statement order, and this tab presumably sits inside the enclosing
# gr.Tabs() container — confirm against the real layout.
with gr.TabItem("📊 Analysis & Feedback", elem_id="analysis_tab"):
    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("""
<div class="section-header">
<h3>🔍 Word-by-Word Analysis</h3>
<p>See exactly which words you read correctly</p>
</div>
""")
            # Placeholder with the colour legend; replaced by the analysis handler's
            # highlighted passage once a reading has been assessed.
            highlighted_out = gr.HTML(
                value="""
<div style="text-align: center; color: #6b7280; padding: 20px;">
<h4>🎯 Detailed Word Analysis</h4>
<p>Color-coded word analysis will appear here.</p>
<div style="margin-top: 15px; padding: 15px; background: #f8fafc; border-radius: 12px;">
<div style="display: flex; flex-wrap: wrap; gap: 8px; justify-content: center; font-size: 0.8rem; margin-bottom: 10px;">
<span style="background: #22c55e; color: white; padding: 2px 8px; border-radius: 4px;">✓ Correct</span>
<span style="background: #ef4444; color: white; padding: 2px 8px; border-radius: 4px;">✗ Missed</span>
<span style="background: #f59e0b; color: white; padding: 2px 8px; border-radius: 4px;">~ Changed</span>
<span style="background: #8b5cf6; color: white; padding: 2px 8px; border-radius: 4px;">+ Added</span>
</div>
<p style="margin: 0; font-size: 14px;">🎤 Complete a reading practice session to see your word analysis!</p>
</div>
</div>
""",
                elem_id="highlighted_passage_output"
            )

    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            gr.Markdown("""
<div class="section-header">
<h3>📊 Reading Performance</h3>
<p>Your detailed feedback and scores</p>
</div>
""")
            # Placeholder shown until the analysis handler writes the feedback report.
            feedback_out = gr.HTML(
                value="""
<div style="text-align: center; color: #6b7280; padding: 20px;">
<h4>📈 Performance Analysis</h4>
<p>Your detailed feedback will appear here after recording.</p>
<div style="margin-top: 15px; padding: 15px; background: #f8fafc; border-radius: 12px;">
<p style="margin: 0; font-size: 14px;">💡 <strong>Tip:</strong> Go to the "Practice & Generate" tab to record yourself reading!</p>
</div>
</div>
""",
                elem_id="feedback_output"
            )
764
-
765
# Tab 3: static explanation of the application (workflow diagram, usage steps, components).
# NOTE(review): indentation was not visible in the text under review; this tab presumably
# sits inside the enclosing gr.Tabs() container — confirm against the real layout.
with gr.TabItem("ℹ️ About & How It Works", elem_id="about_tab"):
    gr.Markdown("""
<div class="section-header">
<h3>🔧 How ReadRight Works</h3>
<p>Understanding the technology behind your ReadRight</p>
</div>
""")

    # Workflow diagram. The <svg> carries a viewBox so that the "max-width: 100%; height: auto"
    # style scales the drawing on narrow screens instead of clipping it, and the background
    # rect covers the full 1400x700 canvas.
    gr.HTML("""
<div style="text-align: center; margin: 20px 0;">
<h3 style="color: #1e293b; margin-bottom: 20px;">📊 Application Workflow</h3>
<svg width="1400" height="700" viewBox="0 0 1400 700" xmlns="http://www.w3.org/2000/svg" style="max-width: 100%; height: auto; border: 2px solid #e5e7eb; border-radius: 12px; background: white;">
<!-- Background -->
<rect width="1400" height="700" fill="#fafafa"/>

<!-- Title -->
<text x="700" y="30" text-anchor="middle" font-size="24" font-weight="bold" fill="#1f2937">Reading Practice Application Workflow</text>

<!-- Top Row - Input to Audio -->
<rect x="100" y="80" width="200" height="100" rx="20" fill="#dbeafe" stroke="#2563eb" stroke-width="3"/>
<text x="200" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#1e40af">User Input</text>
<text x="200" y="140" text-anchor="middle" font-size="14" fill="#3730a3">Student Name</text>
<text x="200" y="160" text-anchor="middle" font-size="14" fill="#3730a3">Grade Level & Topic</text>

<!-- Arrow 1 -->
<path d="M300 130 L380 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="340" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">INPUT</text>

<rect x="380" y="80" width="200" height="100" rx="20" fill="#dcfce7" stroke="#16a34a" stroke-width="3"/>
<text x="480" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#15803d">Story Generator</text>
<text x="480" y="140" text-anchor="middle" font-size="14" fill="#166534">AI creates personalized</text>
<text x="480" y="160" text-anchor="middle" font-size="14" fill="#166534">reading story</text>

<!-- Arrow 2 -->
<path d="M580 130 L660 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="620" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STORY</text>

<rect x="660" y="80" width="200" height="100" rx="20" fill="#fef3c7" stroke="#d97706" stroke-width="3"/>
<text x="760" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#b45309">Audio Synthesis</text>
<text x="760" y="140" text-anchor="middle" font-size="14" fill="#92400e">Text-to-Speech</text>
<text x="760" y="160" text-anchor="middle" font-size="14" fill="#92400e">Audio Generation</text>

<!-- Arrow 3 -->
<path d="M860 130 L960 130" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="910" y="120" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">AUDIO</text>

<rect x="960" y="80" width="200" height="100" rx="20" fill="#f3e8ff" stroke="#9333ea" stroke-width="3"/>
<text x="1060" y="115" text-anchor="middle" font-size="18" font-weight="bold" fill="#7c3aed">Text Comparison</text>
<text x="1060" y="140" text-anchor="middle" font-size="14" fill="#6b21a8">Analysis Engine</text>
<text x="1060" y="160" text-anchor="middle" font-size="14" fill="#6b21a8">Accuracy Detection</text>

<!-- Vertical Flow Arrow (Audio to Student Recording) -->
<path d="M760 180 L760 250" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="790" y="220" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">STUDENT LISTENS</text>

<!-- Bottom Row - Student Practice to Feedback -->
<rect x="660" y="250" width="200" height="100" rx="20" fill="#fce7f3" stroke="#ec4899" stroke-width="3"/>
<text x="760" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#be185d">Student Recording</text>
<text x="760" y="310" text-anchor="middle" font-size="14" fill="#9d174d">Student reads</text>
<text x="760" y="330" text-anchor="middle" font-size="14" fill="#9d174d">story aloud</text>

<!-- Arrow 4 (Student Recording to Speech Recognition) -->
<path d="M660 300 L580 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="620" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RECORDING</text>

<rect x="380" y="250" width="200" height="100" rx="20" fill="#e0e7ff" stroke="#6366f1" stroke-width="3"/>
<text x="480" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#4338ca">Speech Recognition</text>
<text x="480" y="310" text-anchor="middle" font-size="14" fill="#3730a3">Speech-to-Text</text>
<text x="480" y="330" text-anchor="middle" font-size="14" fill="#3730a3">Transcription</text>

<!-- Arrow 5 (Speech Recognition to Feedback) -->
<path d="M380 300 L300 300" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="340" y="290" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">TRANSCRIPT</text>

<rect x="100" y="250" width="200" height="100" rx="20" fill="#fef2f2" stroke="#ef4444" stroke-width="3"/>
<text x="200" y="285" text-anchor="middle" font-size="18" font-weight="bold" fill="#dc2626">Feedback System</text>
<text x="200" y="310" text-anchor="middle" font-size="14" fill="#b91c1c">Performance Analysis</text>
<text x="200" y="330" text-anchor="middle" font-size="14" fill="#b91c1c">Improvement Tips</text>

<!-- Arrow from Feedback to Report -->
<path d="M200 350 L200 450" stroke="#6b7280" stroke-width="3" marker-end="url(#arrowhead)" fill="none"/>
<text x="230" y="400" text-anchor="middle" font-size="12" font-weight="bold" fill="#6b7280">RESULTS</text>

<!-- Output Box -->
<rect x="100" y="450" width="200" height="80" rx="20" fill="#f0fdf4" stroke="#22c55e" stroke-width="3"/>
<text x="200" y="480" text-anchor="middle" font-size="18" font-weight="bold" fill="#16a34a">Student Report</text>
<text x="200" y="505" text-anchor="middle" font-size="14" fill="#15803d">Reading accuracy</text>
<text x="200" y="520" text-anchor="middle" font-size="14" fill="#15803d">& improvement areas</text>

<!-- Process Flow Indicators -->
<circle cx="760" cy="400" r="8" fill="#3b82f6"/>
<text x="780" y="370" font-size="12" font-weight="bold" fill="#3b82f6">ACTIVE LEARNING</text>
<text x="780" y="385" font-size="10" fill="#3b82f6">Student practices reading</text>
<text x="780" y="415" font-size="12" font-weight="bold" fill="#3b82f6">AI ASSESSMENT</text>
<text x="780" y="430" font-size="10" fill="#3b82f6">Real-time analysis & feedback</text>

<!-- Arrowhead Definition -->
<defs>
<marker id="arrowhead" markerWidth="12" markerHeight="7" refX="10" refY="3.5" orient="auto">
<polygon points="0 0, 12 3.5, 0 7" fill="#6b7280"/>
</marker>
</defs>
</svg>
</div>
""")

    # Usage steps and a description of each stage shown in the diagram.
    gr.Markdown("""
## 🚀 How to Use the App

• **Enter details** → Name, grade, and story topic
• **Generate story** → Click "✨ Generate Story" button
• **Listen to AI** → Play the audio to hear correct pronunciation
• **Record yourself** → Use microphone to read the story aloud
• **Get feedback** → Click "🔍 Analyze Reading" for results
• **Practice more** → Try new topics and improve your reading!

---

## 🔧 Key Components

- **User Input (UI Agent)**: Collects student details (name, grade, topic) via an intuitive interface.
- **Story Generator (LLM Agent)**: Utilizes advanced language models to craft personalized, engaging stories.
- **Audio Synthesis (TTS Agent)**: Converts text stories into natural-sounding speech for accurate pronunciation guidance.
- **Student Recording (Recording Agent)**: Captures student readings for analysis.
- **Speech Recognition (STT Agent)**: Transcribes recorded readings into text for comparison.
- **Text Comparison (Analysis Agent)**: Analyzes transcription accuracy, comparing student readings to the original text.
- **Feedback Generation (Feedback Agent)**: Creates detailed feedback reports, highlighting strengths and areas for improvement.

""")
894
-
895
- gr.Markdown("""
896
- <div style="text-align: center; margin-top: 30px; padding: 20px; background: white; border-radius: 12px; font-size: 0.96em; color: #6b7280;">
897
- Built for reading practice with modern AI tools.
898
- </div>
899
- """)
900
-
901
def generate_story_and_setup_ui(name, grade, topic):
    """Generate a story and derive the UI updates that depend on it.

    Args:
        name: Student name, forwarded to ``generate_story_from_llm``.
        grade: Grade label, forwarded to ``generate_story_from_llm``.
        topic: Story topic, forwarded to ``generate_story_from_llm``.

    Returns:
        A 4-tuple ``(story_text, audio_btn_update, audio_player_update,
        passage_state)``. ``passage_state`` equals ``story_text`` only when the
        text is non-empty and contains none of the error markers; otherwise it
        is ``""``.
    """
    story_text = ""
    audio_btn_update = gr.update(interactive=False, visible=False)
    audio_player_update = gr.update(value=None, visible=False)
    passage_state = ""

    res = generate_story_from_llm(name, grade, topic)
    if isinstance(res, str):
        # The generator returns plain text (a story or an error message).
        # Unpacking that string into three names raised ValueError for any
        # text that is not exactly three characters long.
        story_text = res
    elif res:
        # Still accept a (story, button_update, player_update) triple.
        story_text, audio_btn_update, audio_player_update = res

    # NOTE(review): substring heuristic — a genuine story containing one of
    # these words (e.g. "couldn't") is also treated as an error.
    error_markers = ("error", "blocked", "couldn't", "api key not configured")
    if story_text:
        lowered = story_text.lower()
        if not any(marker in lowered for marker in error_markers):
            passage_state = story_text
    return story_text, audio_btn_update, audio_player_update, passage_state
909
-
910
def assess_reading_with_analysis(original_passage_state, student_audio_path):
    """Transcribe the student's recording and compare it with the passage.

    Args:
        original_passage_state: The generated passage text; empty when no
            story has been generated yet.
        student_audio_path: File path of the student's recording, or a falsy
            value when nothing was recorded.

    Returns:
        A 3-tuple ``(status_html, feedback, highlighted_passage)``. On the
        early-exit paths ``feedback`` carries a short message (or the
        transcription service's message) and ``highlighted_passage`` is ``""``.
    """
    if not student_audio_path:
        return (
            """
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">🎤 Please record your reading first!</p>
</div>
""",
            "🎤 Please record your reading first!",
            ""
        )
    if not original_passage_state:
        # The story is generated in the "Practice & Generate" tab; the message
        # previously pointed at a "Story Creator" tab that the UI does not have.
        return (
            """
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">📚 Please generate a story first in the "Practice & Generate" tab.</p>
</div>
""",
            "Please generate a story first in the \"Practice & Generate\" tab.",
            ""
        )

    # Start transcription
    transcribed_text = speech_to_text_whisper_space(student_audio_path)

    # The transcription helper reports failures as ordinary text, so they are
    # detected by looking for these phrases in its result.
    stt_errors = ["couldn't understand", "had trouble", "service isn't working", "service is busy", "didn't get any recording", "filepath type issue"]
    if any(err in (transcribed_text or "").lower() for err in stt_errors):
        return (
            """
<div class="status-indicator status-error">
<p style="margin: 0; font-weight: 500;">❌ Transcription Error</p>
<p style="margin: 5px 0 0 0; font-size: 13px;">Please try recording again</p>
</div>
""",
            transcribed_text,
            ""
        )

    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)

    analysis_msg = """
<div class="status-indicator status-success">
<p style="margin: 0; font-weight: 500;">✅ Analysis Complete!</p>
<p style="margin: 5px 0 0 0; font-size: 13px;">Head over to the "Analysis & Feedback" tab to see your results! 🎯</p>
</div>
"""
    return (analysis_msg, feedback, highlighted_passage)
957
-
958
def generate_story_and_audio_complete(name, grade, topic):
    """Produce a story and its narration in a single call.

    Returns a 3-tuple ``(story_text, audio_path_or_None, story_text)``; when
    the generator yields nothing the tuple is ``("", None, "")``.
    """
    passage = generate_story_from_llm(name, grade, topic)
    if not passage:
        return "", None, ""

    # Generator failures come back as ordinary text; skip narration for them.
    # NOTE(review): the error text is still returned as the third element —
    # confirm that the consumer of that slot expects this.
    lowered = passage.lower()
    for marker in ("error", "blocked", "couldn't", "api key not configured"):
        if marker in lowered:
            return passage, None, passage

    narration_path = text_to_speech_using_space_simple(passage)
    if not narration_path:
        print("COMPLETE: Story generated, but audio failed")
        return passage, None, passage

    print("COMPLETE: Story and audio generated successfully")
    return passage, narration_path, passage
979
-
980
def update_recording_status(audio_file):
    """Build the UI updates that follow a change of the student's recording.

    Returns four updates in order: the status banner, the visibility of the
    two secondary buttons, and whether the analyze button is interactive.
    All three toggles are on exactly when ``audio_file`` is not ``None``.
    """
    has_recording = audio_file is not None
    if has_recording:
        banner = """
<div class="status-indicator status-success">
<p style="margin: 0; font-weight: 500;">🎉 Recording Complete!</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Ready for analysis</p>
</div>
"""
    else:
        banner = """
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">🎤 Ready to Record</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
</div>
"""
    return (
        gr.update(value=banner),
        gr.update(visible=has_recording),
        gr.update(visible=has_recording),
        gr.update(interactive=has_recording),
    )
1005
-
1006
def clear_recording():
    """Reset the recording widgets and both result panels to their idle state.

    Returns seven values in order: ``None`` for the recorder, the status
    banner update, two visibility updates (hidden), the analyze-button update
    (disabled), then the placeholder HTML for the feedback panel and for the
    word-analysis panel.
    """
    ready_banner = """
<div class="status-indicator">
<p style="margin: 0; font-weight: 500;">🎤 Ready to Record</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to start</p>
</div>
"""
    feedback_placeholder = """
<div style="text-align: center; color: #6b7280;">
<h4>Analysis Results</h4>
<p>Your feedback will appear here.</p>
<div class="status-indicator">
<p style="margin: 0; font-size: 14px;">💡 Record yourself reading to get started!</p>
</div>
</div>
"""
    word_analysis_placeholder = """
<div style="text-align: center; color: #6b7280;">
<h4>Word-by-Word Analysis</h4>
<p>Get color-coded feedback below.</p>
<div class="status-indicator">
<p style="margin: 0; font-size: 14px;">🎤 Complete a reading practice session to see your analysis!</p>
</div>
</div>
"""
    cleared_recorder = None
    return (
        cleared_recorder,
        gr.update(value=ready_banner),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(interactive=False),
        feedback_placeholder,
        word_analysis_placeholder,
    )
1037
-
1038
def record_again_action():
    """Discard the current take and prompt the student to record again.

    Returns five values in order: ``None`` for the recorder, the status banner
    update, two visibility updates (hidden) and the analyze-button update
    (disabled).
    """
    retry_banner = """
<div class="status-indicator status-warning">
<p style="margin: 0; font-weight: 500;">🔄 Ready for Take 2!</p>
<p style="margin: 5px 0 0 0; font-size: 12px;">Click microphone to record again</p>
</div>
"""
    cleared_recorder = None
    banner_update = gr.update(value=retry_banner)
    hide_record_again = gr.update(visible=False)
    hide_clear = gr.update(visible=False)
    disable_analyze = gr.update(interactive=False)
    return (cleared_recorder, banner_update, hide_record_again, hide_clear, disable_analyze)
1051
-
1052
# Event wiring: the story is generated first, narration is produced separately.
# NOTE(review): indentation was not visible in the text under review; these
# registrations presumably belong inside the `with gr.Blocks(...) as app:` context — confirm.


def _remember_passage(story_text):
    """Pass the displayed story through unchanged so it can be stored in state."""
    # NOTE(review): error messages shown in the story box are stored as well.
    return story_text


def _narrate_passage(story_text):
    """Return narration for a usable story, or None for empty/error text."""
    if not story_text:
        return None
    lowered = story_text.lower()
    # NOTE(review): substring heuristic — a genuine story containing one of
    # these words (e.g. "couldn't") gets no audio.
    for marker in ["error", "blocked", "couldn't", "api key not configured"]:
        if marker in lowered:
            return None
    return text_to_speech_using_space_simple(story_text)


# Generate the story, then copy it into state once the textbox is filled.
story_generated = gen_btn.click(
    fn=generate_story_from_llm,
    inputs=[s_name, s_grade, s_topic],
    outputs=[passage_out],
)
story_generated.then(
    fn=_remember_passage,
    inputs=[passage_out],
    outputs=[original_passage_state],
)

# Narration is triggered by any change of the story textbox.
passage_out.change(
    fn=_narrate_passage,
    inputs=[passage_out],
    outputs=[audio_out],
)

# Analyze the recording against the stored passage.
assess_btn.click(
    fn=assess_reading_with_analysis,
    inputs=[original_passage_state, stud_audio_in],
    outputs=[analysis_status, feedback_out, highlighted_out],
)

# Keep the banner and buttons in sync with the presence of a recording.
stud_audio_in.change(
    fn=update_recording_status,
    inputs=[stud_audio_in],
    outputs=[recording_status, record_again_btn, clear_recording_btn, assess_btn],
)

record_again_btn.click(
    fn=record_again_action,
    outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn],
)

clear_recording_btn.click(
    fn=clear_recording,
    outputs=[stud_audio_in, recording_status, record_again_btn, clear_recording_btn, assess_btn, feedback_out, highlighted_out],
)
1091
-
1092
# Launch the application
# Runs only when the file is executed as a script (not when imported); starts the
# Gradio app with debug mode on and without requesting a public share link.
if __name__ == "__main__":
    app.launch(debug=True, share=False)