ParulPandey committed on
Commit
8be64e3
·
verified ·
1 Parent(s): 6636bf4

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -382
app.py DELETED
@@ -1,382 +0,0 @@
1
- import gradio as gr
2
- import os
3
- import difflib
4
- from gradio_client import Client, file as gradio_file
5
- import time
6
- import google.generativeai as genai # Import Gemini library
7
-
8
- # --- Configuration & Clients ---
9
-
10
- # Function to initialize Gemini client (handles local env var and HF Secrets)
11
def configure_gemini_api():
    """Configure the Gemini SDK from the ``GOOGLE_API_KEY`` environment variable.

    Hugging Face Spaces exposes configured secrets to the running app as
    environment variables, so the same lookup serves both a Space and a
    local run. (The previous ``gr.Secrets.get`` call is not a Gradio API;
    it always raised ``AttributeError`` and fell through to this lookup.)

    Returns:
        bool: True when a non-blank key was found and ``genai.configure``
        accepted it; False when the key is missing/blank or configuration
        raised.
    """
    # A key made only of whitespace is as useless as a missing one.
    api_key = (os.environ.get("GOOGLE_API_KEY") or "").strip()
    if not api_key:
        print("WARN: GOOGLE_API_KEY not found in Gradio Secrets or environment. Story generation with Gemini will be disabled.")
        return False

    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        # Startup must not crash on a bad key; report and disable the feature.
        print(f"Error configuring Gemini API: {e}")
        return False
    return True
31
-
32
# Configure Gemini once at import time; the resulting flag gates story generation.
GEMINI_API_CONFIGURED = configure_gemini_api()


def _connect_to_space(space_id, description, capability):
    """Open a ``gradio_client.Client`` for ``space_id``; return None on failure.

    Args:
        space_id: Identifier passed straight to ``Client``.
        description: Human-readable client name used in the failure message.
        capability: Short feature name reported as unavailable on failure.
    """
    try:
        return Client(space_id)
    except Exception as exc:
        print(f"Fatal: Could not initialize {description} client ({space_id}): {exc}. {capability} will not work.")
        return None


# Text-to-speech client for Bark (suno/bark); None when the connection failed.
bark_tts_client = _connect_to_space("suno/bark", "Bark TTS", "TTS")

# Speech-to-text client for Whisper (abidlabs/whisper-large-v2); None when the connection failed.
whisper_stt_client = _connect_to_space("abidlabs/whisper-large-v2", "Whisper STT", "STT")
48
-
49
- # --- Helper Functions ---
50
-
51
def generate_story_with_gemini(name, grade, topic):
    """Ask Gemini for a short read-aloud story and return it as plain text.

    Args:
        name: Student name interpolated into the prompt.
        grade: Grade level interpolated into the prompt.
        topic: Story topic interpolated into the prompt and into error text.

    Returns:
        str: The stripped story text, or a child-friendly message when the
        API is not configured, the prompt was blocked, the response carried
        no usable content, or the call raised. This function never raises.
    """
    if not GEMINI_API_CONFIGURED:
        return "Google Gemini API key not configured. Story generation is disabled. 🔑"

    def blocked_message(reason):
        # Single source for the safety-filter message used on both paths below.
        return f"Oh dear! My story idea for '{topic}' was a bit too wild and got blocked by the safety filter (Reason: {reason}). Let's try a different topic! 😊"

    try:
        model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest")

        # Sentences are joined with single spaces to form one prompt string.
        prompt = " ".join([
            "You are a super friendly and imaginative storyteller for kids.",
            f"Please write an exciting and fun short story (around 100-120 words) for a student named {name} who is in Grade {grade}.",
            f"The story must be about '{topic}'.",
            f"Use simple words and sentences that a Grade {grade} student can easily read aloud and understand.",
            "Make the story engaging and positive. Jump right into the story without any introduction like 'Here is a story for you'.",
        ])

        # Same threshold for every harm category.
        safety_settings = [
            {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
            for category in (
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            )
        ]

        generation_config = genai.types.GenerationConfig(
            candidate_count=1,
            max_output_tokens=300,
            temperature=0.75,
        )

        response = model.generate_content(
            prompt,
            generation_config=generation_config,
            safety_settings=safety_settings,
        )

        has_story = bool(response.candidates and response.candidates[0].content.parts)
        # Read the text before inspecting the block reason, as the success path always did.
        story = response.text if has_story else None

        feedback = response.prompt_feedback
        if feedback and feedback.block_reason:
            return blocked_message(feedback.block_reason)

        if not has_story:
            print(f"Gemini API response issue: {response}")
            return f"Hmm, Gemini's story magic seems to be on a little break for '{topic}'. Maybe try another topic? 🤔"

        story = story.strip()
        if not story:
            return f"Hmm, Gemini gave me a blank page for '{topic}'. Let's try a different topic or try again! ✨"
        return story

    except Exception as exc:
        print(f"Error generating story with Gemini: {exc}")
        detail = str(exc)
        # Invalid-key failures get a dedicated message.
        if any(marker in detail for marker in ("API_KEY_INVALID", "API key not valid")):
            return "Oops! The Google Gemini API key seems to be having a problem. Please tell the grown-ups to check it! 🔑"
        return f"Oh no! 😟 I had a little trouble dreaming up a story with Gemini. Error: {exc}"
111
-
112
-
113
def text_to_speech_bark(text_to_speak):
    """Synthesize speech for ``text_to_speak`` through the Bark client.

    Submits the text and a fixed voice preset to the ``/generate_audio``
    endpoint and waits up to 180 seconds for the result.

    Args:
        text_to_speak: Text handed to Bark unchanged.

    Returns:
        str: The audio reference returned by the client (the first element
        when the result is a tuple, or the result itself when it is a
        string), or a child-friendly error message. This function never
        raises.
    """
    if not bark_tts_client:
        return "The Bark TTS sound machine isn't working right now. 🛠️ Please tell the grown-ups!"

    # Defined before the try so the except branch can always reference it.
    voice_preset = "v2/en_speaker_7"
    try:
        job = bark_tts_client.submit(
            text_to_speak,
            voice_preset,
            api_name="/generate_audio",
        )
        audio_result = job.result(timeout=180)

        if isinstance(audio_result, tuple) and len(audio_result) > 0:
            return audio_result[0]
        if isinstance(audio_result, str):
            return audio_result

        print(f"Unexpected Bark TTS result format: {audio_result}")
        return "Hmm, the sound came out a bit funny from Bark. 🤔"
    except Exception as e:
        print(f"Error with Bark TTS (suno/bark): {e}")
        detail = str(e)
        lowered = detail.lower()
        # Needles compared against the lowercased text must be lowercase
        # themselves; the former "Queue full" needle could never match.
        if "queue full" in lowered or "too much pending traffic" in lowered or " सर्वर व्यस्त है" in detail:
            return "The Bark sound machine is super busy with other kids! 인기폭발! очередь! Please try again in a little bit. 🕒"
        if "generator" in lowered and "choices" in lowered:
            return f"Oops! Bark had a hiccup with the voice. Chosen: '{voice_preset}'. Maybe try later? Details: {e}"
        return f"Oh dear, Bark couldn't make the sound. 🔇 Error: {e}"
140
-
141
def speech_to_text_whisper_space(audio_filepath):
    """Transcribe a recorded audio file through the Whisper client.

    Submits the file with the fixed arguments ``"transcribe"`` and
    ``"English"`` to the ``/predict`` endpoint and waits up to 120 seconds.

    Args:
        audio_filepath: Path of the recording; a falsy value short-circuits
            with a "no recording" message.

    Returns:
        str: The transcription (the ``'text'`` entry when the result is a
        dict, or the result itself when it is a string), or a
        child-friendly error message. This function never raises.
    """
    if not whisper_stt_client:
        return "The Whisper listening ears aren't working right now. 🛠️ Please tell the grown-ups!"
    if not audio_filepath:
        return "Oops! I didn't get any recording to listen to. 🎤"

    try:
        job = whisper_stt_client.submit(
            gradio_file(audio_filepath),
            "transcribe",
            "English",
            api_name="/predict",
        )
        result = job.result(timeout=120)

        if isinstance(result, dict) and 'text' in result:
            return result['text']
        if isinstance(result, str):
            return result

        print(f"Unexpected Whisper STT result format: {result}")
        return "Hmm, I couldn't quite understand the words from Whisper. 🤔"
    except Exception as e:
        print(f"Error transcribing audio with Whisper Space: {e}")
        lowered = str(e).lower()
        # Needles compared against the lowercased text must be lowercase
        # themselves; the former "Queue full" needle could never match.
        if "queue full" in lowered or "too much pending traffic" in lowered:
            return "The Whisper listening ears are super busy! 인기폭발! очередь! Please try again in a bit. 🕒"
        return f"Oh no! Whisper had trouble hearing that. 🙉 Error: {e}"
167
-
168
# Built once at import time:
#   * the typographic apostrophe becomes "'" so contractions stay comparable;
#   * en/em dashes and the ellipsis character become spaces so the words on
#     either side stay separate;
#   * ASCII punctuation (apostrophe excluded) and typographic quote marks
#     are deleted outright.
_COMPARISON_TRANSLATION = str.maketrans(
    "\u2019\u2013\u2014\u2026",
    "'   ",
    "!\"#$%&()*+,-./:;<=>?@[\\]^_`{|}~" + "\u2018\u201c\u201d",
)


def clean_text_for_comparison(text):
    """Normalize ``text`` into a list of lowercase words for comparison.

    Punctuation is removed while apostrophes inside words are kept, so
    "don't" survives intact. Typographic quotes, dashes and ellipses are
    handled as well as ASCII punctuation, and quote marks wrapping a word
    are stripped, so quoted dialogue does not produce spurious mismatches.

    Args:
        text: Text to normalize.

    Returns:
        list[str]: The words in order; an empty list when ``text`` is not
        a string or contains no words.
    """
    if not isinstance(text, str):
        return []
    stripped_words = (
        word.strip("'")
        for word in text.lower().translate(_COMPARISON_TRANSLATION).split()
    )
    # A token made only of apostrophes strips down to "" and is dropped.
    return [word for word in stripped_words if word]
174
-
175
def compare_texts_for_feedback(original_text, student_text):
    """Diff the story against the student's transcription, word by word.

    Both texts are normalized with ``clean_text_for_comparison`` and aligned
    with ``difflib.SequenceMatcher`` (autojunk disabled).

    Args:
        original_text: The story the student was asked to read.
        student_text: The transcription of what the student said.

    Returns:
        tuple[str, str]: ``(feedback, highlighted_passage)``, both Markdown.
        The passage is empty when the transcription contains no words.
    """
    expected = clean_text_for_comparison(original_text)
    spoken = clean_text_for_comparison(student_text)

    if not spoken:
        return "It sounds like you didn't record anything, or maybe it was super quiet! 🤫 Try recording again nice and clear!", ""

    notes = []
    passage_bits = []
    opcodes = difflib.SequenceMatcher(None, expected, spoken, autojunk=False).get_opcodes()

    for op, a_start, a_end, b_start, b_end in opcodes:
        expected_run = expected[a_start:a_end]
        spoken_run = spoken[b_start:b_end]
        expected_phrase = " ".join(expected_run)
        spoken_phrase = " ".join(spoken_run)

        if op == 'equal':
            passage_bits.append(expected_phrase)
        elif op == 'delete':
            notes.append(f'- You missed: "**{expected_phrase}**"')
            passage_bits.append(f"~~{expected_phrase}~~ (*skipped*)")
        elif op == 'insert':
            notes.append(f"- You added: \"*{spoken_phrase}*\" (which wasn't in the story)")
            passage_bits.append(f"(*added:* **{spoken_phrase}**)")
        elif op == 'replace':
            if len(expected_run) == len(spoken_run):
                # Same length on both sides: report the swaps pair by pair.
                for wanted, heard in zip(expected_run, spoken_run):
                    notes.append(f'- You said: "*{heard}*" instead of: "**{wanted}**"')
                    passage_bits.append(f"~~{wanted}~~ **{heard}**")
            else:
                # Uneven lengths: report the two phrases as a whole.
                notes.append(f'- Instead of: "**{expected_phrase}**", you said: "*{spoken_phrase}*"')
                passage_bits.append(f"~~{expected_phrase}~~ **{spoken_phrase}**")

    highlighted = " ".join(passage_bits)

    if notes:
        summary = "Great try! Here are a few words to practice to make it even better:\n" + "\n".join(notes)
        return summary, highlighted
    return "🎉🥳 WOOHOO! Amazing reading! You got all the words spot on! 🥳🎉", highlighted
216
-
217
- # --- Gradio UI Functions ---
218
def generate_story_and_audio_for_ui(name, grade, topic, progress=gr.Progress(track_tqdm=True)):
    """Generate a story and its narration for the "get my story" button.

    Args:
        name: Student name from the form.
        grade: Grade selected in the form.
        topic: Story topic from the form.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        tuple: ``(passage_text, audio_path_or_None, visibility_update,
        story_state)``. The recording area is shown only when both the
        story and the narration succeeded. ``story_state`` is the story
        text, or an empty string when no story was produced.
    """
    if not name or not grade or not topic:
        return "Oops! Please tell me your name, grade, and a fun topic first! 😊", None, gr.update(visible=False), ""

    progress(0.1, desc="📖 Asking Gemini to dream up a cool story for you...")
    story_text = generate_story_with_gemini(name, grade, topic)

    # Every failure message from generate_story_with_gemini *begins* with one
    # of these phrases. Matching on the prefix (rather than anywhere in the
    # text) also catches the "Oh dear!" safety-filter message and avoids
    # rejecting a real story that merely contains "Oh no!" or "Oops!".
    story_error_prefixes = (
        "Google Gemini API key not configured",
        "Oh dear!",
        "Oh no!",
        "Oops!",
        "Hmm,",
    )
    if not story_text.strip() or story_text.lstrip().startswith(story_error_prefixes):
        # Show the message, keep the recording area hidden, and store no story.
        return story_text, None, gr.update(visible=False), ""

    progress(0.5, desc="🎧 Warming up the Bark sound machine... (this can take a moment, like magic!)")
    tts_audio_path = text_to_speech_bark(story_text)

    tts_error_markers = (
        "couldn't make the sound",
        "sound came out a bit funny",
        "sound machine isn't working",
        "sound machine is super busy",
        "Bark had a hiccup",
    )
    if any(marker in (tts_audio_path or "") for marker in tts_error_markers):
        # The value is an error sentence, not a file: surface it as a toast
        # and clear the audio output instead of handing the sentence to a
        # filepath-typed Audio component.
        gr.Warning(tts_audio_path)
        return story_text, None, gr.update(visible=False), story_text

    progress(1.0, desc="✅ Story and sound are ready! Let's go!")
    return story_text, tts_audio_path, gr.update(visible=True), story_text
249
-
250
def assess_student_reading_ui(original_passage_state, student_audio_path, progress=gr.Progress(track_tqdm=True)):
    """Transcribe the student's recording and compare it with the story.

    Args:
        original_passage_state: The story text held in the session state.
        student_audio_path: Path of the student's recording.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        tuple[str, str]: ``(feedback, highlighted_passage)``. The second
        element is empty whenever the recording, the story, or the
        transcription is unavailable.
    """
    if not student_audio_path:
        return "🎤 Whoops! Did you forget to record your awesome reading? Try again!", ""
    if not original_passage_state:
        return "Hmm, I lost the story! 😟 Please generate a new story first.", ""

    progress(0.2, desc="👂 Whisper is listening carefully to your recording...")
    transcribed_text = speech_to_text_whisper_space(student_audio_path)

    # Fragments of the failure messages speech_to_text_whisper_space can
    # return. The first one must include "quite" to match the actual message;
    # without it that failure text was graded as if the student had said it.
    stt_error_markers = (
        "couldn't quite understand the words",
        "had trouble hearing that",
        "listening ears aren't working",
        "listening ears are super busy",
        "didn't get any recording",
    )
    if any(marker in (transcribed_text or "") for marker in stt_error_markers):
        return transcribed_text, ""

    progress(0.7, desc="🧠 Thinking about the words...")
    feedback, highlighted_passage = compare_texts_for_feedback(original_passage_state, transcribed_text)
    progress(1.0, desc="⭐ Feedback is ready!")
    return feedback, highlighted_passage
270
-
271
- # --- Gradio Interface ---
272
# Stylesheet passed to gr.Blocks below.
css = """
body { font-family: 'Comic Sans MS', 'Chalkboard SE', 'Comic Neue', cursive; background-color: #F0F8FF; } /* AliceBlue background */
.gr-button {
background-color: #FF69B4 !important; /* HotPink */
color: white !important;
border-radius: 20px !important;
font-weight: bold !important;
border: 2px solid #FF1493 !important; /* DeepPink border */
box-shadow: 0px 3px 5px rgba(0,0,0,0.2) !important;
}
.gr-button:hover { background-color: #FF1493 !important; } /* DeepPink on hover */
.gr-panel {
border-radius: 15px !important;
box-shadow: 5px 5px 15px rgba(0,0,0,0.1) !important;
background-color: #FFFACD !important; /* LemonChiffon panel background */
border: 2px dashed #FFD700 !important; /* Gold dashed border */
}
label, .gr-checkbox-label { color: #4B0082 !important; font-weight: bold !important; } /* Indigo */
.gr-textbox, .gr-dropdown { border-radius: 10px !important; border: 1px solid #DDA0DD !important; } /* Plum border for inputs */
#student_audio_input audio { background-color: #E6E6FA; border-radius: 10px; } /* Lavender for audio player */
#feedback_output, #highlighted_passage_output {
background-color: #FFFFE0; /* LightYellow */
padding: 15px;
border-radius: 10px;
border: 1px solid #FAFAD2; /* LightGoldenrodYellow */
}
"""

# Banner rendered at the top of the page.
_HEADER_HTML = """
<div style="text-align: center; padding: 20px 0;">
<h1 style="color: #FF6347; font-size: 3em; text-shadow: 2px 2px #D3D3D3;">🌈🦄✨ AI Reading Buddy ✨🦄🌈</h1>
<p style="font-size: 1.3em; color: #483D8B;">Let's read a super fun story from Gemini and practice our words!</p>
</div>
"""

# Credits rendered at the bottom of the page.
_FOOTER_HTML = """
---
<div style="text-align: center; font-size: 0.9em; color: #555;">
Built with ❤️ for the Agentic Demo Track Hackathon! Tag: <code>agent-demo-track</code>
<br>Stories by Google Gemini, voices by Suno Bark @ HF, and listening by Whisper @ HF.
</div>
"""


def _section_heading(color, text):
    """Add a level-3 Markdown heading whose text is wrapped in a span tinted ``color``."""
    return gr.Markdown(f"### <span style='color:{color};'>{text}</span>")


_theme = gr.themes.Soft(
    primary_hue=gr.themes.colors.pink,
    secondary_hue=gr.themes.colors.purple,
)

with gr.Blocks(theme=_theme, css=css) as app:
    gr.Markdown(_HEADER_HTML)

    # Written by the generate handler and read by the assess handler.
    original_passage_state = gr.State("")

    with gr.Row():
        # Left column: the student's details and the generate button.
        with gr.Column(scale=1):
            _section_heading("#DB7093", "✏️ Tell Me About You!")
            student_name_input = gr.Textbox(
                label="👑 Your Awesome Name:",
                placeholder="E.g., Princess Lily",
            )
            student_grade_input = gr.Dropdown(
                label="🧑‍🎓 Your Grade:",
                choices=[str(grade_number) for grade_number in range(1, 11)],
                value="3",
            )
            topic_input = gr.Textbox(
                label="🚀 Story Topic Idea:",
                placeholder="E.g., brave little astronaut",
            )
            generate_button = gr.Button(value="🎈 Get My Gemini Story!")

        # Right column: the generated story and its narration.
        with gr.Column(scale=2):
            _section_heading("#DB7093", "📖 Your Special Story (from Gemini AI):")
            passage_output = gr.Textbox(label="Read this aloud:", lines=10, interactive=False)
            _section_heading("#DB7093", "🔊 Listen to the Story:")
            audio_output = gr.Audio(label="Hear how it sounds (with Bark TTS Bark️)", type="filepath")

    # LightPink dashed separator between the story and the recording area.
    gr.Markdown("<hr style='border:1px dashed #FFB6C1;'>")

    # Starts hidden; the generate handler returns an update for its visibility.
    with gr.Row(visible=False) as recording_assessment_area:
        with gr.Column(scale=1):
            _section_heading("#32CD32", "🤩 Your Turn to Shine! 🤩")
            student_audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="🎤 Record yourself reading the story! Press the mic, then stop.",
                elem_id="student_audio_input",
            )
            assess_button = gr.Button(value="🧐 Check My Reading!", elem_id="assess_button")

        with gr.Column(scale=2):
            _section_heading("#32CD32", "💡 Word Detective Feedback:")
            feedback_output = gr.Markdown(
                value="Your amazing feedback will pop up here! ✨",
                elem_id="feedback_output",
            )
            highlighted_passage_output = gr.Markdown(
                value="See your reading journey here! 🗺️",
                elem_id="highlighted_passage_output",
            )

    # Story generation fills the passage, the narration, the recording
    # area's visibility and the stored story, in that order.
    _generate_inputs = [student_name_input, student_grade_input, topic_input]
    _generate_outputs = [
        passage_output,
        audio_output,
        recording_assessment_area,
        original_passage_state,
    ]
    generate_button.click(
        fn=generate_story_and_audio_for_ui,
        inputs=_generate_inputs,
        outputs=_generate_outputs,
    )

    # Assessment reads the stored story plus the recording and fills both
    # feedback panes.
    assess_button.click(
        fn=assess_student_reading_ui,
        inputs=[original_passage_state, student_audio_input],
        outputs=[feedback_output, highlighted_passage_output],
    )

    gr.Markdown(_FOOTER_HTML)
370
-
371
- # --- Launching the App ---
372
if __name__ == "__main__":
    # Each entry pairs a readiness value with the lines to print when it is falsy.
    _startup_checks = (
        (
            GEMINI_API_CONFIGURED,
            (
                "🚨 GOOGLE_API_KEY not configured for local testing or failed to initialize!",
                "Please set it: export GOOGLE_API_KEY='your_key_here'",
            ),
        ),
        (
            bark_tts_client,
            ("🚨 Bark TTS client (suno/bark) could not be initialized. TTS will not work.",),
        ),
        (
            whisper_stt_client,
            ("🚨 Whisper STT client (abidlabs/whisper-large-v2) could not be initialized. STT will not work.",),
        ),
    )
    for _ready, _warning_lines in _startup_checks:
        if not _ready:
            for _line in _warning_lines:
                print(_line)

    # The app is launched even when some checks failed.
    app.launch(debug=True)