Spaces:
Sleeping
Sleeping
import base64
import json
import os
import re
import time
import traceback
from difflib import SequenceMatcher
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union

import google.generativeai as genai
import numpy as np
import requests
from dotenv import load_dotenv
from smolagents.tools import tool

try:
    from gradio_client import Client
except ImportError:
    # Fallback import for older versions
    import gradio_client
    Client = gradio_client.Client
# Load environment variables (python-dotenv) before reading any settings.
load_dotenv()

# Service settings taken from the environment; each is None when unset.
TTS_API = os.environ.get("TTS_API")
STT_API = os.environ.get("STT_API")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Gemini is configured only when a key was supplied.
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
def generate_story(name: str, grade: str, topic: str) -> str:
    """
    Generate a short, grade-appropriate story for reading practice with Gemini.

    Args:
        name (str): The child's name, inserted into the prompt.
        grade (str): The student's grade label, e.g., "Grade 3". The first
            number found in the label is used; grade 1 is assumed when the
            label contains no number.
        topic (str): The story topic, e.g., "space", "animals".

    Returns:
        str: Generated story text with surrounding whitespace removed.
    """
    # Take the first run of digits only, so unrelated numbers in the label
    # (a year, a class number) are not concatenated into the grade.
    grade_match = re.search(r"\d+", grade or "")
    grade_num = int(grade_match.group()) if grade_match else 1
    age = grade_num + 5  # Grade 1 = ~6 years old, Grade 6 = ~11 years old

    # Story parameters and output budget are chosen together per grade band.
    if grade_num <= 2:
        # Grades 1-2: very simple stories
        story_length = "5 short sentences"
        vocabulary_level = "very simple words (mostly 1-2 syllables)"
        sentence_structure = "short, simple sentences"
        complexity = "basic concepts"
        reading_level = "beginner"
        max_tokens = 300
    elif grade_num <= 4:
        # Grades 3-4: intermediate stories
        story_length = "1 short paragraph"
        vocabulary_level = "age-appropriate words with some longer words"
        sentence_structure = "mix of simple and compound sentences"
        complexity = "intermediate concepts with some detail"
        reading_level = "intermediate"
        max_tokens = 600
    else:
        # Grade 5 and above: more advanced stories
        story_length = "2 paragraphs"
        vocabulary_level = "varied vocabulary including descriptive words"
        sentence_structure = "complex sentences with descriptive language"
        complexity = "detailed concepts and explanations"
        reading_level = "advanced elementary"
        max_tokens = 1000

    # The prompt is assembled line by line so that source indentation never
    # leaks into the text sent to the model.
    prompt = "\n".join([
        "",
        f"You are an expert children's reading coach. Create an engaging, educational story for a {age}-year-old child named {name} about {topic}.",
        f"GRADE LEVEL: {grade} ({reading_level} level)",
        "Story Requirements:",
        f"- Length: {story_length}",
        f"- Vocabulary: Use {vocabulary_level}",
        f"- Sentence structure: {sentence_structure}",
        f"- Complexity: {complexity}",
        f"- Teach something interesting about {topic}",
        "- End with a positive, encouraging message",
        "- Make it engaging and fun to read aloud",
        "- start directly with the story, no preamble or introduction",
        "Additional Guidelines:",
        "- For younger students (Grades 1-2): Focus on simple actions, basic emotions, and clear cause-and-effect",
        "- For middle students (Grades 3-5): Include some problem-solving, friendship themes, and basic science/nature facts",
        "- For older students (Grades 6-10): Add character development, more detailed explanations, and encourage curiosity",
        f"The story should be perfectly suited for a {grade} student's reading ability and attention span.",
        "Story:",
        "",
    ])

    model = genai.GenerativeModel('gemini-2.0-flash')
    generation_config = {
        "temperature": 0.8,
        "max_output_tokens": max_tokens,
        "top_p": 0.9,
    }
    response = model.generate_content(
        contents=prompt,
        generation_config=generation_config
    )
    # NOTE(review): reading response.text presumably fails when the model
    # returns no text (e.g. a blocked reply) - confirm against the SDK docs.
    return response.text.strip()
def text_to_speech(text: str) -> Optional[str]:
    """
    Convert story text into audio via the hosted TTS Space using gradio_client.

    Args:
        text (str): The story to convert to speech.

    Returns:
        Optional[str]: Path/URL string of the generated audio, or None when
        the text is blank, the service call fails, or the reply does not have
        the expected (audio_path, status) shape.
    """
    # Nothing to synthesise: skip the remote call entirely.
    if not text or not text.strip():
        print("TTS skipped: no text provided")
        return None

    try:
        client = Client("NihalGazi/Text-To-Speech-Unlimited")
        result = client.predict(
            prompt=text,            # The text to convert to speech
            voice="nova",           # Voice selection
            emotion="neutral",      # Emotion style
            use_random_seed=True,   # Use a random seed for variety
            specific_seed=12345,    # presumably ignored while use_random_seed is True - TODO confirm
            api_name="/text_to_speech_app"
        )
    except Exception as e:
        # Best-effort boundary: any client/service failure is reported as None.
        print(f"TTS Error: {e}")
        traceback.print_exc()
        return None

    print(f"TTS result: {result}")
    print(f"TTS result type: {type(result)}")

    # Expected reply is a pair (filepath, status_str); a list is accepted too.
    if not isinstance(result, (tuple, list)) or len(result) < 2:
        print(f"Unexpected TTS result format: {result}")
        return None

    audio_path, status = result[0], result[1]
    print(f"TTS Status: {status}")

    if not audio_path or not isinstance(audio_path, str):
        print(f"Invalid audio path: {audio_path}")
        return None

    print(f"TTS generated audio at: {audio_path}")
    return audio_path
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe the student's audio into text using a Hugging Face Whisper Space.

    Failures are reported as user-facing sentences rather than exceptions.

    Args:
        audio_path (str): Path to the recorded .wav audio file.

    Returns:
        str: Transcribed text, or a short message describing what went wrong.
    """
    print(f"Received audio input: {type(audio_path)} - {str(audio_path)[:100]}...")

    # No recording at all (None or "") is the same situation as a missing file.
    if not audio_path:
        return "Audio file not found. Please try recording again."

    try:
        path = Path(audio_path)
        # is_file() also rejects directories, which exists() would accept.
        if not path.is_file():
            return "Audio file not found. Please try recording again."

        # Encode audio to base64 for the JSON payload.
        encoded = base64.b64encode(path.read_bytes()).decode("utf-8")
        payload = {
            "data": [
                {
                    "name": path.name,
                    "data": f"data:audio/wav;base64,{encoded}"
                },
                None
            ]
        }

        print("Sending audio to HF STT...")
        response = requests.post(
            "https://abidlabs-whisper-large-v2.hf.space/run/predict",
            json=payload,
            timeout=60
        )
        response.raise_for_status()

        # A missing or empty "data" list is treated like an empty transcript.
        data = response.json().get("data") or [None]
        result = data[0]
        print(f"HF response: {result}")

        if not isinstance(result, str) or not result.strip():
            return "Could not transcribe audio. Please speak more clearly and try again."
        return result.strip()
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error: {e}")
        return "Transcription service returned an error. Please try again later."
    except Exception as e:
        print(f"Unexpected error: {e}")
        return "Something went wrong during transcription. Please try again."
def _normalize_words(text: str) -> list:
    """Split text into lowercase words with edge punctuation removed, dropping empty results."""
    edge_chars = ".,!?;:\"'\u201c\u201d\u2018\u2019()[]\u2026\u2014\u2013-"
    stripped = (token.strip(edge_chars).lower() for token in (text or "").split())
    # Tokens made only of punctuation ("...", "-") strip down to "" and are not words.
    return [word for word in stripped if word]


def compare_texts_for_feedback(original: str, spoken: str, leniency_bonus: int = 60) -> str:
    """
    Compare the original and spoken text and build feedback with pronunciation help.

    Args:
        original (str): The original story text.
        spoken (str): The student's transcribed reading.
        leniency_bonus (int): Percentage points added to the raw similarity
            before clamping to 0-100. The default of 60 keeps the existing
            scoring. NOTE(review): with 60 the score can never fall below 60,
            so the lower feedback tiers are never reached - confirm this
            leniency is intended; pass 0 for the raw similarity.

    Returns:
        str: Feedback text produced by generate_adaptive_feedback.
    """
    orig_words = _normalize_words(original)
    spoken_words = _normalize_words(spoken)

    # quick_ratio() is an upper bound on the true similarity ratio.
    matcher = SequenceMatcher(None, orig_words, spoken_words, autojunk=False)
    raw_score = round(matcher.quick_ratio() * 100 + leniency_bonus)
    accuracy = max(0, min(raw_score, 100))

    # Word-level differences, ignoring order and repetition.
    missed_words = set(orig_words) - set(spoken_words)
    extra_words = set(spoken_words) - set(orig_words)

    # Words that were probably attempted but came out differently.
    mispronounced = find_similar_words(orig_words, spoken_words)

    return generate_adaptive_feedback(accuracy, missed_words, extra_words, mispronounced, len(orig_words))
def find_similar_words(original_words: list, spoken_words: list) -> list:
    """
    Find words that might be mispronounced (similar but not exact matches).

    Args:
        original_words (list): Original story words.
        spoken_words (list): Transcribed words.

    Returns:
        list: Up to five (original_word, spoken_word) tuples, one per distinct
        original word, in order of first appearance in the story.
    """
    from difflib import get_close_matches

    max_pairs = 5
    spoken_set = set(spoken_words)
    # De-duplicated candidates: same best match, less work per lookup.
    candidates = list(dict.fromkeys(spoken_words))

    pairs = []
    already_checked = set()
    for word in original_words:
        # Very short words are skipped, as are words that were read correctly
        # and words already examined (a repeated word is reported once).
        if len(word) <= 2 or word in spoken_set or word in already_checked:
            continue
        already_checked.add(word)

        best = get_close_matches(word, candidates, n=1, cutoff=0.6)
        if best:
            pairs.append((word, best[0]))
            if len(pairs) == max_pairs:
                break
    return pairs
def generate_adaptive_feedback(accuracy: int, missed_words: set, extra_words: set,
                               mispronounced: list, total_words: int) -> str:
    """
    Build an encouraging feedback message with word-level practice guidance.

    Args:
        accuracy (float): Reading accuracy percentage
        missed_words (set): Words that were skipped
        extra_words (set): Words that were added (not used in the message)
        mispronounced (list): (original, spoken) pairs of likely mispronunciations
        total_words (int): Total words in story (not used in the message)

    Returns:
        str: Feedback lines joined with newlines
    """
    # Opening line: first tier whose floor the score reaches.
    headline_tiers = (
        (95, "🌟 AMAZING! You read almost perfectly!"),
        (85, "🎉 GREAT JOB! You're doing wonderful!"),
        (70, "👍 GOOD WORK! You're getting better!"),
        (50, "😊 NICE TRY! Keep practicing!"),
    )
    headline = next(
        (message for floor, message in headline_tiers if accuracy >= floor),
        "🚀 GREAT START! Every practice makes you better!",
    )
    lines = [headline, f"Reading accuracy: {accuracy:.1f}%"]

    # Skipped words, alphabetical, at most eight.
    if missed_words:
        lines.append("\n📚 PRACTICE THESE WORDS:")
        lines.extend(
            f"• {missed.upper()} - {get_pronunciation_tip(missed)}"
            for missed in sorted(missed_words)[:8]
        )

    # Words that were attempted but came out differently.
    if mispronounced:
        lines.append("\n🎯 PRONUNCIATION PRACTICE:")
        lines.extend(
            f"• {target.upper()} (you said '{heard}') - {get_pronunciation_correction(target, heard)}"
            for target, heard in mispronounced
        )

    # Closing suggestion for what to do next.
    next_step_tiers = (
        (80, "\n🏆 You're ready for more challenging stories!"),
        (60, "\n💪 Try reading this story again to improve your score!"),
    )
    lines.append(next(
        (message for floor, message in next_step_tiers if accuracy >= floor),
        "\n🌱 Let's practice with shorter, simpler stories first!",
    ))
    return "\n".join(lines)
def get_pronunciation_tip(word: str) -> str:
    """
    Generate a pronunciation tip for a word using simple spelling patterns.

    Rules are tried in order: very short word, "-tion" ending, "-ed" past
    tense, "th", leading "wh", long word (syllable split), then a
    letter-by-letter fallback.

    Args:
        word (str): Word to provide pronunciation help for

    Returns:
        str: Pronunciation tip
    """
    word = word.lower()

    if len(word) <= 3:
        return f"Sound it out: {'-'.join(word)}"

    if word.endswith('tion'):
        return "Ends with 'shun' sound"

    # Words in "-eed" (need, seed, speed) are mostly not past-tense forms,
    # so they are left to the later rules.
    if word.endswith('ed') and not word.endswith('eed'):
        stem = word[:-2]
        if stem.endswith(('t', 'd')):
            # wanted, needed: the ending is a separate syllable.
            return "Past tense - ends with 'ed' sound"
        if stem.endswith(('p', 'k', 'f', 'x', 'c', 'ss', 'sh', 'ch')):
            # jumped, baked, danced, watched: voiceless stem, ending sounds like 't'.
            return "Past tense - ends with 't' sound"
        return "Past tense - ends with 'd' sound"

    if 'th' in word:
        return "Put your tongue between your teeth for 'th'"

    if word.startswith('wh'):
        return "Starts with 'w' sound (like 'when')"

    if len(word) >= 6:
        # Break longer words into syllables
        return f"Break it down: {break_into_syllables(word)}"

    # Remaining 4-5 letter words: spell out letter by letter.
    return f"Sound it out slowly: {'-'.join(word)}"
def get_pronunciation_correction(original: str, spoken: str) -> str:
    """
    Provide a specific correction tip for a mispronounced word.

    The first difference found decides the tip: length, then first letter,
    then last letter; otherwise a generic "listen carefully" tip is given.

    Args:
        original (str): Correct word
        spoken (str): How it was pronounced (as transcribed)

    Returns:
        str: Correction tip
    """
    orig = original.lower()
    spok = spoken.lower()

    if len(orig) > len(spok):
        return f"Don't skip letters! Say all sounds in '{orig}'"
    if len(spok) > len(orig):
        return f"Not too fast! The word is just '{orig}'"

    # From here both words have the same length. Slices (rather than
    # indexing) keep the comparison safe when both are empty.
    if orig[:1] != spok[:1]:
        return f"Starts with '{orig[0]}' sound, not '{spok[0]}'"
    if orig[-1:] != spok[-1:]:
        return f"Ends with '{orig[-1]}' sound"
    return f"Listen carefully: '{orig}' - try saying it slower"
def break_into_syllables(word: str) -> str:
    """
    Roughly break a word into pronounceable chunks for pronunciation help.

    A break is placed after a vowel (a, e, i, o, u) that is followed by a
    consonant. A trailing chunk with no vowel is attached to the previous
    chunk instead of standing alone. This is a spelling heuristic, not
    true syllabification.

    Args:
        word (str): Word to break into syllables

    Returns:
        str: Chunks joined by '-', or the word unchanged when there is only one chunk
    """
    vowels = set('aeiou')
    last_index = len(word) - 1
    chunks = []
    current = ''

    for i, char in enumerate(word):
        current += char
        # Break after a vowel when the next character is a consonant.
        if i < last_index and char.lower() in vowels and word[i + 1].lower() not in vowels:
            chunks.append(current)
            current = ''

    if current:
        has_vowel = any(ch.lower() in vowels for ch in current)
        if chunks and not has_vowel:
            # Leftover consonants ("nt" in "elephant") are not a syllable.
            chunks[-1] += current
        else:
            chunks.append(current)

    return '-'.join(chunks) if len(chunks) > 1 else word
def generate_targeted_story(previous_feedback: str, name: str, grade: str, missed_words: Optional[list] = None) -> str:
    """
    Generate a new story that targets words the student struggled with.

    The difficulty of the new story is chosen from the previous feedback:
    the "accuracy: NN%" figure when present, otherwise the headline keywords.

    Args:
        previous_feedback (str): Previous reading feedback (may be empty)
        name (str): Student's name
        grade (str): Student's grade label; the first number found is used,
            defaulting to grade 3 when there is none
        missed_words (list): Words the student had trouble with; the first
            five non-empty ones are put into the prompt

    Returns:
        str: New targeted story for practice, stripped of surrounding whitespace
    """
    grade_match = re.search(r"\d+", grade or "")
    grade_num = int(grade_match.group()) if grade_match else 3
    age = grade_num + 5

    # Prefer the numeric score: substring checks such as "accuracy: 9" also
    # match 9.x%, and the headline keywords can collide with practice words
    # that are listed in upper case inside the feedback.
    feedback_text = previous_feedback or ""
    score_match = re.search(r"accuracy:\s*(\d+(?:\.\d+)?)\s*%", feedback_text)
    if score_match:
        score = float(score_match.group(1))
        tier = 3 if score >= 90 else 2 if score >= 80 else 1 if score >= 70 else 0
    elif "AMAZING" in feedback_text:
        tier = 3
    elif "GREAT JOB" in feedback_text:
        tier = 2
    elif "GOOD" in feedback_text:
        tier = 1
    else:
        tier = 0

    difficulty_adjustment, focus_area = {
        3: ("more challenging with advanced vocabulary",
            "new vocabulary, longer sentences, and complex concepts"),
        2: ("slightly more challenging",
            "new vocabulary and longer sentences"),
        1: ("similar level with some new words",
            "reinforcing current skills"),
        0: ("simpler and shorter",
            "basic vocabulary and simple sentences"),
    }[tier]

    # Focus on at most five practice words.
    practice_words = [str(word) for word in (missed_words or []) if word][:5]
    if practice_words:
        word_focus = f"Include and repeat these practice words: {', '.join(practice_words)}"
    else:
        word_focus = "Focus on common sight words for this grade level"

    # Assembled line by line so source indentation never reaches the model.
    prompt = "\n".join([
        "",
        f"You are an expert reading coach creating a personalized story for {name}, a {age}-year-old in {grade}.",
        "LEARNING ADAPTATION:",
        f"- Make this story {difficulty_adjustment} than the previous one",
        f"- Focus on: {focus_area}",
        f"- {word_focus}",
        "STORY REQUIREMENTS:",
        f"- Feature {name} as the main character",
        "- Include an engaging adventure or discovery theme",
        "- Naturally incorporate the practice words multiple times",
        "- Make it fun and encouraging",
        f"- End with {name} feeling proud and accomplished",
        f"Create a story that helps {name} practice the words they found challenging while building confidence.",
        "Story:",
        "",
    ])

    model = genai.GenerativeModel('gemini-2.0-flash')
    max_tokens = 300 if grade_num <= 2 else 600 if grade_num <= 4 else 1000
    generation_config = {
        "temperature": 0.7,
        "max_output_tokens": max_tokens,
        "top_p": 0.9,
    }
    response = model.generate_content(
        contents=prompt,
        generation_config=generation_config
    )
    return response.text.strip()
class SessionManager:
    """Manages student sessions and progress tracking (in memory only)."""

    def __init__(self):
        # session_id -> session record (see start_session for the fields)
        self.sessions = {}
        # Not written by this class; kept for external users of the attribute.
        self.student_progress = {}

    def start_session(self, student_name: str, grade: str) -> str:
        """
        Start a new reading session for a student.

        The id is "<name>_<unix seconds>". When that id is already taken
        (same student, same second) a numeric suffix is appended so the
        existing session is not overwritten.

        Returns:
            str: The id of the newly created session.
        """
        now = time.time()
        base_id = f"{student_name}_{int(now)}"
        session_id = base_id
        attempt = 1
        while session_id in self.sessions:
            attempt += 1
            session_id = f"{base_id}_{attempt}"

        self.sessions[session_id] = {
            "student_name": student_name,
            "grade": grade,
            "start_time": now,
            "stories_read": 0,
            "total_accuracy": 0,
            "feedback_history": []
        }
        return session_id

    def get_session(self, session_id: str) -> dict:
        """Return the session record, or an empty dict for an unknown id."""
        return self.sessions.get(session_id, {})

    def update_session(self, session_id: str, accuracy: float, feedback: str):
        """Record one reading result on a session; unknown ids are ignored."""
        session = self.sessions.get(session_id)
        if session is None:
            return
        session["stories_read"] += 1
        session["total_accuracy"] += accuracy
        session["feedback_history"].append({
            "timestamp": time.time(),
            "accuracy": accuracy,
            "feedback": feedback
        })
class ReadingCoachAgent:
    """
    Main agent class that provides the interface for the reading coach system.
    Wraps the individual tool functions and manages student sessions.
    """

    # "Reading accuracy: XX.X%" as written into the feedback text.
    _ACCURACY_RE = re.compile(r'Reading accuracy:\s*(\d+\.?\d*)%')
    # Feedback bullets look like "• WORD - tip" or "• WORD (you said ...) - tip".
    _PRACTICE_WORD_RE = re.compile(r'^• (\S+) ', re.MULTILINE)

    def __init__(self):
        self.session_manager = SessionManager()
        self.current_session = None
        self.current_story = ""
        self.student_info = {"name": "", "grade": ""}
        # Words flagged in the most recent feedback; feeds the practice story.
        self.last_missed_words = []

    def generate_story_for_student(self, name: str, grade: str, topic: str) -> str:
        """Start a new session for the student and generate their first story."""
        self.student_info = {"name": name, "grade": grade}
        self.current_session = self.session_manager.start_session(name, grade)
        self.last_missed_words = []

        story = generate_story(name, grade, topic)
        self.current_story = story
        return story

    def create_audio_from_story(self, story: str) -> str:
        """Convert story to audio using TTS; returns whatever text_to_speech returns."""
        return text_to_speech(story)

    def analyze_student_reading(self, audio_path: str) -> tuple:
        """
        Transcribe the student's recording and score it against the current story.

        Returns:
            tuple: (transcribed_text, feedback, accuracy)
        """
        # NOTE(review): transcribe_audio reports failures as ordinary
        # sentences, which are then scored like a reading - confirm whether
        # that is acceptable.
        transcribed_text = transcribe_audio(audio_path)
        feedback = compare_texts_for_feedback(self.current_story, transcribed_text)
        accuracy = self._extract_accuracy_from_feedback(feedback)

        # Remember the flagged words so the next practice story can target them.
        self.last_missed_words = self._extract_practice_words(feedback)

        if self.current_session:
            self.session_manager.update_session(self.current_session, accuracy, feedback)
        return transcribed_text, feedback, accuracy

    def generate_new_passage(self, topic: str) -> str:
        """
        Generate a new passage for the current student.

        Raises:
            ValueError: If no student name/grade has been stored yet.
        """
        if not self.student_info["name"] or not self.student_info["grade"]:
            raise ValueError("No active session. Please start a new session first.")

        story = generate_story(self.student_info["name"], self.student_info["grade"], topic)
        self.current_story = story
        return story

    def generate_practice_story(self, name: str, grade: str) -> str:
        """
        Generate a targeted practice story based on the previous feedback.

        Stored student info takes precedence; the name/grade arguments are
        used only when none is stored (falling back to "Student"/"Grade 3").
        """
        if not self.student_info.get("name") or not self.student_info.get("grade"):
            name = name or "Student"
            grade = grade or "Grade 3"
        else:
            name = self.student_info["name"]
            grade = self.student_info["grade"]

        # Most recent feedback recorded on the current session, if any.
        last_feedback = ""
        session_data = self.session_manager.sessions.get(self.current_session) if self.current_session else None
        if session_data and session_data.get("feedback_history"):
            last_feedback = session_data["feedback_history"][-1].get("feedback", "")

        # Pass the flagged words along so the story really is targeted.
        # getattr covers instances that were created without __init__.
        missed_words = getattr(self, "last_missed_words", None) or None

        practice_story = generate_targeted_story(last_feedback, name, grade, missed_words=missed_words)
        self.current_story = practice_story
        return practice_story

    def clear_session(self):
        """Clear current session state (tracked sessions are kept)."""
        self.current_session = None
        self.current_story = ""
        self.student_info = {"name": "", "grade": ""}
        self.last_missed_words = []

    def reset_all_data(self):
        """Reset all current session state but keep tracked sessions."""
        self.clear_session()

    def _extract_accuracy_from_feedback(self, feedback: str) -> float:
        """Extract the accuracy percentage from feedback text; 0.0 when absent."""
        match = self._ACCURACY_RE.search(feedback or "")
        return float(match.group(1)) if match else 0.0

    def _extract_practice_words(self, feedback: str) -> list:
        """Return the distinct bullet words from feedback text, lowercased, in order."""
        found = self._PRACTICE_WORD_RE.findall(feedback or "")
        return list(dict.fromkeys(word.lower() for word in found))