# NOTE: scrape residue from the hosting page ("Spaces: Sleeping") removed;
# this file is the chatbot module of a Hugging Face Space.
| # api/chatbot.py | |
import logging
import re
from typing import Dict, List, Optional, Union

from google import genai

from .config import gemini_flash_api_key
from memory import MemoryManager
from utils import translate_query
from search import search_comprehensive
# Safety guard removed - cooking tutor doesn't need medical safety checks

# Module-wide logger. Named "cooking-tutor" (not __name__) — presumably so
# other modules of the app can share one logger; confirm before renaming.
logger = logging.getLogger("cooking-tutor")
class GeminiClient:
    """Thin wrapper around the google-genai SDK for text generation."""

    def __init__(self):
        # One client instance, authenticated with the configured API key.
        self.client = genai.Client(api_key=gemini_flash_api_key)

    def generate_content(self, prompt: str, model: str = "gemini-2.5-flash", temperature: float = 0.7) -> str:
        """Generate content using Gemini API.

        Args:
            prompt: Full prompt text sent as the request contents.
            model: Gemini model identifier.
            temperature: Sampling temperature forwarded to the API.

        Returns:
            The generated text, or a fixed error string on failure.
        """
        try:
            # BUG FIX: `temperature` was previously accepted but never
            # forwarded to the API call; pass it via the generation config.
            response = self.client.models.generate_content(
                model=model,
                contents=prompt,
                config=genai.types.GenerateContentConfig(temperature=temperature),
            )
            # response.text may be None (e.g. blocked/empty candidates);
            # normalize so callers can safely call str methods on the result.
            return response.text or "Error generating response from Gemini."
        except Exception as e:
            logger.error(f"[LLM] ❌ Error calling Gemini API: {e}")
            return "Error generating response from Gemini."
class CookingTutorChatbot:
    """Cooking tutor chatbot that uses only web search + memory."""

    # Substring keywords used to gate both user queries and model responses
    # to cooking topics. Class-level so the collection is built once, not on
    # every chat() call.
    COOKING_KEYWORDS = (
        'recipe', 'cooking', 'baking', 'food', 'ingredient', 'kitchen',
        'chef', 'meal', 'dish', 'cuisine', 'cook', 'bake', 'roast', 'grill',
        'fry', 'boil', 'steam', 'season', 'spice', 'herb', 'sauce',
        'marinade', 'dressing', 'appetizer', 'main course', 'dessert',
        'breakfast', 'lunch', 'dinner',
    )

    def __init__(self, model_name: str):
        # model_name: Gemini model identifier used for every generation call.
        self.model_name = model_name
        self.gemini_client = GeminiClient()
        self.memory = MemoryManager()

    def _is_cooking_related(self, text: str) -> bool:
        """Return True if *text* contains any cooking keyword (substring match)."""
        lowered = text.lower()
        return any(keyword in lowered for keyword in self.COOKING_KEYWORDS)

    def _build_prompt(
        self,
        user_query: str,
        lang: str,
        contextual_chunks: str,
        search_context: str,
        servings: Optional[int],
        dietary: Optional[List[str]],
        allergens: Optional[List[str]],
        equipment: Optional[List[str]],
        time_limit_minutes: Optional[int],
        skill_level: Optional[str],
        cuisine: Optional[str],
        structured: bool,
    ) -> str:
        """Assemble the full prompt sent to Gemini from all context pieces."""
        parts = [
            "You are a professional cooking tutor and recipe coach.",
            "Provide step-by-step, practical instructions with exact measurements, temperatures, and timings.",
            "Offer substitutions, variations, pantry-friendly swaps, and troubleshooting tips.",
            "Adapt guidance to different skill levels (beginner/intermediate/advanced).",
            "Use Markdown with headings, numbered steps, bullet lists, and short paragraphs.",
            "Always include a concise Ingredients list when relevant.",
            "Cite sources inline using <#ID> tags already present in the search context when applicable.",
        ]
        # User-supplied constraints; only truthy values are included, so a
        # 0/empty value is treated as "not specified".
        constraints = []
        if servings:
            constraints.append(f"Servings: {servings}")
        if dietary:
            constraints.append(f"Dietary preferences: {', '.join(dietary)}")
        if allergens:
            constraints.append(f"Avoid allergens: {', '.join(allergens)}")
        if equipment:
            constraints.append(f"Available equipment: {', '.join(equipment)}")
        if time_limit_minutes:
            constraints.append(f"Time limit: {time_limit_minutes} minutes")
        if skill_level:
            constraints.append(f"Skill level: {skill_level}")
        if cuisine:
            constraints.append(f"Cuisine: {cuisine}")
        if constraints:
            parts.append("Constraints to respect:\n- " + "\n- ".join(constraints))
        if contextual_chunks:
            parts.append("Relevant context from previous messages:\n" + contextual_chunks)
        if search_context:
            parts.append("Cooking knowledge from the web (with citations):\n" + search_context)
        parts.append(f"User's cooking question: {user_query}")
        parts.append(f"Language to generate answer: {lang}")
        if structured:
            parts.append(
                "Return a Markdown response with these sections if relevant:"
                "\n1. Title"
                "\n2. Summary (2-3 sentences)"
                "\n3. Ingredients (quantities in metric and US units)"
                "\n4. Equipment"
                "\n5. Step-by-step Instructions (numbered)"
                "\n6. Timing & Temperatures"
                "\n7. Variations & Substitutions"
                "\n8. Troubleshooting & Doneness Cues"
                "\n9. Storage & Reheating"
                "\n10. Sources"
            )
        return "\n\n".join(parts)

    def chat(
        self,
        user_id: str,
        user_query: str,
        lang: str = "EN",
        search_mode: bool = True,
        video_mode: bool = False,
        servings: Optional[int] = None,
        dietary: Optional[List[str]] = None,
        allergens: Optional[List[str]] = None,
        equipment: Optional[List[str]] = None,
        time_limit_minutes: Optional[int] = None,
        skill_level: Optional[str] = None,
        cuisine: Optional[str] = None,
        structured: bool = False,
    ) -> Union[str, Dict]:
        """Answer a cooking question with memory + optional web/video search.

        Args:
            user_id: Key for conversation memory; falsy skips memory writes.
            user_query: The user's question.
            lang: Answer language code ("EN", "VI", "ZH", ...).
            search_mode: When True, augment the prompt with web search results.
            video_mode: When True, also collect video sources.
            servings / dietary / allergens / equipment / time_limit_minutes /
                skill_level / cuisine: Optional recipe constraints.
            structured: When True, request a fixed Markdown section layout.

        Returns:
            The answer string, or — BUG FIX in the annotation: when
            ``video_mode`` is on and videos were found — a dict with keys
            ``'text'`` and ``'videos'`` (the original was annotated ``-> str``
            despite this dict return path).
        """
        # Translate to an English-centric search query if needed.
        if lang.upper() in {"VI", "ZH"}:
            user_query = translate_query(user_query, lang.lower())

        # Basic cooking relevance gate on the incoming query.
        if not self._is_cooking_related(user_query):
            logger.warning(f"[SAFETY] Non-cooking query detected: {user_query}")
            return "⚠️ I'm a cooking tutor! Please ask me about recipes, cooking techniques, ingredients, or anything food-related."

        # Conversation memory (recent turns).
        contextual_chunks = self.memory.get_contextual_chunks(user_id, user_query, lang)

        # Web search context (best-effort: failures fall back to no context).
        search_context = ""
        url_mapping = {}
        source_aggregation = {}
        video_results = []
        if search_mode:
            try:
                search_context, url_mapping, source_aggregation = search_comprehensive(
                    f"cooking technique tutorial: {user_query}",
                    num_results=12,
                    target_language=lang,
                    include_videos=bool(video_mode),
                )
                if video_mode and source_aggregation:
                    video_results = source_aggregation.get('sources', []) or []
            except Exception as e:
                logger.error(f"[SEARCH] Failed: {e}")

        prompt = self._build_prompt(
            user_query, lang, contextual_chunks, search_context,
            servings, dietary, allergens, equipment,
            time_limit_minutes, skill_level, cuisine, structured,
        )
        response = self.gemini_client.generate_content(prompt, model=self.model_name, temperature=0.6)

        # Replace <#ID> citation tags with real URLs when a mapping exists.
        if url_mapping:
            response = self._process_citations(response, url_mapping)

        # Relevance gate on the model's answer too; the length threshold
        # skips short error strings.
        if response and len(response) > 50 and not self._is_cooking_related(response):
            logger.warning("[SAFETY] Non-cooking response detected, redirecting to cooking topic")
            response = "⚠️ Let's stick to cooking-related topics. Try asking about recipes, techniques, or ingredients!"

        if user_id:
            self.memory.add_exchange(user_id, user_query, response, lang=lang)

        if video_mode and video_results:
            return {
                'text': response.strip(),
                'videos': video_results
            }
        return response.strip()

    def _process_citations(self, response: str, url_mapping: Dict[int, str]) -> str:
        """Replace citation tags with actual URLs.

        Handles both single citations ``<#1>`` and grouped citations like
        ``<#1, #2, #5>``. IDs with no mapping (or non-numeric IDs) are kept
        in their original ``<#id>`` form rather than dropped.
        """
        citation_pattern = r'<#([^>]+)>'

        def replace_citation(match):
            citation_content = match.group(1)
            # Split a group like "1, #2" into individual ID strings.
            citation_ids = [id_str.strip() for id_str in citation_content.split(',')]
            urls = []
            for citation_id in citation_ids:
                try:
                    doc_id = int(citation_id)
                    if doc_id in url_mapping:
                        url = url_mapping[doc_id]
                        urls.append(f'<{url}>')
                        logger.info(f"[CITATION] Replacing <#{doc_id}> with {url}")
                    else:
                        logger.warning(f"[CITATION] No URL mapping found for document ID {doc_id}")
                        urls.append(f'<#{doc_id}>')  # Keep original if URL not found
                except ValueError:
                    logger.warning(f"[CITATION] Invalid citation ID: {citation_id}")
                    urls.append(f'<#{citation_id}>')  # Keep original if invalid
            return ' '.join(urls)

        processed_response = re.sub(citation_pattern, replace_citation, response)

        # Log how many individual citations were expanded, for debugging.
        citations_found = re.findall(citation_pattern, response)
        total_citations = sum(len([id_str.strip() for id_str in citation_content.split(',')])
                              for citation_content in citations_found)
        logger.info(f"[CITATION] Processed {total_citations} citations from {len(citations_found)} citation groups, {len(url_mapping)} URL mappings available")
        return processed_response