Spaces:
Sleeping
Sleeping
# MCP-Powered Voice Assistant with Open-Source Tools
# Hugging Face Space Implementation
| import gradio as gr | |
| import numpy as np | |
| import sqlite3 | |
| import json | |
| import requests | |
| from PIL import Image | |
| import io | |
| import time | |
| # ------ Mock MCP Server Implementation ------ | |
class MockMCPServer:
    """Minimal in-process stand-in for an MCP server.

    Tools are plain callables stored by name together with a human-readable
    description.
    """

    def __init__(self):
        # Maps tool name -> {"function": callable, "description": str}.
        self.tools = {}

    def register_tool(self, name, func, description):
        """Register ``func`` under ``name``, replacing any earlier entry."""
        self.tools[name] = {
            "function": func,
            "description": description,
        }

    def call_tool(self, tool_name, params=None):
        """Invoke a registered tool with keyword arguments.

        Args:
            tool_name: Name the tool was registered under.
            params: Mapping of keyword arguments forwarded to the tool.
                ``None`` (the default) calls the tool with no arguments.

        Returns:
            Whatever the tool returns, or ``{"error": ...}`` when no tool is
            registered under ``tool_name``.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            return {"error": f"Tool {tool_name} not found"}
        # ``**None`` would raise TypeError, so fall back to an empty mapping.
        return tool["function"](**(params or {}))


# ------ Create Mock MCP Server ------
mcp_server = MockMCPServer()
| # ------ Tool Implementations ------ | |
def get_recipe_by_ingredients(ingredients):
    """Return mock recipe suggestions for the given ingredients.

    This is a placeholder for a real recipe API: ``ingredients`` is accepted
    but not consulted, and the same two canned recipes are always returned.
    """
    canned = (
        ("Vegetable Stir Fry", 20, "Easy"),
        ("Pasta Primavera", 30, "Medium"),
    )
    return {
        "recipes": [
            {"name": title, "time": minutes, "difficulty": level}
            for title, minutes, level in canned
        ]
    }
def get_recipe_image(recipe_name):
    """Return a mock image descriptor for ``recipe_name``.

    Placeholder for an image-generation model (e.g. Stable Diffusion): the URL
    is a fixed example address and only the alt text depends on the argument.
    """
    alt_text = f"Image of {recipe_name}"
    return dict(
        image_url="https://example.com/recipe-image.jpg",
        alt_text=alt_text,
    )
def convert_measurements(amount, from_unit, to_unit):
    """Convert a cooking measurement between two units.

    Supported unit pairs (in either direction, case-insensitive, surrounding
    whitespace ignored): tbsp <-> tsp, cups <-> ml, oz <-> g.

    Args:
        amount: Numeric quantity expressed in ``from_unit``.
        from_unit: Unit of ``amount``.
        to_unit: Unit to convert to.

    Returns:
        ``{"result": <converted amount>, "unit": to_unit}`` on success, or
        ``{"error": "Conversion not supported"}`` for an unknown unit pair.
    """
    # Multiplicative factor for each (larger unit, smaller unit) pair.
    factors = {
        ("tbsp", "tsp"): 3,
        ("cups", "ml"): 240,
        ("oz", "g"): 28.35,
    }
    source = from_unit.strip().lower()
    target = to_unit.strip().lower()

    if (source, target) in factors:
        return {"result": amount * factors[(source, target)], "unit": to_unit}
    # The reverse direction of a known pair divides by the same factor.
    if (target, source) in factors:
        return {"result": amount / factors[(target, source)], "unit": to_unit}
    return {"error": "Conversion not supported"}
| # ------ Recipe Database ------ | |
def init_recipe_db():
    """Create and seed the in-memory recipe database.

    Returns:
        An open ``sqlite3.Connection`` to a ``:memory:`` database holding a
        ``recipes`` table (id, name, ingredients as a JSON list, instructions,
        prep_time) with two sample rows.
    """
    # The connection is created once at import time but may be queried from a
    # different thread (e.g. a UI callback worker). sqlite3 refuses cross-thread
    # use by default, so opt out of that check; the data is only read after
    # seeding, so no extra locking is added here.
    conn = sqlite3.connect(":memory:", check_same_thread=False)
    conn.execute(
        """CREATE TABLE recipes
           (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT,
            instructions TEXT, prep_time INT)"""
    )
    recipes = [
        (
            "Classic Pancakes",
            json.dumps(["flour", "eggs", "milk", "baking powder"]),
            "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle",
            15,
        ),
        (
            "Tomato Soup",
            json.dumps(["tomatoes", "onion", "garlic", "vegetable stock"]),
            "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend",
            30,
        ),
    ]
    conn.executemany(
        "INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)",
        recipes,
    )
    conn.commit()
    return conn
| # ------ Voice Processing Functions ------ | |
def text_to_speech(text):
    """Mock TTS: log ``text`` and return one second of silence.

    Returns:
        ``(samples, sample_rate)`` where ``samples`` is a float32 array of
        16000 zeros and ``sample_rate`` is 16000. Replace with a real TTS
        engine for actual use.
    """
    sample_rate = 16000
    print(f"[TTS]: {text}")
    # Dummy audio data: as many zero samples as the sample rate.
    silence = np.zeros(sample_rate, dtype=np.float32)
    return silence, sample_rate
def speech_to_text(audio):
    """Mock STT: ignore ``audio`` and return a fixed transcript.

    Replace with a real speech-recognition model for actual use.
    """
    placeholder_transcript = "Show me pancake recipes"
    return placeholder_transcript
| # ------ Agent Logic ------ | |
def process_query(query, db_conn):
    """Route a text query to the matching tool, or search the recipe table.

    Intent detection is a keyword check on the lower-cased query, tried in
    this order: recipe lookup ("recipe"/"make"), image ("image"/"show"),
    unit conversion ("convert"). Tool arguments are hard-coded placeholders.
    Anything else falls back to a ``LIKE`` search on recipe names.

    Returns:
        The tool's result for a recognised intent, otherwise the list of
        matching rows from the ``recipes`` table.
    """
    lowered = query.lower()

    def mentions(*keywords):
        return any(word in lowered for word in keywords)

    if mentions("recipe", "make"):
        # Simplified extraction: ingredients are fixed.
        return mcp_server.call_tool(
            "get_recipe_by_ingredients",
            {"ingredients": ["flour", "eggs"]},
        )

    if mentions("image", "show"):
        # Simplified extraction: recipe name is fixed.
        return mcp_server.call_tool(
            "get_recipe_image",
            {"recipe_name": "Classic Pancakes"},
        )

    if mentions("convert"):
        # Simplified extraction: conversion request is fixed.
        return mcp_server.call_tool(
            "convert_measurements",
            {"amount": 2, "from_unit": "cups", "to_unit": "ml"},
        )

    # No intent matched: search the database by name.
    cursor = db_conn.cursor()
    cursor.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
    return cursor.fetchall()
| # ------ Register Tools with MCP Server ------ | |
# Expose each tool implementation on the mock server under its own name.
mcp_server.register_tool(
    name="get_recipe_by_ingredients",
    func=get_recipe_by_ingredients,
    description="Find recipes based on available ingredients",
)
mcp_server.register_tool(
    name="get_recipe_image",
    func=get_recipe_image,
    description="Generate an image of the finished recipe",
)
mcp_server.register_tool(
    name="convert_measurements",
    func=convert_measurements,
    description="Convert cooking measurements between units",
)

# ------ Initialize System ------
# Shared in-memory recipe database used by the query fallback path.
db_conn = init_recipe_db()
| # ------ Gradio Interface ------ | |
def _build_response(result):
    """Turn an agent result into ``(response_text, image)``.

    ``result`` is either a list of ``recipes`` table rows (database fallback)
    or a dict returned by one of the tools. ``image`` is ``None`` unless the
    result describes a recipe image.
    """
    if isinstance(result, list):
        if not result:
            # An empty match list used to be read out as the literal "[]".
            return "No matching recipes found.", None
        lines = [f"Found {len(result)} recipes:"]
        # Row layout: (id, name, ingredients, instructions, prep_time).
        lines.extend(f"- {row[1]} ({row[4]} mins)" for row in result)
        return "\n".join(lines) + "\n", None

    if isinstance(result, dict):
        if "recipes" in result:
            recipes = result["recipes"]
            lines = [f"Found {len(recipes)} recipes:"]
            lines.extend(f"- {r['name']} ({r['time']} mins)" for r in recipes)
            return "\n".join(lines) + "\n", None
        if "image_url" in result:
            # The alt text already reads "Image of <recipe>", so use it as-is
            # instead of prefixing it with another "an image of".
            text = result.get("alt_text") or "Here's an image of the recipe"
            # Placeholder picture; the mock image URL is not fetched.
            placeholder = Image.new('RGB', (300, 200), color=(73, 109, 137))
            return text, placeholder
        if "result" in result and "unit" in result:
            return f"That is {result['result']} {result['unit']}", None
        if "error" in result:
            return f"Sorry, something went wrong: {result['error']}", None

    return str(result), None


def process_voice_command(audio):
    """Process a voice command through the agent system.

    Args:
        audio: Recorded audio from the UI, passed straight to
            ``speech_to_text``.

    Returns:
        ``((sample_rate, samples), response_text, image)`` where ``image`` is
        a PIL image for image responses and ``None`` otherwise.
    """
    # Convert audio to text, then let the agent logic pick a tool.
    query = speech_to_text(audio)
    result = process_query(query, db_conn)

    response_text, image = _build_response(result)

    # Convert the response to audio for playback.
    audio_data, sr = text_to_speech(response_text)
    return (sr, audio_data), response_text, image
| # ------ Hugging Face Space UI ------ | |
# Gradio 3.x spells the microphone option ``source="microphone"``; Gradio 4
# replaced it with ``sources=[...]``. Pick the keyword that matches the
# installed version so the Space starts on either.
_GRADIO_MAJOR = int(gr.__version__.split(".", 1)[0])
_MIC_KWARGS = (
    {"sources": ["microphone"]} if _GRADIO_MAJOR >= 4 else {"source": "microphone"}
)

with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    gr.Markdown("# 🧑🍳 MCP-Powered Culinary Voice Assistant (Open-Source)")
    gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")

    # Voice in / voice out.
    with gr.Row():
        audio_input = gr.Audio(
            type="numpy", label="Speak to Chef Assistant", **_MIC_KWARGS
        )
        audio_output = gr.Audio(label="Assistant Response", interactive=False)

    # Text and image parts of the assistant's answer.
    with gr.Row():
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)

    with gr.Row():
        submit_btn = gr.Button("Process Command", variant="primary")

    # Output order matches the tuple returned by process_voice_command.
    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input],
        outputs=[audio_output, text_output, image_output],
    )

    # NOTE(review): these examples populate ``text_output``, which is a
    # read-only output box and not an input of the click handler — confirm
    # whether a text query input was intended.
    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?"],
            ["Show me how tomato soup looks"],
            ["Convert 2 cups to milliliters"],
        ],
        inputs=[text_output],
        label="Example Queries",
    )

if __name__ == "__main__":
    demo.launch()