Spaces:
Running
Running
| import os | |
| import whisper | |
| import requests | |
| from flask import Flask, request, jsonify, render_template | |
| from dotenv import load_dotenv | |
| from deepgram import DeepgramClient, PrerecordedOptions | |
| import tempfile | |
| import json | |
| import subprocess | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import warnings | |
# Suppress the "FP16 is not supported on CPU" warning (presumably emitted by
# the whisper package imported above when no GPU is available).
warnings.filterwarnings(
    "ignore",
    message="FP16 is not supported on CPU; using FP32 instead",
)

app = Flask(__name__)
print("APP IS RUNNING, ANIKET")

# Read the .env file into the process environment before any key lookup.
load_dotenv()
print("ENV LOADED, ANIKET")

# Both keys are mandatory: start-up aborts when either is missing or empty.
API_KEY = os.environ.get("FIRST_API_KEY")
DEEPGRAM_API_KEY = os.environ.get("SECOND_API_KEY")

if not API_KEY:
    raise ValueError("API Key not found. Make sure it is set in the .env file.")

if not DEEPGRAM_API_KEY:
    raise ValueError("DEEPGRAM_API_KEY not found. Make sure it is set in the .env file.")

# The Gemini client settings reuse the first API key.
GEMINI_API_KEY = API_KEY
GEMINI_API_ENDPOINT = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
def health_check():
    """Report that the service is up.

    Returns:
        A ``(response, status)`` tuple: a JSON body carrying ``status`` and
        ``message`` fields, together with HTTP status code 200.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view of the file -- confirm how it is registered with ``app``.
    body = {
        "status": "success",
        "message": "API is running successfully!",
    }
    return jsonify(body), 200
def download_audio(url, temp_video_path, timeout=60, chunk_size=64 * 1024):
    """Download the file at ``url`` and save it to ``temp_video_path``.

    The body is streamed to disk in chunks, so the whole file is never held
    in memory at once.

    Args:
        url: Location of the file to fetch.
        temp_video_path: Path of the local file to (over)write.
        timeout: Seconds ``requests`` waits for the connection and for each
            read before giving up. Without it a stalled server would block
            the caller indefinitely.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: On connection errors or
            timeouts raised by ``requests``.
    """
    # Using the response as a context manager guarantees that the streamed
    # connection is released, even when the status check or the file write
    # fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise Exception(f"Failed to download audio, status code: {response.status_code}")

        with open(temp_video_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                # Skip empty keep-alive chunks.
                if chunk:
                    f.write(chunk)

    print(f"Audio downloaded successfully to {temp_video_path}")
def process_video():
    """Download a video, extract its information and return a structured recipe.

    Reads ``videoUrl`` from the JSON request body, downloads that file to a
    temporary ``.mp4`` path, passes it to
    ``get_information_from_video_using_OCR`` and sends the result to
    ``query_gemini_api``. The temporary file is always removed afterwards.

    Returns:
        A Flask JSON response with the Gemini result on success, or a
        ``(response, status)`` tuple with an ``error`` field: 400 when no
        usable ``videoUrl`` was supplied, 500 when extraction fails or any
        exception is raised along the way.
    """
    # ``silent=True`` yields ``None`` for a missing or malformed JSON body
    # instead of raising, so such requests get the intended 400 response.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'videoUrl' not in payload:
        return jsonify({"error": "No video URL provided"}), 400

    video_url = payload['videoUrl']
    if not isinstance(video_url, str) or not video_url.strip():
        return jsonify({"error": "No video URL provided"}), 400

    # NOTE(review): ``video_url`` is client-supplied and fetched server-side;
    # consider restricting it to trusted hosts to avoid SSRF.
    temp_video_path = None
    try:
        # Step 1: Reserve a temporary .mp4 path and download the video to it.
        # The handle is closed before downloading so the path can be
        # re-opened for writing on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
            temp_video_path = temp_video_file.name
        download_audio(video_url, temp_video_path)

        # Step 2: Extract the information from the downloaded MP4 file.
        # NOTE(review): ``get_information_from_video_using_OCR`` is not
        # defined in the visible part of this file -- confirm it exists.
        video_info = get_information_from_video_using_OCR(temp_video_path)
        if not video_info:
            return jsonify({"error": "video information extraction failed"}), 500

        # Step 3: Generate structured recipe information using the Gemini API.
        structured_data = query_gemini_api(video_info)
        return jsonify(structured_data)
    except Exception as e:
        # Top-level boundary of the request: report any failure as JSON.
        return jsonify({"error": str(e)}), 500
    finally:
        # Clean up the temporary video file whether or not processing succeeded.
        if temp_video_path and os.path.exists(temp_video_path):
            os.remove(temp_video_path)
            print(f"Temporary audio file deleted: {temp_video_path}")
def query_gemini_api(transcription, timeout=60):
    """Send transcription text to the Gemini API and return the recipe text.

    Args:
        transcription: Text extracted from the cooking video; it is embedded
            in the prompt sent to Gemini.
        timeout: Seconds ``requests`` waits for the connection and for each
            read before giving up.

    Returns:
        The text of the first part of the first candidate in the response,
        or ``"No result found"`` when the response carries no such text. If
        the HTTP request fails, a dict ``{"error": <message>}`` is returned
        instead.
    """
    try:
        # Define the structured prompt
        prompt = (
            "Analyze the provided cooking video transcription and extract the following structured information:\n"
            "1. Recipe Name: Identify the name of the dish being prepared.\n"
            "2. Ingredients List: Extract a detailed list of ingredients with their respective quantities (if mentioned).\n"
            "3. Steps for Preparation: Provide a step-by-step breakdown of the recipe's preparation process, organized and numbered sequentially.\n"
            "4. Cooking Techniques Used: Highlight the cooking techniques demonstrated in the video, such as searing, blitzing, wrapping, etc.\n"
            "5. Equipment Needed: List all tools, appliances, or utensils mentioned, e.g., blender, hot pan, cling film, etc.\n"
            "6. Nutritional Information (if inferred): Provide an approximate calorie count or nutritional breakdown based on the ingredients used.\n"
            "7. Serving size: In count of people or portion size.\n"
            "8. Special Notes or Variations: Include any specific tips, variations, or alternatives mentioned.\n"
            "9. Festive or Thematic Relevance: Note if the recipe has any special relevance to holidays, events, or seasons.\n"
            # The trailing newline keeps the instructions separate from the
            # transcription that follows.
            "Also, make sure not to provide anything else or any other information or warning or text apart from the above things mentioned.\n"
            f"Text: {transcription}\n"
        )

        # Prepare the payload and headers
        payload = {
            "contents": [
                {
                    "parts": [
                        {"text": prompt}
                    ]
                }
            ]
        }
        # The key travels in a header rather than the query string: request
        # errors include the URL in their message, and that message is
        # returned to the caller below, so a key in the URL would leak.
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": GEMINI_API_KEY,
        }

        # Send request to Gemini API synchronously
        response = requests.post(
            GEMINI_API_ENDPOINT,
            json=payload,
            headers=headers,
            timeout=timeout,
        )

        # Raise error if response code is not 200
        response.raise_for_status()
        data = response.json()

        # Walk the response defensively: a missing key, a ``None`` value or
        # an empty list at any level falls back to the default text instead
        # of raising IndexError/AttributeError.
        candidates = data.get("candidates") or [{}]
        content = candidates[0].get("content") or {}
        parts = content.get("parts") or [{}]
        return parts[0].get("text", "No result found")
    except requests.exceptions.RequestException as e:
        print(f"Error querying Gemini API: {e}")
        return {"error": str(e)}
if __name__ == '__main__':
    # Debug mode enables the interactive debugger, which allows arbitrary
    # code execution from the browser, so it must be opted into explicitly
    # (e.g. FLASK_DEBUG=1 for local development) rather than always on.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)