Spaces:
Sleeping
Sleeping
| import requests | |
| import base64 | |
| import os | |
| from dotenv import load_dotenv | |
# Load variables from a local .env file (if one exists) into the process
# environment so the API key can be kept out of source control.
load_dotenv()
# Hugging Face API token sent as a Bearer credential by speech_to_text();
# os.getenv returns None when HF_API_KEY is not set.
HF_API_KEY = os.getenv("HF_API_KEY")
| # ----------------------------- | |
| # Speech-to-Text (STT) using HuggingFace Whisper | |
| # ----------------------------- | |
def speech_to_text(audio_file):
    """Transcribe an audio file via the hosted ``openai/whisper-small`` model.

    The raw file bytes are POSTed to the Hugging Face Inference API,
    authenticated with the module-level ``HF_API_KEY``.

    Args:
        audio_file: Path of the audio file to upload.

    Returns:
        The transcribed text on success. ``"Speech recognition failed."``
        when no path is given, the request cannot be completed, or the
        service answers with a non-200 status.
        ``"Sorry, I couldn’t transcribe that."`` when the service answers
        200 but the body carries no usable ``"text"`` field.

    Raises:
        OSError: If ``audio_file`` is given but cannot be opened or read.
    """
    if not audio_file:
        # No recording supplied (None or empty path): report failure the same
        # way as every other error path instead of crashing inside open().
        print("STT request failed: no audio file provided")
        return "Speech recognition failed."

    with open(audio_file, "rb") as f:
        audio_bytes = f.read()

    try:
        response = requests.post(
            "https://api-inference.huggingface.co/models/openai/whisper-small",
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            data=audio_bytes,
            # Without a timeout requests can block forever on a stalled
            # connection (e.g. while the hosted model is unresponsive).
            timeout=60,
        )
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc. are reported like HTTP failures.
        print(f"STT request failed: {exc}")
        return "Speech recognition failed."

    if response.status_code != 200:
        print(f"STT request failed: {response.status_code} {response.text}")
        return "Speech recognition failed."

    try:
        result = response.json()
    except ValueError:
        # A 200 response with a non-JSON body: nothing to extract.
        result = None

    if not isinstance(result, dict):
        return "Sorry, I couldn’t transcribe that."
    return result.get("text", "Sorry, I couldn’t transcribe that.")
| # ----------------------------- | |
| # Text-to-Speech (TTS) using Kitten TTS | |
| # ----------------------------- | |
def text_to_speech(text):
    """Synthesize speech for ``text`` and save it as ``output_audio.wav``.

    The text is POSTed as JSON (``{"text": ...}``) to the Kitten TTS URL.
    The response is expected to be JSON with a base64-encoded ``"audio"``
    field, which is decoded and written to disk.

    Args:
        text: The text to synthesize.

    Returns:
        The path of the written audio file (``"output_audio.wav"``) on
        success, or ``None`` when the text is empty, the request fails, or
        the response does not contain decodable audio.

    Raises:
        OSError: If the output file cannot be written.
    """
    if not text or not str(text).strip():
        # Nothing to synthesize; skip the network round-trip.
        print("TTS request failed: no text provided")
        return None

    # NOTE(review): this looks like a model-repository file URL rather than an
    # inference endpoint; confirm that it actually serves TTS requests.
    url = "https://huggingface.co/KittenML/kitten-tts-nano-0.1/resolve/main/tts"
    payload = {"text": text}

    try:
        # Without a timeout requests can block forever on a stalled connection.
        response = requests.post(url, json=payload, timeout=60)
    except requests.RequestException as exc:
        print(f"TTS request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"TTS request failed: {response.status_code}")
        return None

    try:
        audio_base64 = response.json()["audio"]
        audio_bytes = base64.b64decode(audio_base64)
    except (ValueError, KeyError, TypeError) as exc:
        # Non-JSON body, missing/ill-typed "audio" field, or invalid base64
        # (binascii.Error is a ValueError subclass).
        print(f"TTS request failed: unexpected response ({exc})")
        return None

    output_path = "output_audio.wav"
    with open(output_path, "wb") as f:
        f.write(audio_bytes)
    return output_path