# Page-chrome residue from the file viewer this source was copied from:
# Spaces status: Sleeping | File size: 1,520 bytes | Revision: e18a571
import requests
import base64
import os
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) into the environment, then
# read the token that is sent as the Bearer credential on Hugging Face API
# calls. HF_API_KEY is None when the variable is not set.
load_dotenv()
HF_API_KEY = os.environ.get("HF_API_KEY")
# -----------------------------
# Speech-to-Text (STT) using HuggingFace Whisper
# -----------------------------
def speech_to_text(audio_file, timeout=60):
    """Transcribe an audio file with the hosted Whisper model.

    The raw bytes of ``audio_file`` are POSTed to the Hugging Face inference
    endpoint for ``openai/whisper-small``, authorised with the module-level
    ``HF_API_KEY``.

    Args:
        audio_file: Path of the audio file to transcribe.
        timeout: Timeout in seconds passed to ``requests.post`` so that a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The transcribed text when the API answers 200 with a ``"text"``
        field; "Sorry, I couldn’t transcribe that." when the 200 response
        carries no such field; "Speech recognition failed." when no audio
        path was given, the request failed, the status was not 200, or the
        body was not valid JSON.

    Raises:
        OSError: If ``audio_file`` cannot be opened or read.
    """
    failure_message = "Speech recognition failed."
    no_text_message = "Sorry, I couldn’t transcribe that."

    # Guard against an empty/None path (e.g. nothing was recorded) instead of
    # crashing inside open().
    if not audio_file:
        print("STT request failed: no audio file provided")
        return failure_message

    with open(audio_file, "rb") as f:
        audio_bytes = f.read()

    # NOTE(review): if HF_API_KEY is unset the header becomes "Bearer None";
    # the API will presumably reject that -- confirm the key is configured.
    try:
        response = requests.post(
            "https://api-inference.huggingface.co/models/openai/whisper-small",
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            data=audio_bytes,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a
        # non-200 status rather than propagating to the caller.
        print(f"STT request failed: {exc}")
        return failure_message

    if response.status_code != 200:
        print(f"STT request failed: {response.status_code} {response.text}")
        return failure_message

    try:
        result = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        print("STT request failed: response body was not valid JSON")
        return failure_message

    # Only a JSON object can carry the "text" field.
    if not isinstance(result, dict):
        return no_text_message
    return result.get("text", no_text_message)
# -----------------------------
# Text-to-Speech (TTS) using Kitten TTS
# -----------------------------
def text_to_speech(text, output_path="output_audio.wav", timeout=60):
    """Synthesize speech for ``text`` and save it as an audio file.

    The text is POSTed as JSON (``{"text": ...}``) to the Kitten TTS URL. A
    200 response is expected to be a JSON object whose ``"audio"`` field
    holds base64-encoded audio, which is decoded and written to
    ``output_path``.

    Args:
        text: The text to synthesize.
        output_path: Where to write the decoded audio. Defaults to
            "output_audio.wav" in the current working directory.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        ``output_path`` on success; ``None`` when no text was given, the
        request failed, the status was not 200, or the response did not
        contain decodable audio.

    Raises:
        OSError: If ``output_path`` cannot be written.
    """
    # Nothing to synthesize: skip the network round trip.
    if not text:
        print("TTS request failed: no text provided")
        return None

    # NOTE(review): this looks like a repository file-resolve URL rather than
    # an inference endpoint -- confirm it actually serves a TTS API.
    url = "https://huggingface.co/KittenML/kitten-tts-nano-0.1/resolve/main/tts"
    payload = {"text": text}

    try:
        response = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like a non-200 status.
        print(f"TTS request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"TTS request failed: {response.status_code}")
        return None

    try:
        audio_base64 = response.json()["audio"]
        audio_bytes = base64.b64decode(audio_base64)
    except (KeyError, TypeError, ValueError) as exc:
        # Covers a non-JSON body, a missing "audio" field, a non-object
        # payload and malformed base64 (binascii.Error is a ValueError).
        print(f"TTS request failed: unexpected response payload ({exc})")
        return None

    with open(output_path, "wb") as f:
        f.write(audio_bytes)
    return output_path
|