Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,16 +1,13 @@
|
|
| 1 |
from fastapi import FastAPI, File, UploadFile
|
| 2 |
-
from fastapi.responses import JSONResponse
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
from models.model_wav2vec import Wav2VecIntent
|
| 5 |
from huggingface_hub import hf_hub_download
|
| 6 |
import torch
|
| 7 |
import soundfile as sf
|
| 8 |
-
import numpy as np
|
| 9 |
import librosa
|
| 10 |
|
| 11 |
app = FastAPI()
|
| 12 |
|
| 13 |
-
# Enable CORS for all origins (so your frontend can call the API)
|
| 14 |
app.add_middleware(
|
| 15 |
CORSMiddleware,
|
| 16 |
allow_origins=["*"],
|
|
@@ -19,7 +16,6 @@ app.add_middleware(
|
|
| 19 |
allow_headers=["*"],
|
| 20 |
)
|
| 21 |
|
| 22 |
-
# Download model from Hugging Face
|
| 23 |
MODEL_PATH = hf_hub_download(repo_id="avi292423/speech-intent-recognition-project", filename="wav2vec_best_model.pt")
|
| 24 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 25 |
|
|
@@ -36,7 +32,7 @@ label_map = {
|
|
| 36 |
index_to_label = {v: k for k, v in label_map.items()}
|
| 37 |
|
| 38 |
num_classes = 31
|
| 39 |
-
pretrained_model = "facebook/wav2vec2-large"
|
| 40 |
model = Wav2VecIntent(num_classes=num_classes, pretrained_model=pretrained_model).to(device)
|
| 41 |
state_dict = torch.load(MODEL_PATH, map_location=device)
|
| 42 |
model.load_state_dict(state_dict)
|
|
@@ -49,7 +45,6 @@ async def predict(file: UploadFile = File(...)):
|
|
| 49 |
f.write(audio_bytes)
|
| 50 |
audio, sample_rate = sf.read("temp.wav")
|
| 51 |
if sample_rate != 16000:
|
| 52 |
-
# Resample to 16kHz
|
| 53 |
audio = librosa.resample(audio.astype(float), orig_sr=sample_rate, target_sr=16000)
|
| 54 |
waveform = torch.tensor(audio, dtype=torch.float32).unsqueeze(0).to(device)
|
| 55 |
with torch.no_grad():
|
|
|
|
| 1 |
from fastapi import FastAPI, File, UploadFile
|
|
|
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
from models.model_wav2vec import Wav2VecIntent
|
| 4 |
from huggingface_hub import hf_hub_download
|
| 5 |
import torch
|
| 6 |
import soundfile as sf
|
|
|
|
| 7 |
import librosa
|
| 8 |
|
| 9 |
app = FastAPI()
|
| 10 |
|
|
|
|
| 11 |
app.add_middleware(
|
| 12 |
CORSMiddleware,
|
| 13 |
allow_origins=["*"],
|
|
|
|
| 16 |
allow_headers=["*"],
|
| 17 |
)
|
| 18 |
|
|
|
|
| 19 |
MODEL_PATH = hf_hub_download(repo_id="avi292423/speech-intent-recognition-project", filename="wav2vec_best_model.pt")
|
| 20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 21 |
|
|
|
|
| 32 |
index_to_label = {v: k for k, v in label_map.items()}
|
| 33 |
|
| 34 |
num_classes = 31
|
| 35 |
+
pretrained_model = "facebook/wav2vec2-large"
|
| 36 |
model = Wav2VecIntent(num_classes=num_classes, pretrained_model=pretrained_model).to(device)
|
| 37 |
state_dict = torch.load(MODEL_PATH, map_location=device)
|
| 38 |
model.load_state_dict(state_dict)
|
|
|
|
| 45 |
f.write(audio_bytes)
|
| 46 |
audio, sample_rate = sf.read("temp.wav")
|
| 47 |
if sample_rate != 16000:
|
|
|
|
| 48 |
audio = librosa.resample(audio.astype(float), orig_sr=sample_rate, target_sr=16000)
|
| 49 |
waveform = torch.tensor(audio, dtype=torch.float32).unsqueeze(0).to(device)
|
| 50 |
with torch.no_grad():
|