Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import torch
|
|
| 6 |
import gradio as gr
|
| 7 |
import pytube as pt
|
| 8 |
import spaces
|
| 9 |
-
from transformers import pipeline
|
| 10 |
from huggingface_hub import model_info
|
| 11 |
try:
|
| 12 |
import flash_attn
|
|
@@ -25,9 +25,14 @@ print(f"Using device: {device}")
|
|
| 25 |
|
| 26 |
@spaces.GPU(duration=60)
|
| 27 |
def pipe(file, return_timestamps=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
asr = pipeline(
|
| 29 |
task="automatic-speech-recognition",
|
| 30 |
-
model=MODEL_NAME,
|
| 31 |
chunk_length_s=30,
|
| 32 |
device=device,
|
| 33 |
token=auth_token,
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
import pytube as pt
|
| 8 |
import spaces
|
| 9 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
|
| 10 |
from huggingface_hub import model_info
|
| 11 |
try:
|
| 12 |
import flash_attn
|
|
|
|
| 25 |
|
| 26 |
@spaces.GPU(duration=60)
|
| 27 |
def pipe(file, return_timestamps=False):
|
| 28 |
+
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, low_cpu_mem_usage=True)
|
| 29 |
+
model.to(device)
|
| 30 |
+
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
| 31 |
+
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)
|
| 32 |
+
model.generation_config.cache_implementation = "static"
|
| 33 |
asr = pipeline(
|
| 34 |
task="automatic-speech-recognition",
|
| 35 |
+
model=model,
|
| 36 |
chunk_length_s=30,
|
| 37 |
device=device,
|
| 38 |
token=auth_token,
|