# Spaces: Sleeping  (appears to be a page-status banner captured with the file; not part of the program)
| import os | |
| import time | |
| from fastapi import APIRouter, Depends, HTTPException, status | |
| from libs.convert_to_audio import convert_to_audio | |
| from libs.transformer.get_transcript import get_transcript_gpu | |
| from libs.transformer.get_transcript_2 import get_transcribe_transformers | |
| from langchain_community.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader | |
| from libs.header_api_auth import get_api_key | |
| from libs.transformer.youtube_download import download_youtube | |
# Router under which this module's transcript endpoint(s) are registered.
router = APIRouter(
    prefix="/get-transcript-transformer",
    tags=["transcript"],
)
def get_transcript(audio_path: str, model_size: str = "distil-whisper/distil-small.en", api_key: str = Depends(get_api_key)):
    """Transcribe an audio source and return its text with per-chunk timestamps.

    A source containing ``https://www.youtube.com`` is fetched with
    ``download_youtube``; any other source is converted to an MP3 under
    ``./cached/audio`` with ``convert_to_audio``. The resulting file is handed
    to ``get_transcribe_transformers`` and deleted afterwards, whether or not
    transcription succeeded.

    Args:
        audio_path: YouTube URL, or path/URL of the media to transcribe.
        model_size: Not read by the body; the model id given to the
            transcriber is hard-coded below.
            NOTE(review): confirm whether this should be forwarded instead.
        api_key: Resolved through the ``get_api_key`` dependency; not read by
            the body.

    Returns:
        A dict with ``text`` (full transcript), ``list_sentence`` (one entry
        per chunk with ``start_time``, ``end_time`` and ``text``) and
        ``elapsed_time`` (seconds, rounded to two decimals).

    Raises:
        HTTPException: status 403 carrying the underlying error message when
            transcription fails.
            NOTE(review): 403 is kept for backward compatibility; a 5xx code
            would describe a processing failure more accurately.
    """
    # Monotonic clock: the elapsed time cannot be skewed by system clock changes.
    started = time.perf_counter()

    output_audio_folder = "./cached/audio"
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(output_audio_folder, exist_ok=True)

    if "https://www.youtube.com" in audio_path:
        output_file = download_youtube(audio_path)
    else:
        # Last path segment, truncated at its first dot. Computed outside the
        # f-string because reusing the enclosing quote character inside an
        # f-string is a SyntaxError before Python 3.12.
        base_name = audio_path.split("/")[-1].split(".")[0]
        output_file = f"{output_audio_folder}/{base_name}.mp3"
        # NOTE(review): the original indentation was lost; the conversion is
        # assumed to apply to the non-YouTube branch only - confirm.
        convert_to_audio(audio_path.strip(), output_file)

    try:
        text, chunks = get_transcribe_transformers(output_file, "Xkev/Llama-3.2V-11B-cot")
    except Exception as error:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"error>>>: {error}") from error
    finally:
        # Always drop the temporary audio file, on success and on failure.
        if os.path.exists(output_file):
            os.remove(output_file)

    list_sentences = [
        {
            "start_time": chunk.get("timestamp")[0],
            "end_time": chunk.get("timestamp")[1],
            "text": chunk.get("text"),
        }
        for chunk in chunks
    ]

    elapsed_time = time.perf_counter() - started
    return {
        "text": text,
        "list_sentence": list_sentences,
        "elapsed_time": round(elapsed_time, 2),
    }