import time

import gradio as gr
import requests
import soundfile as sf


def speech_translation(audio, language):
    """Transcribe recorded speech with the IITM ASR service and translate it to English via the Bhashini pipeline."""
    if audio is None:
        return "No audio input provided!"

    # Convert the recording to .wav if it is not already in that format
    if not audio.endswith(".wav"):
        wav_data, samplerate = sf.read(audio)
        sf.write("temp_audio.wav", wav_data, samplerate)
        audio_file = "temp_audio.wav"
    else:
        audio_file = audio
    # ASR: send the audio to the IITM speech recognition endpoint
    with open(audio_file, "rb") as audio_handle:
        files = {
            'file': audio_handle,
            'language': (None, language),
            'vtt': (None, 'true'),
        }
        response = requests.post('https://asr.iitm.ac.in/ssl_asr/decode', files=files)
    print(response.json())
    try:
        asr_output = response.json()['transcript']
    except (KeyError, ValueError):
        asr_output = "Error in ASR processing"

    # Drop sentence-final punctuation before sending the transcript for translation
    asr_output = asr_output.replace("।", "")
    asr_output = asr_output.replace(".", "")
    time.sleep(1)
    # Map the dropdown label to the ISO 639-1 code expected by the translation pipeline
    lang_codes = {"telugu": "te", "hindi": "hi", "marathi": "mr", "bengali": "bn"}
    lang = lang_codes.get(language)
    if lang is None:
        return "Unsupported language selected!"
    # Ask the Bhashini pipeline which translation service handles this language pair
    payload = {
        "pipelineTasks": [
            {
                "taskType": "translation",
                "config": {
                    "language": {
                        "sourceLanguage": lang,
                        "targetLanguage": "en",
                    },
                },
            }
        ],
        "pipelineRequestConfig": {
            "pipelineId": "64392f96daac500b55c543cd"
        }
    }
    headers = {
        "Content-Type": "application/json",
        "userID": "2aeef589f4584eb08aa0b9c49761aeb8",
        "ulcaApiKey": "02ed10445a-66b0-4061-9030-9b0b8b37a4f1"
    }
    response = requests.post(
        'https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline',
        json=payload,
        headers=headers,
    )
    if response.status_code != 200:
        return "Error fetching translation pipeline configuration"

    response_data = response.json()
    print(response_data)
    # Pick the serviceId returned for the requested language pair
    service_id = response_data["pipelineResponseConfig"][0]["config"][0]["serviceId"]
    # Translate the ASR transcript to English using the resolved service
    compute_payload = {
        "pipelineTasks": [
            {
                "taskType": "translation",
                "config": {
                    "serviceId": service_id,
                    "language": {
                        "sourceLanguage": lang,
                        "targetLanguage": "en",
                    },
                },
            }
        ],
        "inputData": {"input": [{"source": asr_output}]},
    }
    callback_url = response_data["pipelineInferenceAPIEndPoint"]["callbackUrl"]
    headers2 = {
        "Content-Type": "application/json",
        response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["name"]:
            response_data["pipelineInferenceAPIEndPoint"]["inferenceApiKey"]["value"],
    }
    compute_response = requests.post(callback_url, json=compute_payload, headers=headers2)
    if compute_response.status_code != 200:
        print("status_code", compute_response.status_code)
        return "Error in translation request"

    compute_response_data = compute_response.json()
    print(compute_response_data)
    translated_content = compute_response_data["pipelineResponse"][0]["output"][0]["target"]
    print("Translation successful", translated_content)
    return translated_content


iface = gr.Interface(
    fn=speech_translation,
    inputs=[
        gr.Audio(type="filepath", label="Record your speech"),
        gr.Dropdown(["telugu", "hindi", "marathi", "bengali"], label="Select Language"),
    ],
    outputs=["text"],
    title="Speech Translation",
    description="Record your speech and get the English translation.",
)

iface.launch()