Spaces:
Running
Running
| import requests | |
| from deep_translator import GoogleTranslator | |
| import gradio as gr | |
| import soundfile as sf | |
def speech_translation(audio, language):
    """Transcribe recorded speech via the remote ASR service and translate it to English.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded.
        language: Source language name. It is sent to the ASR endpoint as the
            ``language`` form field and used as the translator's source
            language.

    Returns:
        A single string: the English translation, an empty string when the
        transcript is empty, or a short error message when no audio was
        supplied or the ASR step failed.
    """
    # Function-scope imports keep this block self-contained; the names are
    # only needed here.
    import os
    import tempfile

    if audio is None:
        # The interface declares exactly one text output, so return one
        # string rather than a tuple.
        return "No audio input provided!"

    # A per-call temporary directory avoids concurrent requests overwriting a
    # shared "temp_audio.wav" and guarantees the converted file is removed.
    with tempfile.TemporaryDirectory() as work_dir:
        if audio.endswith(".wav"):
            audio_file = audio
        else:
            # Re-encode any other format as .wav before uploading.
            wav_data, samplerate = sf.read(audio)
            audio_file = os.path.join(work_dir, "temp_audio.wav")
            sf.write(audio_file, wav_data, samplerate)

        try:
            # The context manager closes the upload handle even if the
            # request raises.
            with open(audio_file, "rb") as audio_handle:
                files = {
                    'file': audio_handle,
                    'language': (None, language),
                    'vtt': (None, 'true'),
                }
                response = requests.post(
                    'https://asr.iitm.ac.in/internal/asr/decode',
                    files=files,
                )
            payload = response.json()
            print(payload)
            asr_output = payload['transcript']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, non-JSON body, or a reply without a usable
            # 'transcript' field: report it instead of translating the
            # error text.
            return "Error in ASR processing"

    if not isinstance(asr_output, str):
        return "Error in ASR processing"

    # Drop the danda and full stop from the transcript before translation.
    asr_output = asr_output.replace("।", "").replace(".", "")
    if not asr_output.strip():
        # Nothing was recognised; skip the pointless translation request.
        return ""

    translator = GoogleTranslator(source=language, target='en')
    return translator.translate(asr_output)
# Input widgets, in the positional order speech_translation expects them:
# first the recorded audio (delivered to the callback as a file path),
# then the spoken language.
audio_input = gr.Audio(type="filepath", label="Record your speech")
language_input = gr.Dropdown(
    ["telugu", "hindi", "marathi", "bengali"],
    label="Select Language",
)

# Single text output that displays the string returned by speech_translation.
iface = gr.Interface(
    fn=speech_translation,
    inputs=[audio_input, language_input],
    outputs=["text"],
    title="Speech Translation",
    description="Record your speech and get the English translation.",
)

# Start the web UI; share=True asks Gradio for a public share link.
iface.launch(share=True)