Spaces:

helvekami
/

ShukaNote

Running

App Files Community

helvekami committed on Mar 6

Commit

86fab4a

1 Parent(s): fbc6758

Updated Gradio App

Browse files

Files changed (1) hide show

app.py +12 -8

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 import transformers
 import librosa
 import torch
 # Load the Shuka model pipeline.
 pipe = transformers.pipeline(
@@ -17,7 +18,7 @@ def process_audio(audio):
     """
     if audio is None:
         return "No audio provided. Please upload or record an audio file."
     try:
         # Gradio returns a tuple: (sample_rate, numpy_array)
         sample_rate, audio_data = audio
@@ -27,7 +28,11 @@ def process_audio(audio):
     if audio_data is None or len(audio_data) == 0:
         return "Audio data is empty. Please try again with a valid audio file."
-    # Resample to 16000 Hz if necessary
     if sample_rate != 16000:
         try:
             audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
@@ -35,7 +40,7 @@ def process_audio(audio):
         except Exception as e:
             return f"Error during resampling: {e}"
-    # Define conversation turns for the model
     turns = [
         {'role': 'system', 'content': 'Respond naturally and informatively.'},
         {'role': 'user', 'content': '<|audio|>'}
@@ -46,7 +51,7 @@ def process_audio(audio):
     except Exception as e:
         return f"Error during model processing: {e}"
-    # Extract generated text
     if isinstance(result, list) and len(result) > 0:
         response = result[0].get('generated_text', '')
     else:
@@ -55,15 +60,14 @@ def process_audio(audio):
     return response
 # Create the Gradio interface.
-# If you wish to record audio directly, you may need to upgrade Gradio to a version that supports "source" for the Audio component.
 iface = gr.Interface(
     fn=process_audio,
-    inputs=gr.Audio(type="numpy"),  # using file upload input for audio
     outputs="text",
     title="Sarvam AI Shuka Voice Demo",
     description="Upload an audio file and get a response using Sarvam AI's Shuka model."
 )
 if __name__ == "__main__":
-    # If port 7860 is in use, you can specify another port (here we use 7861)
-    iface.launch(server_port=7861)

 import transformers
 import librosa
 import torch
+import numpy as np
 # Load the Shuka model pipeline.
 pipe = transformers.pipeline(
     """
     if audio is None:
         return "No audio provided. Please upload or record an audio file."
     try:
         # Gradio returns a tuple: (sample_rate, numpy_array)
         sample_rate, audio_data = audio
     if audio_data is None or len(audio_data) == 0:
         return "Audio data is empty. Please try again with a valid audio file."
+    # Convert audio data to float if not already floating-point.
+    if not np.issubdtype(audio_data.dtype, np.floating):
+        audio_data = audio_data.astype(np.float32)
+    # Resample to 16000 Hz if necessary.
     if sample_rate != 16000:
         try:
             audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
         except Exception as e:
             return f"Error during resampling: {e}"
+    # Define conversation turns for the model.
     turns = [
         {'role': 'system', 'content': 'Respond naturally and informatively.'},
         {'role': 'user', 'content': '<|audio|>'}
     except Exception as e:
         return f"Error during model processing: {e}"
+    # Extract the generated text response.
     if isinstance(result, list) and len(result) > 0:
         response = result[0].get('generated_text', '')
     else:
     return response
 # Create the Gradio interface.
 iface = gr.Interface(
     fn=process_audio,
+    inputs=gr.Audio(type="numpy"),  # File upload for audio.
     outputs="text",
     title="Sarvam AI Shuka Voice Demo",
     description="Upload an audio file and get a response using Sarvam AI's Shuka model."
 )
 if __name__ == "__main__":
+    # Set share=True to create a public link, and specify a server port.
+    iface.launch(share=True, server_port=7861)