Voxtral_Mini_Evaluation

Running

App Files Files Community

Loren committed on Jul 28

Commit

cc8013b

verified ·

1 Parent(s): 699000d

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -39

app.py CHANGED Viewed

@@ -31,50 +31,56 @@ def process_transcript(language: str, audio_path: str) -> str:
 @spaces.GPU
 def process_translate(language: str, audio_path: str) -> str:
-    conversation = [
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "audio",
-                    "path": audio_path,
-                },
-                {"type": "text", "text": "Translate this in "+language},
-            ],
-        }
-    ]
-    inputs = processor.apply_chat_template(conversation)
-    inputs = inputs.to(device, dtype=torch.bfloat16)
-    outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
-    decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
-    return decoded_outputs[0]
 ###
 @spaces.GPU
 def process_chat(question: str, audio_path: str) -> str:
-    conversation = [
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "audio",
-                    "path": audio_path,
-                },
-                {"type": "text", "text": question},
-            ],
-        }
-    ]
-    inputs = processor.apply_chat_template(conversation)
-    inputs = inputs.to(device, dtype=torch.bfloat16)
-    outputs = model.generate(**inputs, max_new_tokens=500)
-    decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
-    return decoded_outputs[0]
 ###
 def disable_buttons():

 @spaces.GPU
 def process_translate(language: str, audio_path: str) -> str:
+    if audio_path is None:
+        return "Please provide some input audio: either upload an audio file or use the microphone."
+    else:
+        conversation = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "audio",
+                        "path": audio_path,
+                    },
+                    {"type": "text", "text": "Translate this in "+language},
+                ],
+            }
+        ]
+        inputs = processor.apply_chat_template(conversation)
+        inputs = inputs.to(device, dtype=torch.bfloat16)
+        outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
+        decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
+        return decoded_outputs[0]
 ###
 @spaces.GPU
 def process_chat(question: str, audio_path: str) -> str:
+    if audio_path is None:
+        return "Please provide some input audio: either upload an audio file or use the microphone."
+    else:
+        conversation = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "audio",
+                        "path": audio_path,
+                    },
+                    {"type": "text", "text": question},
+                ],
+            }
+        ]
+        inputs = processor.apply_chat_template(conversation)
+        inputs = inputs.to(device, dtype=torch.bfloat16)
+        outputs = model.generate(**inputs, max_new_tokens=500)
+        decoded_outputs = processor.batch_decode(outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True)
+        return decoded_outputs[0]
 ###
 def disable_buttons():