Update app.py
app.py CHANGED

@@ -2,7 +2,6 @@ import gradio as gr
 import torch
 from transformers import AutoProcessor, VoxtralForConditionalGeneration
 import spaces
-from gradio_modal import Modal
 
 #### Functions
 
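For reference, a minimal sketch of how the imports kept above are typically used to load Voxtral with transformers. The checkpoint id, precision, and device placement are assumptions for illustration; the diff does not show which repository or settings this Space actually uses.

```python
import torch
from transformers import AutoProcessor, VoxtralForConditionalGeneration

# Assumed checkpoint id for illustration; the Space's actual repo id is not shown in this diff.
REPO_ID = "mistralai/Voxtral-Mini-3B-2507"

processor = AutoProcessor.from_pretrained(REPO_ID)
model = VoxtralForConditionalGeneration.from_pretrained(
    REPO_ID,
    torch_dtype=torch.bfloat16,  # assumed precision
    device_map="auto",           # place on GPU if one is available
)
```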
@@ -119,12 +118,12 @@ with gr.Blocks(title="Voxtral") as voxtral:
     gr.Markdown("""## **Key Features:**
 
 #### Voxtral builds upon Ministral-3B with powerful audio understanding capabilities.
-
-
-
-
-
-
+##### - **Dedicated transcription mode**: Voxtral can operate in a pure speech transcription mode to maximize performance. By default, Voxtral automatically predicts the source audio language and transcribes the text accordingly
+##### - **Long-form context**: With a 32k token context length, Voxtral handles audios up to 30 minutes for transcription, or 40 minutes for understanding
+##### - **Built-in Q&A and summarization**: Supports asking questions directly through audio. Analyze audio and generate structured summaries without the need for separate ASR and language models
+##### - **Natively multilingual**: Automatic language detection and state-of-the-art performance in the world’s most widely used languages (English, Spanish, French, Portuguese, Hindi, German, Dutch, Italian)
+##### - **Function-calling straight from voice**: Enables direct triggering of backend functions, workflows, or API calls based on spoken user intents
+##### - **Highly capable at text**: Retains the text understanding capabilities of its language model backbone, Ministral-3B""")
 
 
     gr.Markdown("### **1. Upload an audio file, record via microphone, or select a demo file:**")
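The dedicated transcription mode and built-in Q&A described in these bullets map onto the conversation-style API of the transformers Voxtral integration. A rough sketch of an audio question, assuming the `processor` and `model` from the previous snippet; the audio path, question text, and generation length are placeholders:

```python
# Pair an audio clip with a text question in a single user turn.
conversation = [
    {
        "role": "user",
        "content": [
            {"type": "audio", "path": "sample.wav"},  # placeholder audio file
            {"type": "text", "text": "Summarize this recording in two sentences."},
        ],
    }
]

inputs = processor.apply_chat_template(conversation)
inputs = inputs.to(model.device, dtype=torch.bfloat16)

outputs = model.generate(**inputs, max_new_tokens=500)
# Decode only the newly generated tokens, skipping the prompt.
answer = processor.batch_decode(
    outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True
)[0]
print(answer)
```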
@@ -170,7 +169,7 @@ with gr.Blocks(title="Voxtral") as voxtral:
 
         with gr.Column():
             with gr.Accordion("🤖 Ask audio file", open=True):
-                question_chat = gr.Textbox(label="
+                question_chat = gr.Textbox(label="Enter your question about audio file:", placeholder="Enter your question about audio file")
                 submit_chat = gr.Button("Ask audio file:", variant="primary")
                 text_chat = gr.Textbox(label="💬 Model answer", lines=10)
 
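The new `question_chat` textbox presumably feeds a click handler on `submit_chat` that writes the answer into `text_chat`. A self-contained sketch of that wiring with a stub handler; `audio_input` and `ask_audio` are assumptions, since neither the audio component nor the callback appears in this diff:

```python
import gradio as gr

def ask_audio(audio_path: str, question: str) -> str:
    # Stub: the real app would run the Voxtral Q&A call sketched above on the
    # uploaded audio and return the decoded answer string.
    return f"(answer about {audio_path!r} for: {question})"

with gr.Blocks(title="Voxtral") as demo:
    audio_input = gr.Audio(type="filepath", label="Audio")  # assumed component, defined elsewhere in app.py
    with gr.Column():
        with gr.Accordion("🤖 Ask audio file", open=True):
            question_chat = gr.Textbox(
                label="Enter your question about audio file:",
                placeholder="Enter your question about audio file",
            )
            submit_chat = gr.Button("Ask audio file:", variant="primary")
            text_chat = gr.Textbox(label="💬 Model answer", lines=10)

    submit_chat.click(fn=ask_audio, inputs=[audio_input, question_chat], outputs=text_chat)

demo.launch()
```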