Loren committed · Commit ace431a · verified · Parent: 8cf4656

Update app.py

Files changed (1): app.py (+7 -8)
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
 import torch
 from transformers import AutoProcessor, VoxtralForConditionalGeneration
 import spaces
-from gradio_modal import Modal

 #### Functions

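For context on this hunk: `from gradio_modal import Modal` was unused and is dropped, while the remaining imports cover the model side (`AutoProcessor`, `VoxtralForConditionalGeneration`) plus the `spaces` package used for ZeroGPU's `@spaces.GPU` decorator. A minimal loading sketch follows; the checkpoint id, dtype, and device placement below are assumptions, not taken from app.py.

```python
# Minimal loading sketch (not the Space's exact code). The repo id, dtype and
# device placement are assumptions.
import torch
from transformers import AutoProcessor, VoxtralForConditionalGeneration

REPO_ID = "mistralai/Voxtral-Mini-3B-2507"  # assumed checkpoint name

processor = AutoProcessor.from_pretrained(REPO_ID)
model = VoxtralForConditionalGeneration.from_pretrained(
    REPO_ID,
    torch_dtype=torch.bfloat16,  # assumption: half precision to fit GPU memory
    device_map="auto",           # assumption: let accelerate pick the device
)
```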
 
@@ -119,12 +118,12 @@ with gr.Blocks(title="Voxtral") as voxtral:
     gr.Markdown("""## **Key Features:**

 #### Voxtral builds upon Ministral-3B with powerful audio understanding capabilities.
-#### - **Dedicated transcription mode**: Voxtral can operate in a pure speech transcription mode to maximize performance. By default, Voxtral automatically predicts the source audio language and transcribes the text accordingly
-#### - **Long-form context**: With a 32k token context length, Voxtral handles audios up to 30 minutes for transcription, or 40 minutes for understanding
-#### - **Built-in Q&A and summarization**: Supports asking questions directly through audio. Analyze audio and generate structured summaries without the need for separate ASR and language models
-#### - **Natively multilingual**: Automatic language detection and state-of-the-art performance in the world’s most widely used languages (English, Spanish, French, Portuguese, Hindi, German, Dutch, Italian)
-#### - **Function-calling straight from voice**: Enables direct triggering of backend functions, workflows, or API calls based on spoken user intents
-#### - **Highly capable at text**: Retains the text understanding capabilities of its language model backbone, Ministral-3B""")
+##### - **Dedicated transcription mode**: Voxtral can operate in a pure speech transcription mode to maximize performance. By default, Voxtral automatically predicts the source audio language and transcribes the text accordingly
+##### - **Long-form context**: With a 32k token context length, Voxtral handles audios up to 30 minutes for transcription, or 40 minutes for understanding
+##### - **Built-in Q&A and summarization**: Supports asking questions directly through audio. Analyze audio and generate structured summaries without the need for separate ASR and language models
+##### - **Natively multilingual**: Automatic language detection and state-of-the-art performance in the world’s most widely used languages (English, Spanish, French, Portuguese, Hindi, German, Dutch, Italian)
+##### - **Function-calling straight from voice**: Enables direct triggering of backend functions, workflows, or API calls based on spoken user intents
+##### - **Highly capable at text**: Retains the text understanding capabilities of its language model backbone, Ministral-3B""")


     gr.Markdown("### **1. Upload an audio file, record via microphone, or select a demo file:**")
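The "dedicated transcription mode" and "built-in Q&A" bullets describe what the app drives through the processor and model loaded above. Below is a hedged sketch of the chat-template pattern for audio Q&A; it reuses `processor` and `model` from the previous sketch, the audio path and prompt are placeholders, and the exact keyword handling can differ between transformers versions.

```python
# Sketch only: assumes `processor` and `model` from the loading sketch above.
import torch

conversation = [
    {
        "role": "user",
        "content": [
            {"type": "audio", "path": "example.mp3"},               # placeholder audio file
            {"type": "text", "text": "Summarize this recording."},  # placeholder question
        ],
    }
]

# Build combined audio + text inputs via the multimodal chat template.
inputs = processor.apply_chat_template(conversation)
inputs = inputs.to(model.device, dtype=torch.bfloat16)

outputs = model.generate(**inputs, max_new_tokens=500)

# Decode only the newly generated tokens, skipping the prompt.
answer = processor.batch_decode(
    outputs[:, inputs.input_ids.shape[1]:], skip_special_tokens=True
)[0]
print(answer)
```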
@@ -170,7 +169,7 @@ with gr.Blocks(title="Voxtral") as voxtral:

     with gr.Column():
         with gr.Accordion("🤖 Ask audio file", open=True):
-            question_chat = gr.Textbox(label="Ask audio file", placeholder="Enter your question about audio file")
+            question_chat = gr.Textbox(label="Enter your question about audio file:", placeholder="Enter your question about audio file")
             submit_chat = gr.Button("Ask audio file:", variant="primary")
             text_chat = gr.Textbox(label="💬 Model answer", lines=10)

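This hunk only relabels the question box; the submit button and answer textbox are unchanged. As a standalone illustration of how these three components are typically wired in Gradio, here is a hypothetical sketch: `ask_audio` and `audio_input` are stand-ins, since the real callback and audio component sit outside this hunk.

```python
import gradio as gr

# Hypothetical handler: app.py's real callback is not part of this diff.
def ask_audio(audio_path: str, question: str) -> str:
    # The real app would run Voxtral on (audio_path, question); this stub just
    # echoes its arguments so the wiring can be tried standalone.
    return f"(stub) would answer {question!r} about {audio_path!r}"

with gr.Blocks(title="Voxtral") as demo:
    audio_input = gr.Audio(type="filepath", label="Audio")  # assumed upstream component
    with gr.Column():
        with gr.Accordion("🤖 Ask audio file", open=True):
            question_chat = gr.Textbox(
                label="Enter your question about audio file:",
                placeholder="Enter your question about audio file",
            )
            submit_chat = gr.Button("Ask audio file:", variant="primary")
            text_chat = gr.Textbox(label="💬 Model answer", lines=10)

    # Button.click(fn, inputs, outputs) routes the click event to the handler.
    submit_chat.click(fn=ask_audio, inputs=[audio_input, question_chat], outputs=text_chat)

demo.launch()
```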
 
 