Update app.py
Browse files
app.py
CHANGED
|
@@ -223,14 +223,15 @@ with gr.Blocks(css=css) as demo :
|
|
| 223 |
<h2 style="text-align: center;">Soft Video Understanding</h2>
|
| 224 |
<p style="text-align: center;">
|
| 225 |
An experiment to try to achieve what I call "soft video understanding" with open-source available models. <br />
|
| 226 |
-
We use moondream1 to caption extracted frames, salmonn to analyze extracted audio, then
|
| 227 |
-
Instructions prompt is available for further discussion with the Community.
|
|
|
|
| 228 |
</p>
|
| 229 |
""")
|
| 230 |
with gr.Row():
|
| 231 |
with gr.Column():
|
| 232 |
video_in = gr.Video(label="Video input")
|
| 233 |
-
with gr.Accordion("System Instructions", open=False):
|
| 234 |
system_instruction = gr.Markdown(
|
| 235 |
value = standard_sys
|
| 236 |
)
|
|
|
|
| 223 |
<h2 style="text-align: center;">Soft Video Understanding</h2>
|
| 224 |
<p style="text-align: center;">
|
| 225 |
An experiment to try to achieve what I call "soft video understanding" with open-source available models. <br />
|
| 226 |
+
We use moondream1 to caption extracted frames, salmonn to analyze extracted audio, then give visual and audio details to Zephyr, which is instructed to summarize what it understood.<br />
|
| 227 |
+
The instruction prompt is available for further discussion with the Community. <br />
|
| 228 |
+
Note that audio is crucial for better overall vision. Video longer than 10 seconds will be cut.
|
| 229 |
</p>
|
| 230 |
""")
|
| 231 |
with gr.Row():
|
| 232 |
with gr.Column():
|
| 233 |
video_in = gr.Video(label="Video input")
|
| 234 |
+
with gr.Accordion("System Instructions (for your curiosity)", open=False):
|
| 235 |
system_instruction = gr.Markdown(
|
| 236 |
value = standard_sys
|
| 237 |
)
|