Spaces:
Runtime error
Runtime error
Add UI component to allow the user to enable or disable reset_clip per frame
Browse files
app.py
CHANGED
|
@@ -107,7 +107,8 @@ def get_weight_files(size):
|
|
| 107 |
|
| 108 |
# Caption using cached llm with real-time debug logs
|
| 109 |
|
| 110 |
-
def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, usr_prompt
|
|
|
|
| 111 |
debug_msgs = []
|
| 112 |
timestamp = time.strftime('%H:%M:%S')
|
| 113 |
debug_msgs.append(f"[{timestamp}] Received frame shape: {frame.shape}")
|
|
@@ -145,10 +146,12 @@ def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, u
|
|
| 145 |
timestamp = time.strftime('%H:%M:%S')
|
| 146 |
debug_msgs.append(f"[{timestamp}] Sending prompt of length {len(usr_prompt)} to LLM")
|
| 147 |
# re-init handler for image
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
| 151 |
|
|
|
|
| 152 |
debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
|
| 153 |
|
| 154 |
t_start = time.time()
|
|
@@ -209,15 +212,16 @@ def main():
|
|
| 209 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
| 210 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|
| 211 |
usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
|
|
|
|
| 212 |
cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
|
| 213 |
cap = gr.Textbox(interactive=False, label='Caption')
|
| 214 |
log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
|
| 215 |
|
| 216 |
cam.stream(
|
| 217 |
fn=caption_frame,
|
| 218 |
-
inputs=[cam, size_dd, model_dd, clip_dd, interval, sys_p, usr_p],
|
| 219 |
outputs=[cap, log_box],
|
| 220 |
-
time_limit=600
|
| 221 |
)
|
| 222 |
|
| 223 |
demo.launch()
|
|
|
|
| 107 |
|
| 108 |
# Caption using cached llm with real-time debug logs
|
| 109 |
|
| 110 |
+
def caption_frame(frame, size, model_file, clip_file, interval_ms, sys_prompt, usr_prompt,
|
| 111 |
+
reset_clip: bool):
|
| 112 |
debug_msgs = []
|
| 113 |
timestamp = time.strftime('%H:%M:%S')
|
| 114 |
debug_msgs.append(f"[{timestamp}] Received frame shape: {frame.shape}")
|
|
|
|
| 146 |
timestamp = time.strftime('%H:%M:%S')
|
| 147 |
debug_msgs.append(f"[{timestamp}] Sending prompt of length {len(usr_prompt)} to LLM")
|
| 148 |
# re-init handler for image
|
| 149 |
+
if reset_clip:
|
| 150 |
+
model_cache['llm'].chat_handler = SmolVLM2ChatHandler(clip_model_path=clip_file, verbose=False)
|
| 151 |
+
timestamp = time.strftime('%H:%M:%S')
|
| 152 |
+
debug_msgs.append(f"[{timestamp}] Reinitialized chat handler")
|
| 153 |
|
| 154 |
+
timestamp = time.strftime('%H:%M:%S')
|
| 155 |
debug_msgs.append(f"[{timestamp}] CPU count = {os.cpu_count()}")
|
| 156 |
|
| 157 |
t_start = time.time()
|
|
|
|
| 212 |
interval = gr.Slider(100, 20000, step=100, value=3000, label='Interval (ms)')
|
| 213 |
sys_p = gr.Textbox(lines=2, value="Focus on key dramatic action…", label='System Prompt')
|
| 214 |
usr_p = gr.Textbox(lines=1, value="What is happening in this image?", label='User Prompt')
|
| 215 |
+
reset_clip = gr.Checkbox(value=True, label="Reset CLIP handler each frame")
|
| 216 |
cam = gr.Image(sources=['webcam'], streaming=True, label='Webcam Feed')
|
| 217 |
cap = gr.Textbox(interactive=False, label='Caption')
|
| 218 |
log_box = gr.Textbox(lines=8, interactive=False, label='Debug Log')
|
| 219 |
|
| 220 |
cam.stream(
|
| 221 |
fn=caption_frame,
|
| 222 |
+
inputs=[cam, size_dd, model_dd, clip_dd, interval, sys_p, usr_p, reset_clip],
|
| 223 |
outputs=[cap, log_box],
|
| 224 |
+
time_limit=600,
|
| 225 |
)
|
| 226 |
|
| 227 |
demo.launch()
|