Spaces: Running on Zero
added basic UI for testing
Browse files- src/app.py +30 -3
src/app.py
CHANGED
|
@@ -62,7 +62,6 @@ def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, floa
|
|
| 62 |
|
| 63 |
def process_video(video_path: str, max_images: int) -> list[dict]:
|
| 64 |
result_content = []
|
| 65 |
-
# TODO: Change max_image to slider
|
| 66 |
frames = get_frames(video_path, max_images)
|
| 67 |
for frame in frames:
|
| 68 |
image, timestamp = frame
|
|
@@ -124,7 +123,11 @@ def process_history(history: list[dict]) -> list[dict]:
|
|
| 124 |
|
| 125 |
@spaces.GPU(duration=120)
|
| 126 |
def run(
|
| 127 |
-
message: dict,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
) -> Iterator[str]:
|
| 129 |
|
| 130 |
messages = []
|
|
@@ -133,7 +136,9 @@ def run(
|
|
| 133 |
{"role": "system", "content": [{"type": "text", "text": system_prompt}]}
|
| 134 |
)
|
| 135 |
messages.extend(process_history(history))
|
| 136 |
-
messages.append(
|
|
|
|
|
|
|
| 137 |
|
| 138 |
inputs = input_processor.apply_chat_template(
|
| 139 |
messages,
|
|
@@ -158,3 +163,25 @@ def run(
|
|
| 158 |
for delta in streamer:
|
| 159 |
output += delta
|
| 160 |
yield output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def process_video(video_path: str, max_images: int) -> list[dict]:
|
| 64 |
result_content = []
|
|
|
|
| 65 |
frames = get_frames(video_path, max_images)
|
| 66 |
for frame in frames:
|
| 67 |
image, timestamp = frame
|
|
|
|
| 123 |
|
| 124 |
@spaces.GPU(duration=120)
|
| 125 |
def run(
|
| 126 |
+
message: dict,
|
| 127 |
+
history: list[dict],
|
| 128 |
+
system_prompt: str,
|
| 129 |
+
max_new_tokens: int,
|
| 130 |
+
max_images: int,
|
| 131 |
) -> Iterator[str]:
|
| 132 |
|
| 133 |
messages = []
|
|
|
|
| 136 |
{"role": "system", "content": [{"type": "text", "text": system_prompt}]}
|
| 137 |
)
|
| 138 |
messages.extend(process_history(history))
|
| 139 |
+
messages.append(
|
| 140 |
+
{"role": "user", "content": process_user_input(message, max_images)}
|
| 141 |
+
)
|
| 142 |
|
| 143 |
inputs = input_processor.apply_chat_template(
|
| 144 |
messages,
|
|
|
|
| 163 |
for delta in streamer:
|
| 164 |
output += delta
|
| 165 |
yield output
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# Chat UI for manual testing: a multimodal chat front end wired to the
# streaming `run` generator defined earlier in this file.
demo = gr.ChatInterface(
    fn=run,
    type="messages",
    chatbot=gr.Chatbot(
        type="messages",
        scale=1,
        allow_tags=["image"],
    ),
    # Accept several attachments per message: images and .mp4 videos.
    textbox=gr.MultimodalTextbox(
        file_types=["image", ".mp4"],
        file_count="multiple",
        autofocus=True,
    ),
    multimodal=True,
    # Listed in the same order as run()'s trailing parameters:
    # system_prompt, max_new_tokens, max_images.
    additional_inputs=[
        gr.Textbox(
            label="System Prompt",
            value="You are a helpful assistant.",
        ),
        gr.Slider(
            label="Max New Tokens",
            minimum=100,
            maximum=2000,
            step=10,
            value=700,
        ),
        gr.Slider(
            label="Max Images",
            minimum=1,
            maximum=4,
            step=1,
            value=2,
        ),
    ],
    stop_btn=False,
)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|