AC2513 committed on
Commit
bc0a046
·
1 Parent(s): 0c4170f

fixed support for multiple media files

Browse files
Files changed (1) hide show
  1. src/app.py +12 -13
src/app.py CHANGED
@@ -79,16 +79,15 @@ def process_user_input(message: dict, max_images: int) -> list[dict]:
79
  if not message["files"]:
80
  return [{"type": "text", "text": message["text"]}]
81
 
82
- if message["files"][0].endswith(".mp4") or message["files"][0].endswith(".mov"):
83
- return [
84
- {"type": "text", "text": message["text"]},
85
- *process_video(message["files"][0], max_images),
86
- ]
87
 
88
- return [
89
- {"type": "text", "text": message["text"]},
90
- *[{"type": "image", "url": path} for path in message["files"]],
91
- ]
 
 
 
92
 
93
 
94
  def process_history(history: list[dict]) -> list[dict]:
@@ -129,10 +128,10 @@ def run(
129
  max_new_tokens: int,
130
  max_images: int,
131
  ) -> Iterator[str]:
132
-
133
  logger.debug(
134
- f"Received message: {message}, history: {history}, system_prompt: {system_prompt}, "
135
- f"max_new_tokens: {max_new_tokens}, max_images: {max_images}"
136
  )
137
 
138
  messages = []
@@ -175,7 +174,7 @@ demo = gr.ChatInterface(
175
  type="messages",
176
  chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
177
  textbox=gr.MultimodalTextbox(
178
- file_types=["image", ".mp4"], file_count="multiple", autofocus=True
179
  ),
180
  multimodal=True,
181
  additional_inputs=[
 
79
  if not message["files"]:
80
  return [{"type": "text", "text": message["text"]}]
81
 
82
+ result_content = [{"type": "text", "text": message["text"]}]
 
 
 
 
83
 
84
+ for file_path in message["files"]:
85
+ if file_path.endswith((".mp4", ".mov")):
86
+ result_content = [*result_content, *process_video(file_path, max_images)]
87
+ else:
88
+ result_content = [*result_content, {"type": "image", "url": file_path}]
89
+
90
+ return result_content
91
 
92
 
93
  def process_history(history: list[dict]) -> list[dict]:
 
128
  max_new_tokens: int,
129
  max_images: int,
130
  ) -> Iterator[str]:
131
+
132
  logger.debug(
133
+ f"\n message: {message} \n history: {history} \n system_prompt: {system_prompt} \n "
134
+ f"max_new_tokens: {max_new_tokens} \n max_images: {max_images}"
135
  )
136
 
137
  messages = []
 
174
  type="messages",
175
  chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
176
  textbox=gr.MultimodalTextbox(
177
+ file_types=[".mp4", ".img"], file_count="multiple", autofocus=True
178
  ),
179
  multimodal=True,
180
  additional_inputs=[