update chat
- app.py +7 -4
- multimodal/open_flamingo/chat/conversation.py +4 -13
app.py
CHANGED
@@ -248,17 +248,20 @@ def gradio_ask(user_message, chatbot, chat_state,radio):
 
 
 def gradio_answer(chatbot, chat_state, img_list, radio, text,num_beams, temperature):
+    image == None
     llm_message,image = \
         chat.answer(conv=chat_state, img_list=img_list, max_new_tokens=300, num_beams=1, temperature=temperature,
                     max_length=2000,radio = radio,text_input = text)
 
     chatbot[-1][1] = llm_message
+    if chat_state[-1]["from"]=="gpt":
+        chat_state[-1]["value"] = llm_message
     if image==None:
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list
     else:
         path = build_image(image)
         chatbot = chatbot + [[None,(path,)]]
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list
 
 task_template = {
     "Cap": "Summarize the content of the photo <image>.",
@@ -312,13 +315,13 @@ with gr.Blocks() as demo:
     # text_input = gr.Textbox(label='<question>', show_label=True, placeholder="Please upload your image first, then input...", lines=3,
     #                         value=None, visible=False, interactive=False)
 
-    text_input = gr.Textbox(label='User', placeholder='Please upload your image first
+    text_input = gr.Textbox(label='User', placeholder='Please upload your image first, then input...', interactive=False)
 
     upload_button.click(upload_img, [image, text_input, chat_state,chatbot],
                         [image, text_input, upload_button, chat_state, img_list,chatbot])
 
     text_input.submit(gradio_ask, [text_input, chatbot, chat_state,radio], [text_input, chatbot, chat_state]).then(
-        gradio_answer, [chatbot, chat_state, img_list, radio, text_input,num_beams, temperature], [chatbot, chat_state, img_list]
+        gradio_answer, [chatbot, chat_state, img_list, radio, text_input,num_beams, temperature], [text_input,chatbot, chat_state, img_list]
     )
     clear.click(gradio_reset, [chat_state, img_list], [chatbot, image, text_input, upload_button, chat_state, img_list],
                 queue=False)
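The app.py changes do two things: gradio_answer now writes llm_message back into the last "gpt" turn of chat_state, so the stored conversation matches what is displayed, and it returns an empty string as a new first output, which the .submit(...).then(...) chain binds to text_input so the textbox is cleared after each answer. (The added image == None is a bare comparison rather than an assignment and has no effect; image is rebound by the chat.answer(...) call on the next line regardless.) Below is a minimal self-contained sketch of the clear-on-return wiring, assuming a Gradio 3-style Chatbot that takes message pairs; the handler name echo and its stand-in reply are illustrative, not from this Space:

import gradio as gr

def echo(message, history):
    # Append the user turn and a stand-in reply to the chat history.
    history = history + [[message, message.upper()]]
    # Returning "" as the first value clears the textbox, because the
    # submit event below lists text_input first among its outputs.
    return "", history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    text_input = gr.Textbox(label="User", placeholder="Type and press Enter...")
    # Output order [text_input, chatbot] mirrors the diff's
    # [text_input, chatbot, chat_state, img_list].
    text_input.submit(echo, [text_input, chatbot], [text_input, chatbot])

demo.launch()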
multimodal/open_flamingo/chat/conversation.py
CHANGED
@@ -287,7 +287,7 @@ class Chat:
         elif radio in ["VQA"]:
             conv.append({
                 "from": "human",
-                "value": f"Answer the question using a single word or phrase.{text}",
+                "value": f"Answer the question using a single word or phrase. {text}",
             })
         elif radio in ["REC"]:
             conv.append({
@@ -368,7 +368,7 @@ class Chat:
             conv.append(
                 {
                     "from": "gpt",
-                    "value": object_token + text_input + end_token + visual_token
+                    "value": object_token + text_input + end_token + visual_token,
                 }
             )
         else:
@@ -427,17 +427,6 @@ class Chat:
             added_bbox_list=None,
             add_box=False,
         )
-        # with torch.no_grad():
-        #     outputs = self.model.generate(
-        #         batch_images,
-        #         input_ids,
-        #         attention_mask=attention_mask,
-        #         max_new_tokens=100,
-        #         # min_new_tokens=8,
-        #         num_beams=1,
-        #         image_start_index_list=image_start_index_list,
-        #         image_nums=image_nums,
-        #     )
         boxes = outputs["boxes"]
         scores = outputs["scores"]
         if len(scores) > 0:
@@ -463,6 +452,8 @@ class Chat:
         # print(
         #     f"### Assistant: {tokenizer.decode(outputs[0, input_ids.shape[1]:], skip_special_tokens=True).strip()}")
         output_text = self.tokenizer.decode(text_outputs[0])
+        output_text = re.findall(r'Assistant:(.+)', output_text)[-1]
+
         return output_text, out_image
 
     def upload_img(self, image, conv, img_list):
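The substantive conversation.py change is the new post-processing of the decoded output: the raw decode of text_outputs[0] still contains the whole prompt transcript, and re.findall(r'Assistant:(.+)', output_text)[-1] keeps only what follows the last "Assistant:" marker, up to the end of that line (. does not cross newlines). This assumes re is imported in conversation.py and that the marker actually occurs; if it does not, findall returns an empty list and the [-1] raises an IndexError. The other edits are cosmetic: a missing space before {text} in the VQA prompt, a trailing comma after visual_token, and deletion of a commented-out model.generate call. A small illustration with a made-up transcript (the real format depends on the model's prompt template):

import re

# Hypothetical decoded output, including the echoed prompt transcript.
output_text = "### Human: What animal is this? ### Assistant: a red panda.<|endofchunk|>"

# findall returns the captured groups; [-1] picks the last occurrence.
answer = re.findall(r'Assistant:(.+)', output_text)[-1]
print(answer)  # ' a red panda.<|endofchunk|>'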