update chat
- app.py +7 -4
- multimodal/open_flamingo/chat/conversation.py +4 -13
app.py
CHANGED
@@ -248,17 +248,20 @@ def gradio_ask(user_message, chatbot, chat_state,radio):
 
 
 def gradio_answer(chatbot, chat_state, img_list, radio, text,num_beams, temperature):
+    image == None
     llm_message,image = \
         chat.answer(conv=chat_state, img_list=img_list, max_new_tokens=300, num_beams=1, temperature=temperature,
                     max_length=2000,radio = radio,text_input = text)
 
     chatbot[-1][1] = llm_message
+    if chat_state[-1]["from"]=="gpt":
+        chat_state[-1]["value"] = llm_message
     if image==None:
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list
     else:
         path = build_image(image)
         chatbot = chatbot + [[None,(path,)]]
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list
 
 task_template = {
     "Cap": "Summarize the content of the photo <image>.",
@@ -312,13 +315,13 @@ with gr.Blocks() as demo:
     # text_input = gr.Textbox(label='<question>', show_label=True, placeholder="Please upload your image first, then input...", lines=3,
     #                         value=None, visible=False, interactive=False)
 
-    text_input = gr.Textbox(label='User', placeholder='Please upload your image first
+    text_input = gr.Textbox(label='User', placeholder='Please upload your image first, then input...', interactive=False)
 
     upload_button.click(upload_img, [image, text_input, chat_state,chatbot],
                         [image, text_input, upload_button, chat_state, img_list,chatbot])
 
     text_input.submit(gradio_ask, [text_input, chatbot, chat_state,radio], [text_input, chatbot, chat_state]).then(
-        gradio_answer, [chatbot, chat_state, img_list, radio, text_input,num_beams, temperature], [chatbot, chat_state, img_list]
+        gradio_answer, [chatbot, chat_state, img_list, radio, text_input,num_beams, temperature], [text_input,chatbot, chat_state, img_list]
     )
     clear.click(gradio_reset, [chat_state, img_list], [chatbot, image, text_input, upload_button, chat_state, img_list],
                 queue=False)
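The app.py changes do two things: gradio_answer now writes llm_message back into the last "gpt" turn of chat_state, so the stored conversation matches what is displayed, and it returns an empty string as a new first output, which the .submit(...).then(...) chain binds to text_input so the textbox is cleared after each answer. (The added image == None is a bare comparison rather than an assignment and has no effect; image is rebound by the chat.answer(...) call on the next line regardless.) Below is a minimal self-contained sketch of the clear-on-return wiring, assuming a Gradio 3-style Chatbot that takes message pairs; the handler name echo and its stand-in reply are illustrative, not from this Space:

import gradio as gr

def echo(message, history):
    # Append the user turn and a stand-in reply to the chat history.
    history = history + [[message, message.upper()]]
    # Returning "" as the first value clears the textbox, because the
    # submit event below lists text_input first among its outputs.
    return "", history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    text_input = gr.Textbox(label="User", placeholder="Type and press Enter...")
    # Output order [text_input, chatbot] mirrors the diff's
    # [text_input, chatbot, chat_state, img_list].
    text_input.submit(echo, [text_input, chatbot], [text_input, chatbot])

demo.launch()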
multimodal/open_flamingo/chat/conversation.py
CHANGED
@@ -287,7 +287,7 @@ class Chat:
         elif radio in ["VQA"]:
             conv.append({
                 "from": "human",
-                "value": f"Answer the question using a single word or phrase.{text}",
+                "value": f"Answer the question using a single word or phrase. {text}",
             })
         elif radio in ["REC"]:
             conv.append({
@@ -368,7 +368,7 @@ class Chat:
             conv.append(
                 {
                     "from": "gpt",
-                    "value": object_token + text_input + end_token + visual_token
+                    "value": object_token + text_input + end_token + visual_token,
                 }
             )
         else:
@@ -427,17 +427,6 @@ class Chat:
             added_bbox_list=None,
             add_box=False,
         )
-        # with torch.no_grad():
-        #     outputs = self.model.generate(
-        #         batch_images,
-        #         input_ids,
-        #         attention_mask=attention_mask,
-        #         max_new_tokens=100,
-        #         # min_new_tokens=8,
-        #         num_beams=1,
-        #         image_start_index_list=image_start_index_list,
-        #         image_nums=image_nums,
-        #     )
         boxes = outputs["boxes"]
         scores = outputs["scores"]
         if len(scores) > 0:
@@ -463,6 +452,8 @@ class Chat:
         # print(
         #     f"### Assistant: {tokenizer.decode(outputs[0, input_ids.shape[1]:], skip_special_tokens=True).strip()}")
         output_text = self.tokenizer.decode(text_outputs[0])
+        output_text = re.findall(r'Assistant:(.+)', output_text)[-1]
+
         return output_text, out_image
 
     def upload_img(self, image, conv, img_list):
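The substantive conversation.py change is the new post-processing of the decoded output: the raw decode of text_outputs[0] still contains the whole prompt transcript, and re.findall(r'Assistant:(.+)', output_text)[-1] keeps only what follows the last "Assistant:" marker, up to the end of that line (. does not cross newlines). This assumes re is imported in conversation.py and that the marker actually occurs; if it does not, findall returns an empty list and the [-1] raises an IndexError. The other edits are cosmetic: a missing space before {text} in the VQA prompt, a trailing comma after visual_token, and deletion of a commented-out model.generate call. A small illustration with a made-up transcript (the real format depends on the model's prompt template):

import re

# Hypothetical decoded output, including the echoed prompt transcript.
output_text = "### Human: What animal is this? ### Assistant: a red panda.<|endofchunk|>"

# findall returns the captured groups; [-1] picks the last occurrence.
answer = re.findall(r'Assistant:(.+)', output_text)[-1]
print(answer)  # ' a red panda.<|endofchunk|>'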