detect-web-ui-element

Runtime error

App Files Files Community

BoyuNLP committed on Oct 9, 2024

Commit

d8b7bfc

1 Parent(s): d2bd1f7

init

Browse files

Files changed (3) hide show

llava/conversation.py +3 -1
llava/serve/gradio_web_server.py +4 -17
pyproject.toml +1 -1

llava/conversation.py CHANGED Viewed

@@ -162,7 +162,7 @@ class Conversation:
                     images.append(image)
         return images
-    def to_gradio_chatbot(self,extra_image=None):
         ret = []
         for i, (role, msg) in enumerate(reversed(self.messages[self.offset:])):
             if role==self.roles[0]:
@@ -195,6 +195,8 @@ class Conversation:
                         image_format='JPEG')
                     img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
                     msg=img_str
                     ret.append([msg, None])
                 break
         return ret

                     images.append(image)
         return images
+    def to_gradio_chatbot(self,extra_image=None,extra_coordinates=None):
         ret = []
         for i, (role, msg) in enumerate(reversed(self.messages[self.offset:])):
             if role==self.roles[0]:
                         image_format='JPEG')
                     img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
                     msg=img_str
+                    if not extra_coordinates:
+                        msg=f"The element is at {extra_coordinates} on the screen: "  +msg
                     ret.append([msg, None])
                 break
         return ret

llava/serve/gradio_web_server.py CHANGED Viewed

@@ -70,19 +70,6 @@ from PIL import Image, ImageDraw
 def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
-    """
-    在给定的图片上绘制一个红色圆圈，并返回新的图片。如果 x, y 坐标不在图片范围内，
-    并且 y 超出了图片高度，则尝试将 y 减去 224；如果调整后的 y 仍然超出范围，则返回原图。
-    参数:
-    - image: 传入的 PIL.Image 对象
-    - x, y: 圆心的绝对坐标
-    - radius: 圆圈的半径，默认为 10
-    - color: 圆圈的颜色，默认为红色 (255, 0, 0)
-    返回:
-    - 带有红色圆圈的 PIL.Image 对象，或者在坐标不合法时返回原图。
-    """
     # 获取图片的宽度和高度
     img_width, img_height = image.size
@@ -108,9 +95,9 @@ def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
     right_down_point = (x + radius, y + radius)
     # 绘制圆圈 (outline 参数设置圆圈的颜色，width 设置线条粗细)
-    draw.ellipse([left_up_point, right_down_point], outline=color, width=2)
-    return image
 def get_conv_log_filename():
     t = datetime.datetime.now()
@@ -391,9 +378,9 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
     if len(all_images) > 0:
         # 假设我们对第一张图片进行 resize 并展示
-        resized_image = draw_circle_on_image(resize_image(all_images[0]),original_coord[0],original_coord[1])
         # state.append_message(state.roles[1], ("", resized_image,"Default"))
-        yield (state, state.to_gradio_chatbot(resized_image)) + (enable_btn,) * 5
     with open(get_conv_log_filename(), "a") as fout:
         data = {

 def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
     # 获取图片的宽度和高度
     img_width, img_height = image.size
     right_down_point = (x + radius, y + radius)
     # 绘制圆圈 (outline 参数设置圆圈的颜色，width 设置线条粗细)
+    draw.ellipse([left_up_point, right_down_point], outline=color, width=5)
+    return image,(x,y)
 def get_conv_log_filename():
     t = datetime.datetime.now()
     if len(all_images) > 0:
         # 假设我们对第一张图片进行 resize 并展示
+        resized_image,coordinates = draw_circle_on_image(resize_image(all_images[0]),original_coord[0],original_coord[1])
         # state.append_message(state.roles[1], ("", resized_image,"Default"))
+        yield (state, state.to_gradio_chatbot(resized_image,coordinates)) + (enable_btn,) * 5
     with open(get_conv_log_filename(), "a") as fout:
         data = {

pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "uground_demo_test"
-version = "3.7"
 description = "Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents"
 readme = "README.md"
 requires-python = ">=3.8"

 [project]
 name = "uground_demo_test"
+version = "3.9"
 description = "Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents"
 readme = "README.md"
 requires-python = ">=3.8"