Spaces:
Runtime error
Runtime error
init
Browse files
- llava/conversation.py +3 -1
- llava/serve/gradio_web_server.py +4 -17
- pyproject.toml +1 -1
llava/conversation.py
CHANGED
|
@@ -162,7 +162,7 @@ class Conversation:
|
|
| 162 |
images.append(image)
|
| 163 |
return images
|
| 164 |
|
| 165 |
-
def to_gradio_chatbot(self,extra_image=None):
|
| 166 |
ret = []
|
| 167 |
for i, (role, msg) in enumerate(reversed(self.messages[self.offset:])):
|
| 168 |
if role==self.roles[0]:
|
|
@@ -195,6 +195,8 @@ class Conversation:
|
|
| 195 |
image_format='JPEG')
|
| 196 |
img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
|
| 197 |
msg=img_str
|
|
|
|
|
|
|
| 198 |
ret.append([msg, None])
|
| 199 |
break
|
| 200 |
return ret
|
|
|
|
| 162 |
images.append(image)
|
| 163 |
return images
|
| 164 |
|
| 165 |
+
def to_gradio_chatbot(self,extra_image=None,extra_coordinates=None):
|
| 166 |
ret = []
|
| 167 |
for i, (role, msg) in enumerate(reversed(self.messages[self.offset:])):
|
| 168 |
if role==self.roles[0]:
|
|
|
|
| 195 |
image_format='JPEG')
|
| 196 |
img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user upload image" />'
|
| 197 |
msg=img_str
|
| 198 |
+
if not extra_coordinates:
|
| 199 |
+
msg=f"The element is at {extra_coordinates} on the screen: " +msg
|
| 200 |
ret.append([msg, None])
|
| 201 |
break
|
| 202 |
return ret
|
llava/serve/gradio_web_server.py
CHANGED
|
@@ -70,19 +70,6 @@ from PIL import Image, ImageDraw
|
|
| 70 |
|
| 71 |
|
| 72 |
def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
|
| 73 |
-
"""
|
| 74 |
-
在给定的图片上绘制一个红色圆圈,并返回新的图片。如果 x, y 坐标不在图片范围内,
|
| 75 |
-
并且 y 超出了图片高度,则尝试将 y 减去 224;如果调整后的 y 仍然超出范围,则返回原图。
|
| 76 |
-
|
| 77 |
-
参数:
|
| 78 |
-
- image: 传入的 PIL.Image 对象
|
| 79 |
-
- x, y: 圆心的绝对坐标
|
| 80 |
-
- radius: 圆圈的半径,默认为 10
|
| 81 |
-
- color: 圆圈的颜色,默认为红色 (255, 0, 0)
|
| 82 |
-
|
| 83 |
-
返回:
|
| 84 |
-
- 带有红色圆圈的 PIL.Image 对象,或者在坐标不合法时返回原图。
|
| 85 |
-
"""
|
| 86 |
# 获取图片的宽度和高度
|
| 87 |
img_width, img_height = image.size
|
| 88 |
|
|
@@ -108,9 +95,9 @@ def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
|
|
| 108 |
right_down_point = (x + radius, y + radius)
|
| 109 |
|
| 110 |
# 绘制圆圈 (outline 参数设置圆圈的颜色,width 设置线条粗细)
|
| 111 |
-
draw.ellipse([left_up_point, right_down_point], outline=color, width=
|
| 112 |
|
| 113 |
-
return image
|
| 114 |
|
| 115 |
def get_conv_log_filename():
|
| 116 |
t = datetime.datetime.now()
|
|
@@ -391,9 +378,9 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
|
|
| 391 |
if len(all_images) > 0:
|
| 392 |
# 假设我们对第一张图片进行 resize 并展示
|
| 393 |
|
| 394 |
-
resized_image = draw_circle_on_image(resize_image(all_images[0]),original_coord[0],original_coord[1])
|
| 395 |
# state.append_message(state.roles[1], ("", resized_image,"Default"))
|
| 396 |
-
yield (state, state.to_gradio_chatbot(resized_image)) + (enable_btn,) * 5
|
| 397 |
|
| 398 |
with open(get_conv_log_filename(), "a") as fout:
|
| 399 |
data = {
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
def draw_circle_on_image(image, x, y, radius=20, color=(255, 0, 0)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# 获取图片的宽度和高度
|
| 74 |
img_width, img_height = image.size
|
| 75 |
|
|
|
|
| 95 |
right_down_point = (x + radius, y + radius)
|
| 96 |
|
| 97 |
# 绘制圆圈 (outline 参数设置圆圈的颜色,width 设置线条粗细)
|
| 98 |
+
draw.ellipse([left_up_point, right_down_point], outline=color, width=5)
|
| 99 |
|
| 100 |
+
return image,(x,y)
|
| 101 |
|
| 102 |
def get_conv_log_filename():
|
| 103 |
t = datetime.datetime.now()
|
|
|
|
| 378 |
if len(all_images) > 0:
|
| 379 |
# 假设我们对第一张图片进行 resize 并展示
|
| 380 |
|
| 381 |
+
resized_image,coordinates = draw_circle_on_image(resize_image(all_images[0]),original_coord[0],original_coord[1])
|
| 382 |
# state.append_message(state.roles[1], ("", resized_image,"Default"))
|
| 383 |
+
yield (state, state.to_gradio_chatbot(resized_image,coordinates)) + (enable_btn,) * 5
|
| 384 |
|
| 385 |
with open(get_conv_log_filename(), "a") as fout:
|
| 386 |
data = {
|
pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "uground_demo_test"
|
| 7 |
-
version = "3.
|
| 8 |
description = "Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents"
|
| 9 |
readme = "README.md"
|
| 10 |
requires-python = ">=3.8"
|
|
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "uground_demo_test"
|
| 7 |
+
version = "3.9"
|
| 8 |
description = "Navigating the Digital World as Humans Do: Universal Visual Grounding for GUI Agents"
|
| 9 |
readme = "README.md"
|
| 10 |
requires-python = ">=3.8"
|