Spaces:

Backup-bdg
/

OpenHands

Build error

App Files Files Community

OpenHands / openhands /runtime /browser /utils.py

Backup-bdg

Upload 964 files

51ff9e5 verified 5 months ago

raw

history blame

5.19 kB

	import base64
	import datetime
	import os
	from pathlib import Path

	from PIL import Image

	from openhands.core.exceptions import BrowserUnavailableException
	from openhands.core.schema import ActionType
	from openhands.events.action import BrowseInteractiveAction, BrowseURLAction
	from openhands.events.observation import BrowserOutputObservation
	from openhands.runtime.browser.base64 import png_base64_url_to_image
	from openhands.runtime.browser.browser_env import BrowserEnv
	from openhands.utils.async_utils import call_sync_from_async


	def _save_screenshot(screenshot: str, workspace_dir: str) -> str:
	    """Persist a base64-encoded screenshot under the workspace.

	    The file is written to ``<workspace_dir>/.browser_screenshots`` with a
	    timestamp-based name.

	    Args:
	        screenshot: Base64 image data. If it contains a comma, only the
	            segment after the first comma is decoded (this drops a
	            ``data:...;base64,`` style prefix).
	        workspace_dir: Directory in which ``.browser_screenshots`` is created.

	    Returns:
	        The path of the written file, as a string.

	    Raises:
	        Exception: Propagates whatever the fallback decoding/saving raises
	            when both the direct write and the fallback fail.
	    """
	    screenshots_dir = Path(workspace_dir) / '.browser_screenshots'
	    # parents=True: a workspace directory that does not exist yet must not
	    # turn an otherwise successful browser step into an error.
	    screenshots_dir.mkdir(parents=True, exist_ok=True)

	    # Timestamp down to microseconds is used as the file name.
	    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
	    screenshot_path = str(screenshots_dir / f'screenshot_{timestamp}.png')

	    base64_data = screenshot
	    if ',' in base64_data:
	        base64_data = base64_data.split(',')[1]

	    try:
	        # Write the decoded bytes as-is instead of re-encoding through PIL.
	        image_data = base64.b64decode(base64_data)
	        with open(screenshot_path, 'wb') as f:
	            f.write(image_data)

	        # Sanity-check the written file. The context manager releases the
	        # file handle that Image.open keeps for the image.
	        with Image.open(screenshot_path) as saved_image:
	            saved_image.verify()
	    except Exception:
	        # Direct write or verification failed: decode through the project
	        # helper and let PIL re-encode the image as PNG.
	        image = png_base64_url_to_image(screenshot)
	        image.save(screenshot_path, format='PNG', optimize=True)

	    return screenshot_path


	async def browse(
	    action: BrowseURLAction | BrowseInteractiveAction,
	    browser: BrowserEnv | None,
	    workspace_dir: str | None = None,
	) -> BrowserOutputObservation:
	    """Run a browse action in the browser environment and wrap the result.

	    Args:
	        action: Either a legacy ``BrowseURLAction`` (translated into a
	            ``goto("<url>")`` command) or a ``BrowseInteractiveAction`` whose
	            ``browser_actions`` string is passed to the browser unchanged.
	        browser: Browser environment whose ``step`` method executes the
	            command.
	        workspace_dir: When given and the observation contains a screenshot,
	            the screenshot is also written to
	            ``<workspace_dir>/.browser_screenshots``.

	    Returns:
	        A ``BrowserOutputObservation`` built from the browser's observation.
	        If the step, the screenshot saving, or building the observation
	        raises, an observation with ``error=True`` and the exception text as
	        content is returned instead.

	    Raises:
	        BrowserUnavailableException: If ``browser`` is ``None``.
	        ValueError: If ``action`` is neither supported action type.
	    """
	    if browser is None:
	        raise BrowserUnavailableException()

	    # Bound up front so the error handler below can always read it.
	    asked_url = ''
	    if isinstance(action, BrowseURLAction):
	        # legacy BrowseURLAction
	        asked_url = action.url
	        if not asked_url.startswith('http'):
	            # Anything not starting with "http" is appended verbatim to the
	            # absolute path of the current working directory.
	            asked_url = os.path.abspath(os.curdir) + action.url
	        action_str = f'goto("{asked_url}")'
	    elif isinstance(action, BrowseInteractiveAction):
	        # new BrowseInteractiveAction, supports full featured BrowserGym actions
	        # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
	        action_str = action.browser_actions
	    else:
	        raise ValueError(f'Invalid action type: {action.action}')

	    try:
	        # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
	        obs = await call_sync_from_async(browser.step, action_str)

	        # Save screenshot if workspace_dir is provided. A failure here is
	        # caught by the handler below and reported as an error observation.
	        screenshot_path = None
	        screenshot = obs.get('screenshot')
	        if workspace_dir is not None and screenshot:
	            screenshot_path = _save_screenshot(screenshot, workspace_dir)

	        return BrowserOutputObservation(
	            content=obs['text_content'],  # text content of the page
	            url=obs.get('url', ''),  # URL of the page
	            screenshot=obs.get('screenshot', None),  # base64-encoded screenshot, png
	            screenshot_path=screenshot_path,  # path to saved screenshot file
	            # base64-encoded Set-of-Marks annotated screenshot, png
	            set_of_marks=obs.get('set_of_marks', None),
	            goal_image_urls=obs.get('image_content', []),
	            open_pages_urls=obs.get('open_pages_urls', []),  # list of open pages
	            active_page_index=obs.get('active_page_index', -1),  # index of the active page
	            axtree_object=obs.get('axtree_object', {}),  # accessibility tree object
	            extra_element_properties=obs.get('extra_element_properties', {}),
	            focused_element_bid=obs.get('focused_element_bid', None),  # focused element bid
	            last_browser_action=obs.get('last_action', ''),  # last browser env action performed
	            last_browser_action_error=obs.get('last_action_error', ''),
	            error=bool(obs.get('last_action_error', '')),  # error flag
	            trigger_by_action=action.action,
	        )
	    except Exception as e:
	        # Any failure is reported as an error observation, not raised.
	        return BrowserOutputObservation(
	            content=str(e),
	            screenshot='',
	            screenshot_path=None,
	            error=True,
	            last_browser_action_error=str(e),
	            url=asked_url if action.action == ActionType.BROWSE else '',
	            trigger_by_action=action.action,
	        )