Spaces:
Build error
Build error
import base64
import datetime
import logging
import os
from pathlib import Path

from PIL import Image

from openhands.core.exceptions import BrowserUnavailableException
from openhands.core.schema import ActionType
from openhands.events.action import BrowseInteractiveAction, BrowseURLAction
from openhands.events.observation import BrowserOutputObservation
from openhands.runtime.browser.base64 import png_base64_url_to_image
from openhands.runtime.browser.browser_env import BrowserEnv
from openhands.utils.async_utils import call_sync_from_async


def _save_screenshot(screenshot: str, workspace_dir: str) -> str | None:
    """Write a base64-encoded screenshot to ``<workspace_dir>/.browser_screenshots``.

    Saving is best-effort: a failure here must not turn an otherwise
    successful browser step into an error observation, so every exception is
    logged and reported to the caller as ``None``.

    Args:
        screenshot: Base64 screenshot string, optionally prefixed with a
            ``data:...;base64,`` header (everything up to the first comma is
            dropped before decoding).
        workspace_dir: Directory under which the ``.browser_screenshots``
            folder is created.

    Returns:
        The path of the written PNG file as a string, or ``None`` if the
        screenshot could not be saved.
    """
    try:
        screenshots_dir = Path(workspace_dir) / '.browser_screenshots'
        # parents=True: the workspace directory itself may not exist yet.
        screenshots_dir.mkdir(parents=True, exist_ok=True)

        # Microsecond-resolution timestamp keeps successive screenshots apart.
        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_%f')
        screenshot_path = str(screenshots_dir / f'screenshot_{timestamp}.png')

        # Keep only the payload after the data-URL header, when one is present.
        _, separator, payload = screenshot.partition(',')
        base64_data = payload if separator else screenshot

        try:
            # Write the decoded bytes straight to disk instead of round-tripping
            # through PIL, so the browser's PNG data is stored unmodified.
            Path(screenshot_path).write_bytes(base64.b64decode(base64_data))
            # Sanity-check the file; the context manager releases the handle
            # even when verification fails.
            with Image.open(screenshot_path) as saved_image:
                saved_image.verify()
        except Exception:
            # Direct save failed or produced an unreadable file: fall back to
            # decoding through the project helper and re-encoding as PNG
            # (presumably the helper returns a PIL image -- it is saved as one).
            image = png_base64_url_to_image(screenshot)
            image.save(screenshot_path, format='PNG', optimize=True)

        return screenshot_path
    except Exception:
        logging.getLogger(__name__).warning(
            'Failed to save browser screenshot under %s',
            workspace_dir,
            exc_info=True,
        )
        return None


async def browse(
    action: BrowseURLAction | BrowseInteractiveAction,
    browser: BrowserEnv | None,
    workspace_dir: str | None = None,
) -> BrowserOutputObservation:
    """Run a browse action in the browser environment and wrap the result.

    Args:
        action: Either a legacy ``BrowseURLAction`` (translated into a
            ``goto("<url>")`` browser action) or a ``BrowseInteractiveAction``
            whose ``browser_actions`` string is forwarded as-is.
        browser: The browser environment to step; must not be ``None``.
        workspace_dir: When given and the step returns a screenshot, the
            screenshot is also written to
            ``<workspace_dir>/.browser_screenshots`` and its path is reported
            in ``screenshot_path``. A failed save leaves ``screenshot_path``
            as ``None`` without affecting the rest of the observation.

    Returns:
        A ``BrowserOutputObservation`` built from the step result. If the step
        (or building the observation) raises, an observation with
        ``error=True`` carrying the exception text is returned instead.

    Raises:
        BrowserUnavailableException: If ``browser`` is ``None``.
        ValueError: If ``action`` is neither supported action type.
    """
    if browser is None:
        raise BrowserUnavailableException()

    # Bound up front so the error path below can always reference it.
    asked_url = ''
    if isinstance(action, BrowseURLAction):
        # legacy BrowseURLAction
        asked_url = action.url
        if not asked_url.startswith('http'):
            # Non-http targets are resolved against the current directory by
            # plain concatenation.
            # NOTE(review): assumes action.url starts with a path separator -- confirm.
            asked_url = os.path.abspath(os.curdir) + action.url
        # NOTE(review): the URL is interpolated unescaped into the action
        # string; a URL containing '"' would break it -- confirm upstream validation.
        action_str = f'goto("{asked_url}")'
    elif isinstance(action, BrowseInteractiveAction):
        # new BrowseInteractiveAction, supports full featured BrowserGym actions
        # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
        action_str = action.browser_actions
    else:
        raise ValueError(f'Invalid action type: {action.action}')

    try:
        # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
        obs = await call_sync_from_async(browser.step, action_str)

        # Save screenshot if workspace_dir is provided (best-effort).
        screenshot = obs.get('screenshot')
        screenshot_path = None
        if workspace_dir is not None and screenshot:
            screenshot_path = _save_screenshot(screenshot, workspace_dir)

        last_action_error = obs.get('last_action_error', '')
        return BrowserOutputObservation(
            content=obs['text_content'],  # text content of the page
            url=obs.get('url', ''),  # URL of the page
            screenshot=screenshot,  # base64-encoded screenshot, png
            screenshot_path=screenshot_path,  # path to saved screenshot file
            set_of_marks=obs.get(
                'set_of_marks', None
            ),  # base64-encoded Set-of-Marks annotated screenshot, png,
            goal_image_urls=obs.get('image_content', []),
            open_pages_urls=obs.get('open_pages_urls', []),  # list of open pages
            active_page_index=obs.get(
                'active_page_index', -1
            ),  # index of the active page
            axtree_object=obs.get('axtree_object', {}),  # accessibility tree object
            extra_element_properties=obs.get('extra_element_properties', {}),
            focused_element_bid=obs.get(
                'focused_element_bid', None
            ),  # focused element bid
            last_browser_action=obs.get(
                'last_action', ''
            ),  # last browser env action performed
            last_browser_action_error=last_action_error,
            error=bool(last_action_error),  # error flag
            trigger_by_action=action.action,
        )
    except Exception as e:
        # Any failure while stepping the browser or assembling the observation
        # is reported to the caller as an error observation, not raised.
        return BrowserOutputObservation(
            content=str(e),
            screenshot='',
            screenshot_path=None,
            error=True,
            last_browser_action_error=str(e),
            url=asked_url if action.action == ActionType.BROWSE else '',
            trigger_by_action=action.action,
        )