Spaces:
Build error
Build error
import argparse
import hashlib
import os
import shutil
import string
import tempfile
from contextlib import nullcontext
from enum import Enum
from pathlib import Path

import docker
from dirhash import dirhash
from jinja2 import Environment, FileSystemLoader

import openhands
from openhands import __version__ as oh_version
from openhands.core.exceptions import AgentRuntimeBuildError
from openhands.core.logger import openhands_logger as logger
from openhands.runtime.builder import DockerRuntimeBuilder, RuntimeBuilder
class BuildFromImageType(Enum):
    """Strategy for how much of an existing runtime image a build may reuse."""

    # Slowest: build from the base image (no dependencies are reused)
    SCRATCH = 'scratch'
    # Medium speed: reuse the most recent image with the same base image & OH version
    # (a lot of dependencies are already installed)
    VERSIONED = 'versioned'
    # Fastest: reuse the most recent image with the exact SAME dependencies (lock files)
    LOCK = 'lock'
def get_runtime_image_repo() -> str:
    """Return the Docker repo used for runtime images.

    Overridable through the OH_RUNTIME_RUNTIME_IMAGE_REPO environment variable.
    """
    default_repo = 'ghcr.io/all-hands-ai/runtime'
    return os.environ.get('OH_RUNTIME_RUNTIME_IMAGE_REPO', default_repo)
def _generate_dockerfile(
    base_image: str,
    build_from: BuildFromImageType = BuildFromImageType.SCRATCH,
    extra_deps: str | None = None,
) -> str:
    """Render the Dockerfile content for the runtime image from its Jinja2 template.

    Parameters:
    - base_image (str): The base image the runtime image is built on
    - build_from (BuildFromImageType): The build method for the runtime image
    - extra_deps (str | None): Extra dependency-install commands injected into
      the template; rendered as '' when None

    Returns:
    - str: The resulting Dockerfile content
    """
    template_dir = os.path.join(os.path.dirname(__file__), 'runtime_templates')
    jinja_env = Environment(loader=FileSystemLoader(searchpath=template_dir))
    template = jinja_env.get_template('Dockerfile.j2')
    return template.render(
        base_image=base_image,
        build_from_scratch=build_from == BuildFromImageType.SCRATCH,
        build_from_versioned=build_from == BuildFromImageType.VERSIONED,
        extra_deps='' if extra_deps is None else extra_deps,
    )
def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
    """Retrieves the Docker repo and tag associated with the Docker image.

    Parameters:
    - base_image (str): The name of the base Docker image

    Returns:
    - tuple[str, str]: The Docker repo and tag of the Docker image
    """
    if get_runtime_image_repo() in base_image:
        logger.debug(
            f'The provided image [{base_image}] is already a valid runtime image.\n'
            f'Will try to reuse it as is.'
        )
        if ':' not in base_image:
            base_image = base_image + ':latest'
        # rsplit handles registries that include a port (e.g. host:5000/img:tag),
        # which a plain split(':') would crash on with "too many values to unpack".
        repo, tag = base_image.rsplit(':', 1)
        return repo, tag
    else:
        if ':' not in base_image:
            base_image = base_image + ':latest'
        repo, tag = base_image.rsplit(':', 1)

        # Hash the repo if it's too long
        if len(repo) > 32:
            repo_hash = hashlib.md5(repo[:-24].encode()).hexdigest()[:8]
            repo = f'{repo_hash}_{repo[-24:]}'  # Use 8 char hash + last 24 chars
        else:
            repo = repo.replace('/', '_s_')

        new_tag = f'oh_v{oh_version}_image_{repo}_tag_{tag}'

        # if it's still too long, hash the entire image name
        if len(new_tag) > 128:
            # Keep the pre-hash tag so the warning can show what was replaced
            # (previously both placeholders showed the already-hashed tag).
            overlong_tag = new_tag
            new_tag = f'oh_v{oh_version}_image_{hashlib.md5(overlong_tag.encode()).hexdigest()[:64]}'
            logger.warning(
                f'The tag [{overlong_tag}] was too long, so we use a hash of the entire image name: {new_tag}'
            )

        return get_runtime_image_repo(), new_tag
def build_runtime_image(
    base_image: str,
    runtime_builder: RuntimeBuilder,
    platform: str | None = None,
    extra_deps: str | None = None,
    build_folder: str | None = None,
    dry_run: bool = False,
    force_rebuild: bool = False,
    extra_build_args: list[str] | None = None,
) -> str:
    """Prepares the final docker build folder.

    If dry_run is False, it will also build the OpenHands runtime Docker image
    using the docker build folder.

    Parameters:
    - base_image (str): The name of the base Docker image to use
    - runtime_builder (RuntimeBuilder): The runtime builder to use
    - platform (str): The target platform for the build (e.g. linux/amd64, linux/arm64)
    - extra_deps (str): Extra dependency-install commands injected into the Dockerfile
    - build_folder (str): The directory to use for the build. If not provided a temporary directory will be used
    - dry_run (bool): if True, it will only ready the build folder. It will not actually build the Docker image
    - force_rebuild (bool): if True, it will create the Dockerfile which uses the base_image
    - extra_build_args (List[str]): Additional build arguments to pass to the builder

    Returns:
    - str: <image_repo>:<MD5 hash>. Where MD5 hash is the hash of the docker build folder

    See https://docs.all-hands.dev/usage/architecture/runtime for more details.
    """
    # Use the caller's folder when given, otherwise a self-cleaning temp dir;
    # either way there is a single call site instead of two duplicated ones.
    folder_ctx = (
        tempfile.TemporaryDirectory() if build_folder is None else nullcontext(build_folder)
    )
    with folder_ctx as folder:
        return build_runtime_image_in_folder(
            base_image=base_image,
            runtime_builder=runtime_builder,
            build_folder=Path(folder),
            extra_deps=extra_deps,
            dry_run=dry_run,
            force_rebuild=force_rebuild,
            platform=platform,
            extra_build_args=extra_build_args,
        )
def build_runtime_image_in_folder(
    base_image: str,
    runtime_builder: RuntimeBuilder,
    build_folder: Path,
    extra_deps: str | None,
    dry_run: bool,
    force_rebuild: bool,
    platform: str | None = None,
    extra_build_args: list[str] | None = None,
) -> str:
    """Prepare *build_folder* and (unless dry_run) build the runtime image in it.

    Chooses the fastest viable build base: the exact source image if it already
    exists, else a lock-tagged image (same dependency lock files), else a
    versioned image (same base image & OH version), else the raw base image.

    Returns '<runtime_image_repo>:<source_tag>' in every case.
    """
    runtime_image_repo, _ = get_runtime_image_repo_and_tag(base_image)
    # lock_tag pins the exact dependency set (base image + lock-file hash)
    lock_tag = f'oh_v{oh_version}_{get_hash_for_lock_files(base_image)}'
    versioned_tag = (
        # truncate the base image to 96 characters to fit in the tag max length (128 characters)
        f'oh_v{oh_version}_{get_tag_for_versioned_image(base_image)}'
    )
    versioned_image_name = f'{runtime_image_repo}:{versioned_tag}'
    # source_tag additionally pins the source-tree hash — the most specific tag
    source_tag = f'{lock_tag}_{get_hash_for_source_files()}'
    hash_image_name = f'{runtime_image_repo}:{source_tag}'
    logger.info(f'Building image: {hash_image_name}')

    # force_rebuild bypasses all reuse and always builds from scratch
    if force_rebuild:
        logger.debug(
            f'Force rebuild: [{runtime_image_repo}:{source_tag}] from scratch.'
        )
        prep_build_folder(
            build_folder,
            base_image,
            build_from=BuildFromImageType.SCRATCH,
            extra_deps=extra_deps,
        )
        if not dry_run:
            _build_sandbox_image(
                build_folder,
                runtime_builder,
                runtime_image_repo,
                source_tag,
                lock_tag,
                versioned_tag,
                platform,
                extra_build_args=extra_build_args,
            )
        return hash_image_name

    lock_image_name = f'{runtime_image_repo}:{lock_tag}'
    build_from = BuildFromImageType.SCRATCH

    # If the exact image already exists, we do not need to build it
    if runtime_builder.image_exists(hash_image_name, False):
        logger.debug(f'Reusing Image [{hash_image_name}]')
        return hash_image_name

    # We look for an existing image that shares the same lock_tag. If such an image exists, we
    # can use it as the base image for the build and just copy source files. This makes the build
    # much faster.
    if runtime_builder.image_exists(lock_image_name):
        logger.debug(f'Build [{hash_image_name}] from lock image [{lock_image_name}]')
        build_from = BuildFromImageType.LOCK
        # NOTE: base_image is rebound to the reused image from here on
        base_image = lock_image_name
    elif runtime_builder.image_exists(versioned_image_name):
        logger.info(
            f'Build [{hash_image_name}] from versioned image [{versioned_image_name}]'
        )
        build_from = BuildFromImageType.VERSIONED
        base_image = versioned_image_name
    else:
        logger.debug(f'Build [{hash_image_name}] from scratch')

    prep_build_folder(build_folder, base_image, build_from, extra_deps)
    if not dry_run:
        _build_sandbox_image(
            build_folder,
            runtime_builder,
            runtime_image_repo,
            source_tag=source_tag,
            lock_tag=lock_tag,
            # Only tag the versioned image if we are building from scratch.
            # This avoids too much layers when you lay one image on top of another multiple times
            versioned_tag=versioned_tag
            if build_from == BuildFromImageType.SCRATCH
            else None,
            platform=platform,
            extra_build_args=extra_build_args,
        )
    return hash_image_name
def prep_build_folder(
    build_folder: Path,
    base_image: str,
    build_from: BuildFromImageType,
    extra_deps: str | None,
) -> None:
    """Populate *build_folder* with the source code, lock files and a Dockerfile.

    Parameters:
    - build_folder (Path): Destination directory; source ends up in build_folder/code
    - base_image (str): Base image the generated Dockerfile builds FROM
    - build_from (BuildFromImageType): Build strategy forwarded to the Dockerfile template
    - extra_deps (str | None): Extra dependency-install commands for the Dockerfile
    """
    # Copy the source code to directory. It will end up in build_folder/code
    # If package is not found, build from source code
    openhands_source_dir = Path(openhands.__file__).parent
    project_root = openhands_source_dir.parent
    logger.debug(f'Building source distribution using project root: {project_root}')

    # Copy the 'openhands' directory (Source code).
    # FIX: shutil.ignore_patterns matches bare entry names with fnmatch, so a
    # trailing '/' ('.*/', '__pycache__/') never matches anything — hidden dirs
    # and __pycache__ used to be copied despite being excluded from the source
    # hash in get_hash_for_source_files.
    shutil.copytree(
        openhands_source_dir,
        Path(build_folder, 'code', 'openhands'),
        ignore=shutil.ignore_patterns(
            '.*',  # hidden files and directories
            '__pycache__',  # bytecode caches
            '*.pyc',
            '*.md',
        ),
    )

    # Copy pyproject.toml and poetry.lock files, preferring the package dir
    # and falling back to the project root.
    for file in ['pyproject.toml', 'poetry.lock']:
        src = Path(openhands_source_dir, file)
        if not src.exists():
            src = Path(project_root, file)
        shutil.copy2(src, Path(build_folder, 'code', file))

    # Create a Dockerfile and write it to build_folder
    dockerfile_content = _generate_dockerfile(
        base_image,
        build_from=build_from,
        extra_deps=extra_deps,
    )
    dockerfile_path = Path(build_folder, 'Dockerfile')
    with open(str(dockerfile_path), 'w') as f:
        f.write(dockerfile_content)
# Base-36 digit set (0-9, a-z) used by truncate_hash to render compact tags.
_ALPHABET = string.digits + string.ascii_lowercase
def truncate_hash(hash: str) -> str:
    """Convert the base16 hash to base36 and truncate at 16 characters.

    Collision resistance is not required here — only uniqueness for tags —
    so truncating the converted value is acceptable.
    """
    remaining = int(hash, 16)
    digits: list[str] = []
    base = len(_ALPHABET)
    while remaining > 0 and len(digits) < 16:
        remaining, index = divmod(remaining, base)
        digits.append(_ALPHABET[index])
    return ''.join(digits)
def get_hash_for_lock_files(base_image: str) -> str:
    """Short unique hash of the dependency lock files combined with *base_image*.

    Hashes pyproject.toml and poetry.lock (from the package dir, falling back
    to the project root) together with the base image name.
    """
    package_dir = Path(openhands.__file__).parent
    digest = hashlib.md5(base_image.encode())
    for lock_name in ('pyproject.toml', 'poetry.lock'):
        lock_path = package_dir / lock_name
        if not lock_path.exists():
            lock_path = package_dir.parent / lock_name
        digest.update(lock_path.read_bytes())
    # We get away with truncation because we want something that is unique
    # rather than something that is cryptographically secure
    return truncate_hash(digest.hexdigest())
def get_tag_for_versioned_image(base_image: str) -> str:
    """Docker-tag-safe form of *base_image*: '/' -> '_s_', ':' -> '_t_',
    lowercased, keeping only the last 96 characters (so the full tag fits
    in Docker's 128-character limit)."""
    substitutions = str.maketrans({'/': '_s_', ':': '_t_'})
    return base_image.translate(substitutions).lower()[-96:]
def get_hash_for_source_files() -> str:
    """Short unique hash of the openhands source tree.

    Hidden directories, __pycache__ and .pyc files are excluded so that
    transient artifacts do not change the hash.
    """
    source_dir = Path(openhands.__file__).parent
    ignored_patterns = [
        '.*/',  # hidden directories
        '__pycache__/',
        '*.pyc',
    ]
    full_hash = dirhash(source_dir, 'md5', ignore=ignored_patterns)
    # We get away with truncation because we want something that is unique
    # rather than something that is cryptographically secure
    return truncate_hash(full_hash)
def _build_sandbox_image(
    build_folder: Path,
    runtime_builder: RuntimeBuilder,
    runtime_image_repo: str,
    source_tag: str,
    lock_tag: str,
    versioned_tag: str | None,
    platform: str | None = None,
    extra_build_args: list[str] | None = None,
) -> str:
    """Build and tag the sandbox image. The image will be tagged with all tags that do not yet exist."""
    candidate_tags: list[str | None] = [source_tag, lock_tag, versioned_tag]
    names: list[str] = []
    for tag in candidate_tags:
        if tag is None:
            continue
        full_name = f'{runtime_image_repo}:{tag}'
        # Skip tags the builder already has; only missing ones are applied.
        if not runtime_builder.image_exists(full_name, False):
            names.append(full_name)

    image_name = runtime_builder.build(
        path=str(build_folder),
        tags=names,
        platform=platform,
        extra_build_args=extra_build_args,
    )
    if not image_name:
        raise AgentRuntimeBuildError(f'Build failed for image {names}')
    return image_name
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--base_image', type=str, default='nikolaik/python-nodejs:python3.12-nodejs22'
    )
    parser.add_argument('--build_folder', type=str, default=None)
    parser.add_argument('--force_rebuild', action='store_true', default=False)
    parser.add_argument('--platform', type=str, default=None)
    args = parser.parse_args()

    if args.build_folder is not None:
        # If a build_folder is provided, we do not actually build the Docker image. We copy the necessary source code
        # and create a Dockerfile dynamically and place it in the build_folder only. This allows the Docker image to
        # then be created using the Dockerfile (most likely using the containers/build.sh script)
        build_folder = args.build_folder
        # Explicit check instead of `assert`: asserts are stripped when Python
        # runs with -O, which would silently skip this validation.
        if not os.path.exists(build_folder):
            raise FileNotFoundError(f'Build folder {build_folder} does not exist')
        logger.debug(
            f'Copying the source code and generating the Dockerfile in the build folder: {build_folder}'
        )

        runtime_image_repo, runtime_image_tag = get_runtime_image_repo_and_tag(
            args.base_image
        )
        logger.debug(
            f'Runtime image repo: {runtime_image_repo} and runtime image tag: {runtime_image_tag}'
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            # dry_run is true so we only prepare a temp_dir containing the required source code and the Dockerfile. We
            # then obtain the MD5 hash of the folder and return <image_repo>:<temp_dir_md5_hash>
            runtime_image_hash_name = build_runtime_image(
                args.base_image,
                runtime_builder=DockerRuntimeBuilder(docker.from_env()),
                build_folder=temp_dir,
                dry_run=True,
                force_rebuild=args.force_rebuild,
                platform=args.platform,
            )
            # rsplit keeps this robust if the runtime repo ever contains a
            # registry port (host:5000/repo:tag).
            _runtime_image_repo, runtime_image_source_tag = (
                runtime_image_hash_name.rsplit(':', 1)
            )

            # Move contents of temp_dir to build_folder
            shutil.copytree(temp_dir, build_folder, dirs_exist_ok=True)
            logger.debug(
                f'Build folder [{build_folder}] is ready: {os.listdir(build_folder)}'
            )

            # We now update the config.sh in the build_folder to contain the required values. This is used in the
            # containers/build.sh script which is called to actually build the Docker image
            with open(os.path.join(build_folder, 'config.sh'), 'a') as file:
                file.write(
                    '\n'
                    f'DOCKER_IMAGE_TAG={runtime_image_tag}\n'
                    f'DOCKER_IMAGE_SOURCE_TAG={runtime_image_source_tag}\n'
                )
            logger.debug(
                f'`config.sh` is updated with the image repo[{runtime_image_repo}] and tags [{runtime_image_tag}, {runtime_image_source_tag}]'
            )
            logger.debug(
                f'Dockerfile, source code and config.sh are ready in {build_folder}'
            )
    else:
        # If a build_folder is not provided, after copying the required source code and dynamically creating the
        # Dockerfile, we actually build the Docker image
        logger.debug('Building image in a temporary folder')
        docker_builder = DockerRuntimeBuilder(docker.from_env())
        image_name = build_runtime_image(
            args.base_image, docker_builder, platform=args.platform
        )
        logger.debug(f'\nBuilt image: {image_name}\n')