Source code for kfp.containers._build_image_api

# Copyright 2019 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Public names of this module: the only ones exported by a star-import.
__all__ = [
    'build_image_from_working_dir',
    'default_image_builder',
]


import logging
import os
import re
import shutil
import sys
import tempfile

import requests

from ._cache import calculate_recursive_dir_hash, try_read_value_from_cache, write_value_to_cache
from ._container_builder import ContainerBuilder


# Base image that _generate_dockerfile_text falls back to when the caller
# does not pass a (truthy) base_image.
default_base_image = 'gcr.io/deeplearning-platform-release/tf-cpu.1-14'


# Directory written as the WORKDIR instruction of the generated Dockerfile.
_container_work_dir = '/python_env'


# Builder used by build_image_from_working_dir when its `builder` argument is
# None. Note that it is instantiated (with default arguments) at import time.
default_image_builder = ContainerBuilder()


def _generate_dockerfile_text(context_dir: str, dockerfile_path: str, base_image: str = None) -> str:
    '''Returns the text of a Dockerfile for the given build context.

    The generated Dockerfile starts from the base image, switches to the
    module's container work directory, installs ``requirements.txt`` (only
    when that file exists in the root of ``context_dir``) and finally copies
    the whole build context into the image.

    Args:
        context_dir: Build context directory that is checked for a
            top-level ``requirements.txt``.
        dockerfile_path: Not used by this function; kept for interface
            compatibility with existing callers.
        base_image: Optional. Image name, or a zero-argument callable
            returning the image name. When falsy, ``default_base_image``
            is used instead.

    Returns:
        The Dockerfile instructions joined with newlines (no trailing newline).
    '''
    logging.info('Generating the Dockerfile')

    requirements_name = 'requirements.txt'
    has_requirements = os.path.exists(os.path.join(context_dir, requirements_name))

    # The base image (including the default one) may be given lazily as a callable.
    image = base_image or default_base_image
    if callable(image):
        image = image()

    instructions = [
        'FROM {}'.format(image),
        'WORKDIR {}'.format(_container_work_dir),
    ]
    if has_requirements:
        # Requirements are copied and installed before the rest of the context.
        instructions += [
            'COPY {} .'.format(requirements_name),
            'RUN python3 -m pip install -r {}'.format(requirements_name),
        ]
    instructions.append('COPY . .')

    return '\n'.join(instructions)


def build_image_from_working_dir(
    image_name: str = None,
    working_dir: str = None,
    file_filter_re: str = r'.*\.py',
    timeout: int = 1000,
    base_image: str = None,
    builder: ContainerBuilder = None,
) -> str:
    '''Builds and pushes a new container image that captures the current python working directory.

    This function recursively scans the working directory and captures the following files in the container image context:

    * :code:`requirements.txt` files
    * All python files (can be overridden by passing a different `file_filter_re` argument)

    The function generates Dockerfile that starts from a python container image, install packages from requirements.txt (if present) and copies all the captured python files to the container image.
    The Dockerfile can be overridden by placing a custom Dockerfile in the root of the working directory.

    The result is cached under a key computed from the contents of the build context, so building an unchanged
    directory again returns the previously built image name without invoking the builder.

    Args:
        image_name: Optional. The image repo name where the new container image will be pushed. The name will be generated if not set.
        working_dir: Optional. The directory that will be captured. The current directory will be used if omitted.
        file_filter_re: Optional. A regular expression that will be used to decide which files to include in the container building context.
          The expression is applied to the bare file name and only has to match at its beginning.
        timeout: Optional. The image building timeout in seconds.
        base_image: Optional. The container image to use as the base for the new image. If not set, the Google Deep Learning Tensorflow CPU image will be used.
        builder: Optional. An instance of :py:class:`kfp.containers.ContainerBuilder` or compatible class that will be used to build the image.
          The default builder uses "kubeflow-pipelines-container-builder" service account in "kubeflow" namespace. It works with Kubeflow Pipelines clusters installed in "kubeflow" namespace using Google Cloud Marketplace or Standalone with version > 0.4.0.
          If your Kubeflow Pipelines is installed in a different namespace, you should use :code:`ContainerBuilder(namespace='<your-kfp-namespace>', ...)`.

          Depending on how you installed Kubeflow Pipelines, you need to configure your :code:`ContainerBuilder` instance's namespace and service_account:

          * For clusters installed with Kubeflow >= 0.7, use :code:`ContainerBuilder(namespace='<your-user-namespace>', service_account='default-editor', ...)`. You can omit the namespace if you use kfp sdk from in-cluster notebook, it uses notebook namespace by default.
          * For clusters installed with Kubeflow < 0.7, use :code:`ContainerBuilder(service_account='default', ...)`.
          * For clusters installed using Google Cloud Marketplace or Standalone with version <= 0.4.0, use :code:`ContainerBuilder(namespace='<your-kfp-namespace>', service_account='default')`

          You may refer to `installation guide <https://www.kubeflow.org/docs/pipelines/installation/overview/>`_ for more details about different installation options.

    Returns:
        The full name of the container image including the hash digest. E.g. :code:`gcr.io/my-org/my-image@sha256:86c1...793c`.

    Raises:
        ValueError: If `base_image` is passed while the working directory contains a custom Dockerfile.
    '''
    current_dir = working_dir or os.getcwd()
    with tempfile.TemporaryDirectory() as context_dir:
        logging.info('Creating the build context directory: {}'.format(context_dir))

        # Copying all *.py and requirements.txt files.
        # The directory tree is mirrored even for directories with no matching files.
        for dirpath, _dirnames, filenames in os.walk(current_dir):
            dst_dirpath = os.path.join(context_dir, os.path.relpath(dirpath, current_dir))
            os.makedirs(dst_dirpath, exist_ok=True)
            for file_name in filenames:
                # NOTE(review): re.match only anchors at the start of the name, so the
                # default pattern also accepts names such as "x.pyc" - confirm intended.
                if re.match(file_filter_re, file_name) or file_name == 'requirements.txt':
                    src_path = os.path.join(dirpath, file_name)
                    dst_path = os.path.join(dst_dirpath, file_name)
                    shutil.copy(src_path, dst_path)

        # A Dockerfile in the root of the working directory takes precedence over
        # the generated one; it is copied explicitly because the filter above
        # normally does not match it.
        src_dockerfile_path = os.path.join(current_dir, 'Dockerfile')
        dst_dockerfile_path = os.path.join(context_dir, 'Dockerfile')
        if os.path.exists(src_dockerfile_path):
            if base_image:
                raise ValueError('Cannot specify base_image when using custom Dockerfile (which already specifies the base image).')
            shutil.copy(src_dockerfile_path, dst_dockerfile_path)
        else:
            dockerfile_text = _generate_dockerfile_text(context_dir, dst_dockerfile_path, base_image)
            with open(dst_dockerfile_path, 'w') as f:
                f.write(dockerfile_text)

        # The cache key is derived from the build context only (which includes the
        # Dockerfile and therefore the base image).
        # NOTE(review): image_name and builder do not take part in the key, so a
        # cached image is returned even when a different target name is requested.
        cache_name = 'build_image_from_working_dir'
        cache_key = calculate_recursive_dir_hash(context_dir)
        cached_image_name = try_read_value_from_cache(cache_name, cache_key)
        if cached_image_name:
            return cached_image_name

        if builder is None:
            builder = default_image_builder
        image_name = builder.build(
            local_dir=context_dir,
            target_image=image_name,
            timeout=timeout,
        )
        # Only successful (truthy) build results are remembered.
        if image_name:
            write_value_to_cache(cache_name, cache_key, image_name)
        return image_name