Source code for kfp.dsl.importer_node

# Copyright 2020-2022 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility function for building Importer Node spec."""

from typing import Any, Dict, Mapping, Optional, Type, Union

from kfp.dsl import importer_component
from kfp.dsl import pipeline_channel
from kfp.dsl import pipeline_task
from kfp.dsl import placeholders
from kfp.dsl import structures
from kfp.dsl import utils
from kfp.dsl.types import artifact_types
from kfp.dsl.types import type_utils

# Name of the importer component input that receives the artifact URI.
URI_KEY = 'uri'
# Name of the only output declared on the importer component spec.
OUTPUT_KEY = 'artifact'
# Base name for the extra inputs created for pipeline channels found in the
# artifact metadata; made unique via utils.make_name_unique_by_adding_index.
METADATA_KEY = 'metadata'


def importer(
    artifact_uri: Union[pipeline_channel.PipelineParameterChannel, str],
    artifact_class: Type[artifact_types.Artifact],
    reimport: bool = False,
    metadata: Optional[Mapping[str, Any]] = None,
) -> pipeline_task.PipelineTask:
    """Creates a task that imports an existing artifact into a pipeline.

    Pipeline parameter channels found inside ``metadata`` (as dict keys or
    values, as list elements, or embedded in strings) are replaced by
    input-value placeholders. Each such channel is registered as an extra
    input of the generated importer component and passed along when the
    component is called; a channel that is already registered reuses its
    existing input.

    Args:
      artifact_uri: The URI of the artifact to import.
      artifact_class: The artifact class being imported.
      reimport: Whether to reimport the artifact.
      metadata: Properties of the artifact.

    Returns:
      A task with the artifact accessible via its ``.output`` attribute.

    Examples::

      @dsl.pipeline(name='pipeline-with-importer')
      def pipeline_with_importer():

          importer1 = importer(
              artifact_uri='gs://ml-pipeline-playground/shakespeare1.txt',
              artifact_class=Dataset,
              reimport=False)
          train(dataset=importer1.output)
    """
    # Both mappings are populated as a side effect of walking ``metadata``:
    # input name -> InputSpec, and input name -> channel supplied at call time.
    extra_input_specs: Dict[str, structures.InputSpec] = {}
    extra_call_args: Dict[str, Any] = {}

    def _to_placeholders(node: Any) -> Any:
        """Returns ``node`` with pipeline channels replaced by placeholders."""
        if isinstance(node, pipeline_channel.PipelineParameterChannel):
            name_by_channel = {
                channel: input_name
                for input_name, channel in extra_call_args.items()
            }
            fresh_name = utils.make_name_unique_by_adding_index(
                METADATA_KEY,
                list(extra_call_args),
                '-',
            )
            # Keep the importer spec interface small: a channel used in
            # several places in metadata maps to a single input.
            input_name = name_by_channel.get(node, fresh_name)

            extra_call_args[input_name] = node
            extra_input_specs[input_name] = structures.InputSpec(
                type=node.channel_type)

            placeholder = placeholders.InputValuePlaceholder(
                input_name=input_name)
            return placeholder._to_string()

        if isinstance(node, dict):
            # Deliberately an explicit loop (key first, then value) instead
            # of a comprehension, so compiles are identical across Python
            # versions.
            converted = {}
            for key, value in node.items():
                converted_key = _to_placeholders(key)
                converted_value = _to_placeholders(value)
                converted[converted_key] = converted_value
            return converted

        if isinstance(node, list):
            converted_items = []
            for item in node:
                converted_items.append(_to_placeholders(item))
            return converted_items

        if isinstance(node, str):
            # Channels may be embedded in the string (e.g. via f-strings).
            embedded_channels = (
                pipeline_channel.extract_pipeline_channels_from_any(node))
            text = node
            # Recursing on each channel registers its input and yields the
            # placeholder that replaces the channel's pattern in the text.
            for channel in embedded_channels:
                placeholder_text = _to_placeholders(channel)
                text = text.replace(channel.pattern, placeholder_text)
            return text

        # Any other value is passed through untouched.
        return node

    metadata_with_placeholders = _to_placeholders(metadata)

    uri_placeholder = placeholders.InputValuePlaceholder(URI_KEY)._to_string()
    importer_spec = structures.ImporterSpec(
        artifact_uri=uri_placeholder,
        schema_title=type_utils.create_bundled_artifact_type(
            artifact_class.schema_title, artifact_class.schema_version),
        schema_version=artifact_class.schema_version,
        reimport=reimport,
        metadata=metadata_with_placeholders,
    )
    implementation = structures.Implementation(importer=importer_spec)

    # The URI input comes first, followed by the metadata-derived inputs.
    input_specs = {URI_KEY: structures.InputSpec(type='String')}
    input_specs.update(extra_input_specs)

    output_specs = {
        OUTPUT_KEY:
            structures.OutputSpec(
                type=type_utils.create_bundled_artifact_type(
                    artifact_class.schema_title,
                    artifact_class.schema_version)),
    }

    component_spec = structures.ComponentSpec(
        name='importer',
        implementation=implementation,
        inputs=input_specs,
        outputs=output_specs,
    )

    importer_factory = importer_component.ImporterComponent(
        component_spec=component_spec)
    return importer_factory(uri=artifact_uri, **extra_call_args)