Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions sdk/ml/azure-ai-ml/azure/ai/ml/_internal/_schema/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from azure.ai.ml._schema import ArmVersionedStr, NestedField, RegistryStr, StringTransformedEnum, UnionField
from azure.ai.ml._schema.pipeline.component_job import BaseNodeSchema, _resolve_inputs_outputs
from azure.ai.ml.constants._common import AzureMLResourceType
from azure.ai.ml.constants._common import AzureMLResourceType, BASE_PATH_CONTEXT_KEY

from .component import InternalBaseComponentSchema, NodeType

Expand Down Expand Up @@ -40,7 +40,7 @@ def make(self, data, **kwargs): # pylint: disable=unused-argument, no-self-use
# dict to node object
from azure.ai.ml.entities._job.pipeline._load_component import pipeline_node_factory

return pipeline_node_factory.load_from_dict(data) # pylint: disable=E1125, too-many-function-args
return pipeline_node_factory.load_from_dict(data=data)

@pre_dump
def resolve_inputs_outputs(self, job, **kwargs): # pylint: disable=unused-argument, no-self-use
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,9 @@ def command(self, value: str) -> None:
if isinstance(self.component, Component):
self.component.command = value
else:
msg = "Can't set command property for a registered component {}"
msg = "Can't set command property for a registered component {}. Tried to set it to {}."
raise ValidationException(
message=msg.format(self.component),
message=msg.format(self.component, value),
no_personal_data_message=msg,
target=ErrorTarget.COMMAND_JOB,
error_category=ErrorCategory.USER_ERROR,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from azure.ai.ml.entities._builders.do_while import DoWhile
from azure.ai.ml.entities._builders.pipeline import Pipeline
from azure.ai.ml.entities._component.component import Component
from azure.ai.ml.entities._component.component_factory import component_factory
from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob
from azure.ai.ml.entities._util import extract_label
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException
Expand Down Expand Up @@ -172,7 +173,19 @@ def load_from_dict(self, *, data: dict, _type: str = None) -> Union[BaseNode, Au
else:
data[CommonYamlFields.TYPE] = _type

new_instance = self.get_create_instance_func(_type)()
new_instance: Union[BaseNode, AutoMLJob] = self.get_create_instance_func(_type)()

if isinstance(new_instance, BaseNode):
# parse component
component_key = new_instance._get_component_attr_name()
if component_key in data and isinstance(data[component_key], dict):
data[component_key] = component_factory.load_from_dict(
data=data[component_key],
context={
BASE_PATH_CONTEXT_KEY: data[component_key].get(BASE_PATH_CONTEXT_KEY, None),
}
)

new_instance.__init__(**data)
return new_instance

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pytest
import yaml

from azure.ai.ml import Input, load_component
from azure.ai.ml import Input, load_component, load_job
from azure.ai.ml._internal import (
AISuperComputerConfiguration,
AISuperComputerScalePolicy,
Expand Down Expand Up @@ -592,3 +592,18 @@ def test_pipeline_with_setting_node_output_directly(self) -> None:
copy_file.outputs.output_dir.path = "path_on_datastore"
assert copy_file.outputs.output_dir.path == "path_on_datastore"
assert copy_file.outputs.output_dir.type == "path"

def test_job_properties(self):
    """Verify that pipeline- and node-level properties survive serialization.

    Loads the pipeline job YAML, converts it to both its dict form and its
    REST form, and checks that the ``properties`` sections agree at the
    pipeline level and for every node under ``jobs``.
    """
    yaml_path = "./tests/test_configs/internal/pipeline_jobs/pipeline_job_with_properties.yml"
    pipeline_job: PipelineJob = load_job(source=yaml_path)

    pipeline_dict = pipeline_job._to_dict()
    rest_pipeline_dict = pipeline_job._to_rest_object().as_dict()["properties"]

    # Pipeline-level properties: fixed expected value, identical in both forms.
    expected_pipeline_properties = {"AZURE_ML_PathOnCompute_input_data": "/tmp/test"}
    assert pipeline_dict["properties"] == expected_pipeline_properties
    assert rest_pipeline_dict["properties"] == pipeline_dict["properties"]

    # Node-level properties: exactly one PathOnCompute entry per node, and the
    # REST representation must carry the same mapping.
    for node_name, node_dict in pipeline_dict["jobs"].items():
        rest_node_dict = rest_pipeline_dict["jobs"][node_name]
        node_properties = node_dict["properties"]
        assert len(node_properties) == 1
        only_key = next(iter(node_properties))
        assert "AZURE_ML_PathOnCompute_" in only_key
        assert node_properties == rest_node_dict["properties"]

Original file line number Diff line number Diff line change
Expand Up @@ -26,139 +26,13 @@ properties:
AZURE_ML_PathOnCompute_input_data: "/tmp/test"

jobs:
node0: # inline command job with properties
command: echo hello ${{inputs.hello_string}}
environment: azureml:AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest
inputs:
hello_string: ${{parent.inputs.hello_string}}
properties:
AZURE_ML_PathOnCompute_hello_string: "/tmp/test"

node1: # inline parallel job with properties
type: parallel
compute: "azureml:cpu-cluster"
inputs:
test1: ${{parent.inputs.input_data}}
resources:
instance_count: 3
mini_batch_size: "100kb"
mini_batch_error_threshold: 5
logging_level: "DEBUG"
input_data: ${{inputs.input_data}}
max_concurrency_per_instance: 2
task:
type: run_function
code: "../python"
entry_script: pass_through.py
append_row_to: ${{outputs.scored_result}} # optional; if null, this is equivalent to summary_only mode in v1.
environment: azureml:my-env:1
properties:
AZURE_ML_PathOnCompute_input_data: "/tmp/test"

node2: # inline import job with properties
type: import
source:
type: azuresqldb
query: >-
select * from REGION
connection: azureml:my_username_password
output:
type: mltable
path: azureml://datastores/workspaceblobstore/paths/output_dir/
properties:
AZURE_ML_PathOnCompute_output: "/tmp/test"

node3: # inline spark job with properties
type: spark
inputs:
test1: ${{parent.inputs.input_data}}
file_input2: ${{parent.inputs.input_data}}
code: ../dsl_pipeline/spark_job_in_pipeline/src
entry:
file: entry.py # file path of the entry file relative to the code root folder
py_files:
- utils.zip
jars:
- scalaproj.jar
files:
- my_files.txt
args: >-
--file_input1 ${{inputs.test1}}
--file_input2 ${{inputs.file_input2}}
--output ${{outputs.output}}
compute: azureml:rezas-synapse-10
conf:
spark.driver.cores: 2
spark.driver.memory: "1g"
spark.executor.cores: 1
spark.executor.memory: "1g"
spark.executor.instances: 1
properties:
AZURE_ML_PathOnCompute_input_data: "/tmp/test"

node4: # inline automl job with properties
type: automl
task: text_ner
log_verbosity: info
primary_metric: accuracy
limits:
max_trials: 1
timeout_minutes: 60
training_data: ${{parent.inputs.text_ner_training_data}}
validation_data: ${{parent.inputs.text_ner_validation_data}}
properties:
AZURE_ML_PathOnCompute_training_data: "/tmp/test"

node5: # inline sweep job with properties
type: sweep
search_space:
component_in_number:
type: choice
values:
- 25
- 35
limits:
max_total_trials: 3
sampling_algorithm: random
objective:
goal: maximize
primary_metric: accuracy
trial: azureml:microsoftsamplescommandcomponentbasic_nopaths_test:1
properties:
AZURE_ML_PathOnCompute_input: "/tmp/test"

node6: # parallel node with properties as a typical implementation of a base node.
type: parallel
node7: # internal command node with properties as a typical implementation of an internal base node.
type: CommandComponent
compute: azureml:cpu-cluster
component: ../components/parallel_component_with_file_input.yml
component: file:../helloworld/helloworld_component_command.yml
inputs:
job_data_path: ${{parent.inputs.pipeline_job_data_path}}
outputs:
job_output_path:
mini_batch_size: "1"
mini_batch_error_threshold: 1
max_concurrency_per_instance: 1
properties:
AZURE_ML_PathOnCompute_job_data_path: "/tmp/test"

# Comment these lines out as internal node is not well supported in yaml now.
# node7: # internal command node with properties as a typical implement of internal base node.
# type: CommandComponent
# compute: azureml:cpu-cluster
# component: ../internal/helloworld/helloworld_component_command.yml
# inputs:
# training_data: ${{parent.inputs.input_data}}
# max_epochs: 10
# learning_rate: 0.01
# properties:
# AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"

node8: # pipeline node with properties
type: pipeline
inputs:
component_in_number: 11
component_in_path: ${{parent.inputs.input_data}}

component: ../components/helloworld_pipeline_component.yml
training_data: ${{parent.inputs.input_data}}
max_epochs: 10
learning_rate: 0.01
properties:
AZURE_ML_PathOnCompute_job_component_in_path: "/tmp/test"
AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ inputs:
jobs:
a:
component: azureml://registries/testFeed/components/my_hello_world_asset_2/versions/1
command: echo hello ${{inputs.hello_string}}
Comment thread
elliotzh marked this conversation as resolved.
environment: azureml://registries/testFeed/environments/sklearn-10-ubuntu2004-py38-cpu/versions/19.dev6
b:
command: echo "world" >> ${{outputs.world_output}}/world.txt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,18 +141,6 @@ jobs:
properties:
AZURE_ML_PathOnCompute_job_data_path: "/tmp/test"

# Comment these lines out as internal node is not well supported in yaml now.
# node7: # internal command node with properties as a typical implement of internal base node.
# type: CommandComponent
# compute: azureml:cpu-cluster
# component: ../internal/helloworld/helloworld_component_command.yml
# inputs:
# training_data: ${{parent.inputs.input_data}}
# max_epochs: 10
# learning_rate: 0.01
# properties:
# AZURE_ML_PathOnCompute_job_training_data: "/tmp/test"

node8: # pipeline node with properties
type: pipeline
inputs:
Expand Down