Skip to content

Encountered an internal AutoML error. Error Message/Code: Expected argument dataset_json to have a valid value #15849

@amitca71

Description

@amitca71
  • Package Name: azureml.core
  • Package Version: 1.19
  • Operating System: Linux driver 4.15.0-1098-azure #109~16.04.1-Ubuntu SMP Wed Sep 30 18:53:14 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
  • Python Version: 3.6.9 :: Anaconda, Inc

Describe the bug
A clear and concise description of what the bug is.
When I try to execute with automl_config, I get the exception shown below. If executing with a regular pipeline, it is not working.
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep
train_ds =train_dataset.parse_parquet_files()
automl_settings = {
"iteration_timeout_minutes": 10,
"experiment_timeout_hours": 0.25,
"n_cross_validations": 3,
"primary_metric": 'normalized_mean_absolute_error',
"max_concurrent_iterations": 3,
"max_cores_per_iteration": -1,
"verbosity": logging.INFO,
"enable_early_stopping": True
}

automl_config = AutoMLConfig(task = 'regression',
debug_log = 'automl_errors.log',
path = ".",
compute_target=compute_target,
training_data = train_ds,
label_column_name = target_column_name,
**automl_settings
)
from azureml.pipeline.core import PipelineData, TrainingOutput

metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

metrics_data = PipelineData(name='metrics_data',
datastore=datastore,
pipeline_output_name=metrics_output_name,
training_output=TrainingOutput(type='Metrics'))
model_data = PipelineData(name='model_data',
datastore=datastore,
pipeline_output_name=best_model_output_name,
training_output=TrainingOutput(type='Model'))
automl_step = AutoMLStep(
name='automl_module',
automl_config=automl_config,
outputs=[metrics_data, model_data],
allow_reuse=False)
training_pipeline = Pipeline(
description="training_pipeline",
workspace=ws,
steps=[automl_step])
training_pipeline_run = experiment.submit(training_pipeline)

The exception occurs when running:
from azureml.core.experiment import Experiment
experiment=Experiment(ws, 'automl_remote')
remote_run = experiment.submit(automl_config, show_output=True)

To Reproduce
Steps to reproduce the behavior:

  1. see above

Expected behavior
A clear and concise description of what you expected to happen.
The submission should not fail with an exception.
Screenshots
If applicable, add screenshots to help explain your problem.
Running on remote.
No run_configuration provided, running on cont-cluster with default configuration
Running on remote compute: cont-cluster

ValidationException Traceback (most recent call last)
in
1 from azureml.core.experiment import Experiment
2 experiment=Experiment(ws, 'automl_remote')
----> 3 remote_run = experiment.submit(automl_config, show_output=True)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_jupyter_common/__init__.py in submit(self, config, tags, **kwargs)
84 def _experiment_submit_notebook_decorator(original_submit):
85 def submit(self, config, tags=None, **kwargs):
---> 86 run = original_submit(self, config, tags, **kwargs)
87 _update_run_created_from(run)
88 return run

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/experiment.py in submit(self, config, tags, **kwargs)
218 submit_func = get_experiment_submit(config)
219 with self._log_context("submit config {}".format(config.__class__.__name__)):
--> 220 run = submit_func(config, self.workspace, self.name, **kwargs)
221 if tags is not None:
222 run.set_tags(tags)

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py in _automl_static_submit(automl_config_object, workspace, experiment_name, **kwargs)
98 compute_target,
99 parent_run_id,
--> 100 show_output)
101
102 automl_run.add_properties(global_tracking_info_registry.gather_all(settings.path))

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py in _start_execution(experiment, settings_obj, fit_params, run_config, compute_target, parent_run_id, show_output)
209 if settings_obj.scenario == constants.Scenarios._NON_PROD:
210 validate_non_prod_env_exists(experiment.workspace)
--> 211 automl_run = _default_execution(experiment, settings_obj, fit_params, False, show_output)
212
213 return automl_run

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py in _default_execution(experiment, settings_obj, fit_params, legacy_local, show_output, parent_run_id)
122 automl_estimator = _azureautomlclient.AzureAutoMLClient(experiment, settings_obj)
123
--> 124 return automl_estimator.fit(**fit_params)
125
126

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in fit(self, run_configuration, compute_target, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, show_output, existing_run, training_data, validation_data, test_data, _script_run, parent_run_id, is_managed, kwargs)
407 cv_splits_indices=cv_splits_indices, show_output=show_output,
408 training_data=training_data, validation_data=validation_data,
--> 409 test_data=test_data)
410 except Exception as e:
411 self._fail_parent_run(error_details=e, is_aml_compute=run_configuration.target != 'local')

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _fit_remote(self, run_configuration, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, show_output, training_data, validation_data, test_data)
421 y_valid=y_valid, sample_weight_valid=sample_weight_valid,
422 cv_splits_indices=cv_splits_indices, training_data=training_data,
--> 423 validation_data=validation_data, test_data=test_data)
424
425 if show_output:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _fit_remote_core(self, run_configuration, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, training_data, validation_data, test_data)
484 run_config_object, X=X, y=y, sample_weight=sample_weight, X_valid=X_valid, y_valid=y_valid,
485 sample_weight_valid=sample_weight_valid, cv_splits_indices=cv_splits_indices,
--> 486 training_data=training_data, validation_data=validation_data, test_data=test_data)
487
488 try:

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _create_parent_run_for_remote(self, run_config_object, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, training_data, validation_data, test_data)
535 sample_weight_valid=sample_weight_valid,
536 cv_splits_indices=cv_splits_indices,
--> 537 test_data=test_data
538 )
539

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/_azureautomlclient.py in _create_and_validate_parent_run_dto(self, target, training_data, validation_data, X, y, sample_weight, X_valid, y_valid, sample_weight_valid, cv_splits_indices, parent_run_id, test_data)
600 get_datasets_json(training_data=training_data,
601 validation_data=validation_data,
--> 602 test_data=test_data)
603 else:
604 dataprep_json = dataprep_utilities.get_dataprep_json(X=X, y=y,

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/dataset_utilities.py in get_datasets_json(training_data, validation_data, test_data)
130
131 # We must always be able to JSON-ify Datasets
--> 132 Contract.assert_value(dataset_json, "dataset_json")
133
134 return dataset_json

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/shared/_diagnostics/contract.py in assert_value(value, name, reference_code, log_safe)
63
64 Contract.assert_true(value is not None, message=error_details,
---> 65 target=name, reference_code=reference_code, log_safe=log_safe)
66
67 @staticmethod

/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/automl/core/shared/_diagnostics/contract.py in assert_true(condition, message, target, reference_code, log_safe)
42
43 raise ValidationException._with_error(AzureMLError.create(
---> 44 AutoMLInternal, target=target, reference_code=reference_code, error_details=message)
45 )
46

ValidationException: ValidationException:
Message: Encountered an internal AutoML error. Error Message/Code: Expected argument dataset_json to have a valid value.
InnerException: None
ErrorResponse
{
"error": {
"code": "SystemError",
"message": "Encountered an internal AutoML error. Error Message/Code: Expected argument dataset_json to have a valid value.",
"details_uri": "https://docs.microsoft.com/azure/machine-learning/resource-known-issues#automated-machine-learning",
"target": "dataset_json",
"inner_error": {
"code": "ClientError",
"inner_error": {
"code": "AutoMLInternal"
}
}
}
}
Additional context
Add any other context about the problem here.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Client: This issue points to a problem in the data-plane of the library.
    ML-AutoML: AreaPath
    Machine Learning
    Service Attention: Workflow: This issue is responsible by Azure service team.
    customer-reported: Issues that are reported by GitHub users external to the Azure organization.
    needs-author-feedback: Workflow: More information is needed from author to address the issue.
    no-recent-activity: There has been no recent activity on this issue.
    question: The issue doesn't require a change to the product in order to be resolved. Most issues start as that

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions