From bb92380f1281466db95d31218d1e898c77e596f5 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Thu, 16 Mar 2023 17:28:20 -0700 Subject: [PATCH] feat: Add incremental training to AutoMLImageTrainingJob. PiperOrigin-RevId: 517272484 --- google/cloud/aiplatform/training_jobs.py | 21 +++++++++ .../test_automl_image_training_jobs.py | 44 ++++++++++++++----- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index ef3c566f7d..ea2b270ef8 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -5270,6 +5270,7 @@ def __init__( multi_label: bool = False, model_type: str = "CLOUD", base_model: Optional[models.Model] = None, + incremental_train_base_model: Optional[models.Model] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, @@ -5335,6 +5336,12 @@ def __init__( Otherwise, the new model will be trained from scratch. The `base` model must be in the same Project and Location as the new Model to train, and have the same model_type. + incremental_train_base_model (models.Model): + Optional. An existing model to use as the starting point for incrementally training + a new model with reduced training time. Supported for both Image Classification and + Object Detection models. If not specified, the new model will be trained from scratch. + The incremental base model must be in the same Project and Location as the new Model + to train, and have the same prediction_type and model_type. project (str): Optional. Project to run training in. Overrides project set in aiplatform.init.
location (str): @@ -5423,6 +5430,7 @@ def __init__( self._prediction_type = prediction_type self._multi_label = multi_label self._base_model = base_model + self._incremental_train_base_model = incremental_train_base_model def run( self, @@ -5603,6 +5611,7 @@ def run( return self._run( dataset=dataset, base_model=self._base_model, + incremental_train_base_model=self._incremental_train_base_model, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, @@ -5627,6 +5636,7 @@ def _run( self, dataset: datasets.ImageDataset, base_model: Optional[models.Model] = None, + incremental_train_base_model: Optional[models.Model] = None, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, @@ -5681,6 +5691,12 @@ def _run( Otherwise, the new model will be trained from scratch. The `base` model must be in the same Project and Location as the new Model to train, and have the same model_type. + incremental_train_base_model (models.Model): + Optional. An existing model to use as the starting point for incrementally training + a new model with reduced training time. Supported for both Image Classification and + Object Detection models. If not specified, the new model will be trained from scratch. + The incremental base model must be in the same Project and Location as the new Model + to train, and have the same prediction_type and model_type. model_id (str): Optional. The ID to use for the Model produced by this job, which will become the final component of the model resource name.
@@ -5818,6 +5834,11 @@ def _run( # Set ID of Vertex AI Model to base this training job off of training_task_inputs_dict["baseModelId"] = base_model.name + if incremental_train_base_model: + training_task_inputs_dict[ + "uptrainBaseModelId" + ] = incremental_train_base_model.name + return self._run_job( training_task_definition=training_task_definition, training_task_inputs=training_task_inputs_dict, diff --git a/tests/unit/aiplatform/test_automl_image_training_jobs.py b/tests/unit/aiplatform/test_automl_image_training_jobs.py index 861cf41d36..ecb03e3cf4 100644 --- a/tests/unit/aiplatform/test_automl_image_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_image_training_jobs.py @@ -85,6 +85,17 @@ struct_pb2.Value(), ) +_TEST_TRAINING_TASK_INPUTS_WITH_UPTRAIN_BASE_MODEL = json_format.ParseDict( + { + "modelType": "CLOUD", + "budgetMilliNodeHours": _TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, + "multiLabel": False, + "disableEarlyStopping": _TEST_TRAINING_DISABLE_EARLY_STOPPING, + "uptrainBaseModelId": _TEST_MODEL_ID, + }, + struct_pb2.Value(), +) + _TEST_FRACTION_SPLIT_TRAINING = 0.6 _TEST_FRACTION_SPLIT_VALIDATION = 0.2 _TEST_FRACTION_SPLIT_TEST = 0.2 @@ -213,6 +224,20 @@ def mock_model(): yield model +@pytest.fixture +def mock_uptrain_base_model(): + model = mock.MagicMock(models.Model) + model.name = _TEST_MODEL_ID + model._latest_future = None + model._exception = None + model._gca_resource = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description="This is the mock uptrain base Model's description", + name=_TEST_MODEL_NAME, + ) + yield model + + @pytest.mark.usefixtures("google_auth_mock") class TestAutoMLImageTrainingJob: def setup_method(self): @@ -223,7 +248,7 @@ def teardown_method(self): initializer.global_pool.shutdown(wait=True) def test_init_all_parameters(self, mock_model): - """Ensure all private members are set correctly at initialization""" + """Ensure all private members are set correctly at initialization.""" 
aiplatform.init(project=_TEST_PROJECT) @@ -275,7 +300,7 @@ def test_run_call_pipeline_service_create( mock_pipeline_service_get, mock_dataset_image, mock_model_service_get, - mock_model, + mock_uptrain_base_model, sync, ): """Create and run an AutoML ICN training job, verify calls and return value""" @@ -287,7 +312,7 @@ def test_run_call_pipeline_service_create( job = training_jobs.AutoMLImageTrainingJob( display_name=_TEST_DISPLAY_NAME, - base_model=mock_model, + incremental_train_base_model=mock_uptrain_base_model, labels=_TEST_LABELS, ) @@ -315,8 +340,7 @@ def test_run_call_pipeline_service_create( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, - labels=mock_model._gca_resource.labels, - description=mock_model._gca_resource.description, + labels=_TEST_MODEL_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, version_aliases=["default"], ) @@ -330,7 +354,7 @@ def test_run_call_pipeline_service_create( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_image_classification, - training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_UPTRAIN_BASE_MODEL, model_to_upload=true_managed_model, input_data_config=true_input_data_config, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, @@ -754,7 +778,7 @@ def test_splits_default( mock_pipeline_service_get, mock_dataset_image, mock_model_service_get, - mock_model, + mock_uptrain_base_model, sync, ): """ @@ -768,7 +792,8 @@ def test_splits_default( ) job = training_jobs.AutoMLImageTrainingJob( - display_name=_TEST_DISPLAY_NAME, base_model=mock_model + display_name=_TEST_DISPLAY_NAME, + incremental_train_base_model=mock_uptrain_base_model, ) model_from_job = job.run( @@ -785,7 +810,6 @@ def test_splits_default( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, - description=mock_model._gca_resource.description, 
encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, version_aliases=["default"], ) @@ -797,7 +821,7 @@ def test_splits_default( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, training_task_definition=schema.training_job.definition.automl_image_classification, - training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_UPTRAIN_BASE_MODEL, model_to_upload=true_managed_model, input_data_config=true_input_data_config, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,