googleapis · jaycee-li · May 10, 2022 · May 3, 2022 · May 4, 2022 · May 4, 2022
diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py
@@ -40,6 +40,7 @@
     job_state as gca_job_state,
     hyperparameter_tuning_job as gca_hyperparameter_tuning_job_compat,
     machine_resources as gca_machine_resources_compat,
+    manual_batch_tuning_parameters as gca_manual_batch_tuning_parameters_compact,
     study as gca_study_compat,
 )
 from google.cloud.aiplatform.constants import base as constants
@@ -364,6 +365,7 @@ def create(
         accelerator_count: Optional[int] = None,
         starting_replica_count: Optional[int] = None,
         max_replica_count: Optional[int] = None,
+        batch_size: Optional[int] = None,
         generate_explanation: Optional[bool] = False,
         explanation_metadata: Optional["aiplatform.explain.ExplanationMetadata"] = None,
         explanation_parameters: Optional[
@@ -477,6 +479,13 @@ def create(
                 The maximum number of machine replicas the batch operation may
                 be scaled to. Only used if `machine_type` is set.
                 Default is 10.
+            batch_size (Optional[int]):
+                The number of the records (e.g. instances) of the operation given in each batch
+                to a machine replica. Machine type, and size of a single record should be considered
+                when setting this parameter, higher value speeds up the batch operation's execution,
+                but too high value will result in a whole batch not fitting in a machine's memory,
+                and the whole operation will fail.
+                The default value is 64.
             generate_explanation (bool):
                 Optional. Generate explanation along with the batch prediction
                 results. This will cause the batch prediction output to include
@@ -647,7 +656,14 @@ def create(
 
             gapic_batch_prediction_job.dedicated_resources = dedicated_resources
 
-            gapic_batch_prediction_job.manual_batch_tuning_parameters = None
+            manual_batch_tuning_parameters = (
+                gca_manual_batch_tuning_parameters_compact.ManualBatchTuningParameters()
+            )
+            manual_batch_tuning_parameters.batch_size = batch_size
+
+            gapic_batch_prediction_job.manual_batch_tuning_parameters = (
+                manual_batch_tuning_parameters
+            )
 
         # User Labels
         gapic_batch_prediction_job.labels = labels

diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py
@@ -2276,6 +2276,7 @@ def batch_predict(
         accelerator_count: Optional[int] = None,
         starting_replica_count: Optional[int] = None,
         max_replica_count: Optional[int] = None,
+        batch_size: Optional[int] = None,
         generate_explanation: Optional[bool] = False,
         explanation_metadata: Optional[explain.ExplanationMetadata] = None,
         explanation_parameters: Optional[explain.ExplanationParameters] = None,
@@ -2391,6 +2392,13 @@ def batch_predict(
                 The maximum number of machine replicas the batch operation may
                 be scaled to. Only used if `machine_type` is set.
                 Default is 10.
+            batch_size (Optional[int]):
+                The number of the records (e.g. instances) of the operation given in each batch
+                to a machine replica. Machine type, and size of a single record should be considered
+                when setting this parameter, higher value speeds up the batch operation's execution,
+                but too high value will result in a whole batch not fitting in a machine's memory,
+                and the whole operation will fail.
+                The default value is 64.
             generate_explanation (bool):
                 Optional. Generate explanation along with the batch prediction
                 results. This will cause the batch prediction output to include
@@ -2462,6 +2470,7 @@ def batch_predict(
             accelerator_count=accelerator_count,
             starting_replica_count=starting_replica_count,
             max_replica_count=max_replica_count,
+            batch_size=batch_size,
             generate_explanation=generate_explanation,
             explanation_metadata=explanation_metadata,
             explanation_parameters=explanation_parameters,

diff --git a/tests/unit/aiplatform/test_jobs.py b/tests/unit/aiplatform/test_jobs.py
@@ -37,6 +37,7 @@
     io as gca_io_compat,
     job_state as gca_job_state_compat,
     machine_resources as gca_machine_resources_compat,
+    manual_batch_tuning_parameters as gca_manual_batch_tuning_parameters_compat,
 )
 
 from google.cloud.aiplatform_v1.services.job_service import client as job_service_client
@@ -132,6 +133,7 @@
 _TEST_ACCELERATOR_COUNT = 2
 _TEST_STARTING_REPLICA_COUNT = 2
 _TEST_MAX_REPLICA_COUNT = 12
+_TEST_BATCH_SIZE = 16
 
 _TEST_LABEL = {"team": "experimentation", "trial_id": "x435"}
 
@@ -718,6 +720,7 @@ def test_batch_predict_with_all_args(
             accelerator_count=_TEST_ACCELERATOR_COUNT,
             starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
             max_replica_count=_TEST_MAX_REPLICA_COUNT,
+            batch_size=_TEST_BATCH_SIZE,
             generate_explanation=True,
             explanation_metadata=_TEST_EXPLANATION_METADATA,
             explanation_parameters=_TEST_EXPLANATION_PARAMETERS,
@@ -756,6 +759,9 @@ def test_batch_predict_with_all_args(
                 starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
                 max_replica_count=_TEST_MAX_REPLICA_COUNT,
             ),
+            manual_batch_tuning_parameters=gca_manual_batch_tuning_parameters_compat.ManualBatchTuningParameters(
+                batch_size=_TEST_BATCH_SIZE
+            ),
             generate_explanation=True,
             explanation_spec=gca_explanation_compat.ExplanationSpec(
                 metadata=_TEST_EXPLANATION_METADATA,

diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py
@@ -49,6 +49,7 @@
     env_var as gca_env_var,
     explanation as gca_explanation,
     machine_resources as gca_machine_resources,
+    manual_batch_tuning_parameters as gca_manual_batch_tuning_parameters_compat,
     model_service as gca_model_service,
     model_evaluation as gca_model_evaluation,
     endpoint_service as gca_endpoint_service,
@@ -86,6 +87,8 @@
 _TEST_STARTING_REPLICA_COUNT = 2
 _TEST_MAX_REPLICA_COUNT = 12
 
+_TEST_BATCH_SIZE = 16
+
 _TEST_PIPELINE_RESOURCE_NAME = (
     "projects/my-project/locations/us-central1/trainingPipeline/12345"
 )
@@ -1394,6 +1397,7 @@ def test_batch_predict_with_all_args(self, create_batch_prediction_job_mock, syn
             accelerator_count=_TEST_ACCELERATOR_COUNT,
             starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
             max_replica_count=_TEST_MAX_REPLICA_COUNT,
+            batch_size=_TEST_BATCH_SIZE,
             generate_explanation=True,
             explanation_metadata=_TEST_EXPLANATION_METADATA,
             explanation_parameters=_TEST_EXPLANATION_PARAMETERS,
@@ -1408,41 +1412,40 @@ def test_batch_predict_with_all_args(self, create_batch_prediction_job_mock, syn
             batch_prediction_job.wait()
 
         # Construct expected request
-        expected_gapic_batch_prediction_job = (
-            gca_batch_prediction_job.BatchPredictionJob(
-                display_name=_TEST_BATCH_PREDICTION_DISPLAY_NAME,
-                model=model_service_client.ModelServiceClient.model_path(
-                    _TEST_PROJECT, _TEST_LOCATION, _TEST_ID
-                ),
-                input_config=gca_batch_prediction_job.BatchPredictionJob.InputConfig(
-                    instances_format="jsonl",
-                    gcs_source=gca_io.GcsSource(
-                        uris=[_TEST_BATCH_PREDICTION_GCS_SOURCE]
-                    ),
-                ),
-                output_config=gca_batch_prediction_job.BatchPredictionJob.OutputConfig(
-                    gcs_destination=gca_io.GcsDestination(
-                        output_uri_prefix=_TEST_BATCH_PREDICTION_GCS_DEST_PREFIX
-                    ),
-                    predictions_format="csv",
-                ),
-                dedicated_resources=gca_machine_resources.BatchDedicatedResources(
-                    machine_spec=gca_machine_resources.MachineSpec(
-                        machine_type=_TEST_MACHINE_TYPE,
-                        accelerator_type=_TEST_ACCELERATOR_TYPE,
-                        accelerator_count=_TEST_ACCELERATOR_COUNT,
-                    ),
-                    starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
-                    max_replica_count=_TEST_MAX_REPLICA_COUNT,
+        expected_gapic_batch_prediction_job = gca_batch_prediction_job.BatchPredictionJob(
+            display_name=_TEST_BATCH_PREDICTION_DISPLAY_NAME,
+            model=model_service_client.ModelServiceClient.model_path(
+                _TEST_PROJECT, _TEST_LOCATION, _TEST_ID
+            ),
+            input_config=gca_batch_prediction_job.BatchPredictionJob.InputConfig(
+                instances_format="jsonl",
+                gcs_source=gca_io.GcsSource(uris=[_TEST_BATCH_PREDICTION_GCS_SOURCE]),
+            ),
+            output_config=gca_batch_prediction_job.BatchPredictionJob.OutputConfig(
+                gcs_destination=gca_io.GcsDestination(
+                    output_uri_prefix=_TEST_BATCH_PREDICTION_GCS_DEST_PREFIX
                 ),
-                generate_explanation=True,
-                explanation_spec=gca_explanation.ExplanationSpec(
-                    metadata=_TEST_EXPLANATION_METADATA,
-                    parameters=_TEST_EXPLANATION_PARAMETERS,
+                predictions_format="csv",
+            ),
+            dedicated_resources=gca_machine_resources.BatchDedicatedResources(
+                machine_spec=gca_machine_resources.MachineSpec(
+                    machine_type=_TEST_MACHINE_TYPE,
+                    accelerator_type=_TEST_ACCELERATOR_TYPE,
+                    accelerator_count=_TEST_ACCELERATOR_COUNT,
                 ),
-                labels=_TEST_LABEL,
-                encryption_spec=_TEST_ENCRYPTION_SPEC,
-            )
+                starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
+                max_replica_count=_TEST_MAX_REPLICA_COUNT,
+            ),
+            manual_batch_tuning_parameters=gca_manual_batch_tuning_parameters_compat.ManualBatchTuningParameters(
+                batch_size=_TEST_BATCH_SIZE
+            ),
+            generate_explanation=True,
+            explanation_spec=gca_explanation.ExplanationSpec(
+                metadata=_TEST_EXPLANATION_METADATA,
+                parameters=_TEST_EXPLANATION_PARAMETERS,
+            ),
+            labels=_TEST_LABEL,
+            encryption_spec=_TEST_ENCRYPTION_SPEC,
         )
 
         create_batch_prediction_job_mock.assert_called_once_with(