@pytest.fixture
def mock_get_automl_text_training_job(mock_text_training_job):
    """Patch ``aiplatform.AutoMLTextTrainingJob`` to hand back the mock job.

    While the fixture is active, calling ``aiplatform.AutoMLTextTrainingJob``
    returns ``mock_text_training_job``. The patched class mock is yielded so
    tests can assert on the constructor arguments.
    """
    with patch.object(
        aiplatform, "AutoMLTextTrainingJob", return_value=mock_text_training_job
    ) as job_class_mock:
        yield job_class_mock


@pytest.fixture
def mock_run_automl_text_training_job(mock_text_training_job):
    """Patch ``run`` on the mock text training job and yield the patched method."""
    with patch.object(mock_text_training_job, "run") as run_mock:
        yield run_mock
# [START aiplatform_sdk_create_training_pipeline_text_classification_sample]
def create_training_pipeline_text_classification_sample(
    project: str,
    location: str,
    display_name: str,
    dataset_id: int,
    model_display_name: Optional[str] = None,
    multi_label: bool = False,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    """Run an AutoML text *classification* training job and return its model.

    Args:
        project: Passed to ``aiplatform.init``.
        location: Passed to ``aiplatform.init``.
        display_name: Display name given to the ``AutoMLTextTrainingJob``.
        dataset_id: Identifier handed to ``aiplatform.TextDataset``.
        model_display_name: Forwarded to ``job.run``.
        multi_label: Forwarded to the ``AutoMLTextTrainingJob`` constructor.
        training_fraction_split: Forwarded to ``job.run``.
        validation_fraction_split: Forwarded to ``job.run``.
        test_fraction_split: Forwarded to ``job.run``.
        budget_milli_node_hours: Forwarded to ``job.run``.
        disable_early_stopping: Forwarded to ``job.run``.
        sync: Forwarded to ``job.run``.

    Returns:
        The object returned by ``job.run`` after ``wait()`` has been called
        on it.
    """
    aiplatform.init(project=project, location=location)

    training_job = aiplatform.AutoMLTextTrainingJob(
        display_name=display_name,
        prediction_type="classification",
        multi_label=multi_label,
    )

    dataset = aiplatform.TextDataset(dataset_id)

    trained_model = training_job.run(
        dataset=dataset,
        model_display_name=model_display_name,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    # Wait on the returned model before reading its attributes.
    trained_model.wait()

    print(trained_model.display_name)
    print(trained_model.resource_name)
    print(trained_model.uri)
    return trained_model


# [END aiplatform_sdk_create_training_pipeline_text_classification_sample]
def test_create_training_pipeline_text_classification_sample(
    mock_sdk_init,
    mock_text_dataset,
    mock_get_automl_text_training_job,
    mock_run_automl_text_training_job,
    mock_get_text_dataset,
):
    """Check the classification sample forwards its arguments to the mocked SDK."""
    # Keyword arguments the sample should pass through to ``job.run`` untouched.
    forwarded_run_kwargs = dict(
        model_display_name=constants.DISPLAY_NAME_2,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
        budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000,
        disable_early_stopping=False,
    )

    sample_module = create_training_pipeline_text_classification_sample
    sample_module.create_training_pipeline_text_classification_sample(
        project=constants.PROJECT,
        location=constants.LOCATION,
        display_name=constants.DISPLAY_NAME,
        dataset_id=constants.RESOURCE_ID,
        **forwarded_run_kwargs,
    )

    mock_get_text_dataset.assert_called_once_with(constants.RESOURCE_ID)

    mock_sdk_init.assert_called_once_with(
        project=constants.PROJECT, location=constants.LOCATION
    )

    # ``multi_label`` is not passed above, so the sample's default (False) applies.
    mock_get_automl_text_training_job.assert_called_once_with(
        display_name=constants.DISPLAY_NAME,
        multi_label=False,
        prediction_type="classification",
    )

    # ``sync`` is likewise left at the sample's default (True).
    mock_run_automl_text_training_job.assert_called_once_with(
        dataset=mock_text_dataset,
        sync=True,
        **forwarded_run_kwargs,
    )
# [START aiplatform_sdk_create_training_pipeline_text_entity_extraction_sample]
def create_training_pipeline_text_entity_extraction_sample(
    project: str,
    location: str,
    display_name: str,
    dataset_id: int,
    model_display_name: Optional[str] = None,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    """Run an AutoML text *entity extraction* training job and return its model.

    Args:
        project: Passed to ``aiplatform.init``.
        location: Passed to ``aiplatform.init``.
        display_name: Display name given to the ``AutoMLTextTrainingJob``.
        dataset_id: Identifier handed to ``aiplatform.TextDataset``.
        model_display_name: Forwarded to ``job.run``.
        training_fraction_split: Forwarded to ``job.run``.
        validation_fraction_split: Forwarded to ``job.run``.
        test_fraction_split: Forwarded to ``job.run``.
        budget_milli_node_hours: Forwarded to ``job.run``.
        disable_early_stopping: Forwarded to ``job.run``.
        sync: Forwarded to ``job.run``.

    Returns:
        The object returned by ``job.run`` after ``wait()`` has been called
        on it.
    """
    aiplatform.init(project=project, location=location)

    extraction_job = aiplatform.AutoMLTextTrainingJob(
        display_name=display_name,
        prediction_type="extraction",
    )

    dataset = aiplatform.TextDataset(dataset_id)

    trained_model = extraction_job.run(
        dataset=dataset,
        model_display_name=model_display_name,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    # Wait on the returned model before reading its attributes.
    trained_model.wait()

    print(trained_model.display_name)
    print(trained_model.resource_name)
    print(trained_model.uri)
    return trained_model


# [END aiplatform_sdk_create_training_pipeline_text_entity_extraction_sample]
def test_create_training_pipeline_text_entity_extraction_sample(
    mock_sdk_init,
    mock_text_dataset,
    mock_get_automl_text_training_job,
    mock_run_automl_text_training_job,
    mock_get_text_dataset,
):
    """Check the entity-extraction sample forwards its arguments to the mocked SDK.

    The test name previously read ``..._text_clentity_extraction_sample`` (a
    copy/paste typo); it now matches the sample under test.
    """

    create_training_pipeline_text_entity_extraction_sample.create_training_pipeline_text_entity_extraction_sample(
        project=constants.PROJECT,
        location=constants.LOCATION,
        display_name=constants.DISPLAY_NAME,
        dataset_id=constants.RESOURCE_ID,
        model_display_name=constants.DISPLAY_NAME_2,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
        budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000,
        disable_early_stopping=False,
    )

    # The dataset is looked up by the id passed to the sample.
    mock_get_text_dataset.assert_called_once_with(constants.RESOURCE_ID)

    mock_sdk_init.assert_called_once_with(
        project=constants.PROJECT, location=constants.LOCATION
    )
    mock_get_automl_text_training_job.assert_called_once_with(
        display_name=constants.DISPLAY_NAME, prediction_type="extraction"
    )
    # ``sync`` is not passed above, so the sample's default (True) is expected.
    mock_run_automl_text_training_job.assert_called_once_with(
        dataset=mock_text_dataset,
        model_display_name=constants.DISPLAY_NAME_2,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
        budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000,
        disable_early_stopping=False,
        sync=True,
    )
# [START aiplatform_sdk_create_training_pipeline_text_sentiment_analysis_sample]
def create_training_pipeline_text_sentiment_analysis_sample(
    project: str,
    location: str,
    display_name: str,
    dataset_id: int,
    model_display_name: Optional[str] = None,
    sentiment_max: int = 10,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    """Run an AutoML text *sentiment analysis* training job and return its model.

    Args:
        project: Passed to ``aiplatform.init``.
        location: Passed to ``aiplatform.init``.
        display_name: Display name given to the ``AutoMLTextTrainingJob``.
        dataset_id: Identifier handed to ``aiplatform.TextDataset``.
        model_display_name: Forwarded to ``job.run``.
        sentiment_max: Forwarded to the ``AutoMLTextTrainingJob`` constructor
            (presumably the top of the sentiment score range; confirm
            against the SDK reference).
        training_fraction_split: Forwarded to ``job.run``.
        validation_fraction_split: Forwarded to ``job.run``.
        test_fraction_split: Forwarded to ``job.run``.
        budget_milli_node_hours: Forwarded to ``job.run``.
        disable_early_stopping: Forwarded to ``job.run``.
        sync: Forwarded to ``job.run``.

    Returns:
        The object returned by ``job.run`` after ``wait()`` has been called
        on it.
    """
    aiplatform.init(project=project, location=location)

    sentiment_job = aiplatform.AutoMLTextTrainingJob(
        display_name=display_name,
        prediction_type="sentiment",
        sentiment_max=sentiment_max,
    )

    dataset = aiplatform.TextDataset(dataset_id)

    trained_model = sentiment_job.run(
        dataset=dataset,
        model_display_name=model_display_name,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    # Wait on the returned model before reading its attributes.
    trained_model.wait()

    print(trained_model.display_name)
    print(trained_model.resource_name)
    print(trained_model.uri)
    return trained_model


# [END aiplatform_sdk_create_training_pipeline_text_sentiment_analysis_sample]
def test_create_training_pipeline_text_sentiment_analysis_sample(
    mock_sdk_init,
    mock_text_dataset,
    mock_get_automl_text_training_job,
    mock_run_automl_text_training_job,
    mock_get_text_dataset,
):
    """Check the sentiment sample forwards its arguments to the mocked SDK."""
    # Keyword arguments the sample should pass through to ``job.run`` untouched.
    forwarded_run_kwargs = dict(
        model_display_name=constants.DISPLAY_NAME_2,
        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
        test_fraction_split=constants.TEST_FRACTION_SPLIT,
        budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000,
        disable_early_stopping=False,
    )

    sample_module = create_training_pipeline_text_sentiment_analysis_sample
    sample_module.create_training_pipeline_text_sentiment_analysis_sample(
        project=constants.PROJECT,
        location=constants.LOCATION,
        display_name=constants.DISPLAY_NAME,
        dataset_id=constants.RESOURCE_ID,
        **forwarded_run_kwargs,
    )

    mock_get_text_dataset.assert_called_once_with(constants.RESOURCE_ID)

    mock_sdk_init.assert_called_once_with(
        project=constants.PROJECT, location=constants.LOCATION
    )

    # ``sentiment_max`` is not passed above, so the sample's default (10) applies.
    mock_get_automl_text_training_job.assert_called_once_with(
        display_name=constants.DISPLAY_NAME,
        prediction_type="sentiment",
        sentiment_max=10,
    )

    # ``sync`` is likewise left at the sample's default (True).
    mock_run_automl_text_training_job.assert_called_once_with(
        dataset=mock_text_dataset,
        sync=True,
        **forwarded_run_kwargs,
    )