Skip to content

Commit

Permalink
feat: add data_labeling samples (#78)
Browse files Browse the repository at this point in the history
  • Loading branch information
morgandu authored Nov 17, 2020
1 parent f79c0f4 commit 7daacd5
Show file tree
Hide file tree
Showing 6 changed files with 465 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_create_data_labeling_job_active_learning_sample]
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def create_data_labeling_job_active_learning_sample(
    project: str,
    display_name: str,
    dataset: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create a data labeling job with an active learning config and print the response.

    Args:
        project: Project ID used to build the parent resource path.
        display_name: Display name given to the new job.
        dataset: Full dataset resource name, in the form
            projects/{project}/locations/{location}/datasets/{dataset_id}.
        instruction_uri: Sent as the job's ``instruction_uri``.
        inputs_schema_uri: Sent as the job's ``inputs_schema_uri``.
        annotation_spec: Single entry placed in the job inputs under
            ``annotation_specs``.
        location: Location used to build the parent resource path.
        api_endpoint: Endpoint the client connects to.
    """
    # The client only needs to be created once and can be reused for
    # multiple requests.
    client = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # The job inputs are sent as a protobuf Value parsed from a plain dict.
    job_inputs = json_format.ParseDict(
        {"annotation_specs": [annotation_spec]}, Value()
    )

    job_spec = {
        "display_name": display_name,
        "datasets": [dataset],
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": job_inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": "data_labeling_job_active_learning"
        },
        # NOTE(review): presumably limits how many data items active
        # learning handles -- confirm against the API reference.
        "active_learning_config": {"max_data_item_count": 1},
    }

    response = client.create_data_labeling_job(
        parent=f"projects/{project}/locations/{location}",
        data_labeling_job=job_spec,
    )
    print("response:", response)


# [END aiplatform_create_data_labeling_job_active_learning_sample]
93 changes: 93 additions & 0 deletions samples/snippets/create_data_labeling_job_active_learning_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import os
import uuid
from google.cloud import aiplatform

import helpers

import create_data_labeling_job_active_learning_sample

# Endpoint and project are read from the environment; os.getenv returns
# None for a variable that is not set.
API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT")
PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
LOCATION = "us-central1"
# Hard-coded dataset ID used to build the dataset resource name in the test
# (presumably a pre-existing dataset in the test project -- TODO confirm).
DATASET_ID = "1905673553261363200"
INPUTS_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/datalabelingjob/inputs/image_classification_1.0.0.yaml"
# The UUID suffix is computed once at import time.
DISPLAY_NAME = f"temp_create_data_labeling_job_active_learning_test_{uuid.uuid4()}"

INSTRUCTIONS_GCS_URI = (
"gs://ucaip-sample-resources/images/datalabeling_instructions.pdf"
)
# Passed to the sample as its annotation_spec argument.
ANNOTATION_SPEC = "rose"


@pytest.fixture
def shared_state():
    """Provide an empty dict that the test and the teardown fixture both use."""
    yield {}


@pytest.fixture
def job_client():
    """Provide a JobServiceClient configured with API_ENDPOINT."""
    client = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
    yield client


@pytest.fixture(scope="function", autouse=True)
def teardown(capsys, shared_state, job_client):
    """Cancel and delete the data labeling job recorded by the test.

    Runs after every test in this module. The test stores the job's resource
    name in ``shared_state["data_labeling_job_name"]``. If that key is absent,
    the test failed before the name was recorded, so cleanup is skipped.
    """
    yield

    job_name = shared_state.get("data_labeling_job_name")
    if job_name is None:
        # Indexing the dict directly would raise KeyError here and add a
        # teardown error on top of the test's real failure.
        return

    job_client.cancel_data_labeling_job(name=job_name)

    # Verify Data Labeling Job is cancelled, or timeout after 400 seconds
    helpers.wait_for_job_state(
        get_job_method=job_client.get_data_labeling_job,
        name=job_name,
        timeout=400,
        freq=10,
    )

    # Delete the data labeling job; the call returns a long-running operation.
    response = job_client.delete_data_labeling_job(name=job_name)

    print("Delete LRO:", response.operation.name)
    delete_data_labeling_job_response = response.result(timeout=300)
    print("delete_data_labeling_job_response", delete_data_labeling_job_response)

    out, _ = capsys.readouterr()
    assert "delete_data_labeling_job_response" in out


# Creating a data labeling job for images
def test_create_data_labeling_job_active_learning_sample(capsys, shared_state):
    """Run the active learning sample and record the created job's name for teardown."""
    dataset_name = (
        f"projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}"
    )

    sample_module = create_data_labeling_job_active_learning_sample
    sample_module.create_data_labeling_job_active_learning_sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset=dataset_name,
        instruction_uri=INSTRUCTIONS_GCS_URI,
        inputs_schema_uri=INPUTS_SCHEMA_URI,
        annotation_spec=ANNOTATION_SPEC,
        api_endpoint=API_ENDPOINT,
    )

    captured = capsys.readouterr()

    # Save the resource name of the newly created data labeling job so the
    # teardown fixture can cancel and delete it.
    shared_state["data_labeling_job_name"] = helpers.get_name(captured.out)
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_create_data_labeling_job_image_segmentation_sample]
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def create_data_labeling_job_image_segmentation_sample(
    project: str,
    display_name: str,
    dataset: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: dict,
    annotation_set_name: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create an image segmentation data labeling job and print the response.

    Args:
        project: Project ID used to build the parent resource path.
        display_name: Display name given to the new job.
        dataset: Full dataset resource name, in the form
            projects/{project}/locations/{location}/datasets/{dataset_id}.
        instruction_uri: Sent as the job's ``instruction_uri``.
        inputs_schema_uri: Sent as the job's ``inputs_schema_uri``.
        annotation_spec: Single entry placed in the job inputs under
            ``annotationSpecColors``.
        annotation_set_name: Value of the
            ``aiplatform.googleapis.com/annotation_set_name`` annotation label.
        location: Location used to build the parent resource path.
        api_endpoint: Endpoint the client connects to.
    """
    # The client only needs to be created once and can be reused for
    # multiple requests.
    client = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # The job inputs are sent as a protobuf Value parsed from a plain dict.
    job_inputs = json_format.ParseDict(
        {"annotationSpecColors": [annotation_spec]}, Value()
    )

    job_spec = {
        "display_name": display_name,
        "datasets": [dataset],
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": job_inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": annotation_set_name
        },
    }

    response = client.create_data_labeling_job(
        parent=f"projects/{project}/locations/{location}",
        data_labeling_job=job_spec,
    )
    print("response:", response)


# [END aiplatform_create_data_labeling_job_image_segmentation_sample]
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
import os
import uuid
from google.cloud import aiplatform

import helpers

import create_data_labeling_job_image_segmentation_sample

# Endpoint and project are read from the environment; os.getenv returns
# None for a variable that is not set.
API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT")
PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT")
LOCATION = "us-central1"
# Hard-coded dataset ID used to build the dataset resource name in the test
# (presumably a pre-existing dataset in the test project -- TODO confirm).
DATASET_ID = "5111009432972558336"
INPUTS_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/datalabelingjob/inputs/image_segmentation_1.0.0.yaml"
# The UUID suffix is computed once at import time.
DISPLAY_NAME = f"temp_create_data_labeling_job_image_segmentation_test_{uuid.uuid4()}"

INSTRUCTIONS_GCS_URI = (
"gs://ucaip-sample-resources/images/datalabeling_instructions.pdf"
)
# Passed to the sample as its annotation_spec argument.
ANNOTATION_SPEC = {"color": {"red": 1.0}, "displayName": "rose"}
# Passed to the sample as its annotation_set_name argument; the UUID suffix
# is computed once at import time.
ANNOTATION_SET_NAME = f"temp_image_segmentation_{uuid.uuid4()}"

@pytest.fixture
def shared_state():
    """Provide an empty dict that the test and the teardown fixture both use."""
    yield {}


@pytest.fixture
def job_client():
    """Provide a JobServiceClient configured with API_ENDPOINT."""
    client = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": API_ENDPOINT}
    )
    yield client


@pytest.fixture(scope="function", autouse=True)
def teardown(capsys, shared_state, job_client):
    """Cancel and delete the data labeling job recorded by the test.

    Runs after every test in this module. The test stores the job's resource
    name in ``shared_state["data_labeling_job_name"]``. If that key is absent,
    the test failed before the name was recorded, so cleanup is skipped.
    """
    yield

    job_name = shared_state.get("data_labeling_job_name")
    if job_name is None:
        # Indexing the dict directly would raise KeyError here and add a
        # teardown error on top of the test's real failure.
        return

    job_client.cancel_data_labeling_job(name=job_name)

    # Verify Data Labeling Job is cancelled, or timeout after 400 seconds
    helpers.wait_for_job_state(
        get_job_method=job_client.get_data_labeling_job,
        name=job_name,
        timeout=400,
        freq=10,
    )

    # Delete the data labeling job; the call returns a long-running operation.
    response = job_client.delete_data_labeling_job(name=job_name)

    print("Delete LRO:", response.operation.name)
    delete_data_labeling_job_response = response.result(timeout=300)
    print("delete_data_labeling_job_response", delete_data_labeling_job_response)

    out, _ = capsys.readouterr()
    assert "delete_data_labeling_job_response" in out


# Creating a data labeling job for images
def test_create_data_labeling_job_image_segmentation_sample(capsys, shared_state):
    """Run the image segmentation sample and record the created job's name for teardown."""
    sample_module = create_data_labeling_job_image_segmentation_sample
    sample_module.create_data_labeling_job_image_segmentation_sample(
        project=PROJECT_ID,
        display_name=DISPLAY_NAME,
        dataset=f"projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}",
        instruction_uri=INSTRUCTIONS_GCS_URI,
        inputs_schema_uri=INPUTS_SCHEMA_URI,
        annotation_spec=ANNOTATION_SPEC,
        annotation_set_name=ANNOTATION_SET_NAME,
        api_endpoint=API_ENDPOINT,
    )

    captured = capsys.readouterr()

    # Save the resource name of the newly created data labeling job so the
    # teardown fixture can cancel and delete it.
    shared_state["data_labeling_job_name"] = helpers.get_name(captured.out)
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_create_data_labeling_job_specialist_pool_sample]
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def create_data_labeling_job_specialist_pool_sample(
    project: str,
    display_name: str,
    dataset: str,
    specialist_pool: str,
    instruction_uri: str,
    inputs_schema_uri: str,
    annotation_spec: str,
    location: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
):
    """Create a data labeling job assigned to a specialist pool and print the response.

    Args:
        project: Project ID used to build the parent resource path.
        display_name: Display name given to the new job.
        dataset: Full dataset resource name, in the form
            projects/{project}/locations/{location}/datasets/{dataset_id}.
        specialist_pool: Full specialist pool resource name, in the form
            projects/{project}/locations/{location}/specialistPools/{specialist_pool_id}.
        instruction_uri: Sent as the job's ``instruction_uri``.
        inputs_schema_uri: Sent as the job's ``inputs_schema_uri``.
        annotation_spec: Single entry placed in the job inputs under
            ``annotation_specs``.
        location: Location used to build the parent resource path.
        api_endpoint: Endpoint the client connects to.
    """
    # The client only needs to be created once and can be reused for
    # multiple requests.
    client = aiplatform.gapic.JobServiceClient(
        client_options={"api_endpoint": api_endpoint}
    )

    # The job inputs are sent as a protobuf Value parsed from a plain dict.
    job_inputs = json_format.ParseDict(
        {"annotation_specs": [annotation_spec]}, Value()
    )

    job_spec = {
        "display_name": display_name,
        "datasets": [dataset],
        "labeler_count": 1,
        "instruction_uri": instruction_uri,
        "inputs_schema_uri": inputs_schema_uri,
        "inputs": job_inputs,
        "annotation_labels": {
            "aiplatform.googleapis.com/annotation_set_name": "data_labeling_job_specialist_pool"
        },
        "specialist_pools": [specialist_pool],
    }

    response = client.create_data_labeling_job(
        parent=f"projects/{project}/locations/{location}",
        data_labeling_job=job_spec,
    )
    print("response:", response)


# [END aiplatform_create_data_labeling_job_specialist_pool_sample]
Loading

0 comments on commit 7daacd5

Please sign in to comment.