Merge pull request #1 from googleapis/main

sync with googleapis
jaycee-li · May 17, 2022 · bcbb21d · bcbb21d
2 parents 9007f53 + 5fdf151
commit bcbb21d
Show file tree

Hide file tree

Showing 13 changed files with 169 additions and 14 deletions.
diff --git a/docs/definition_v1/types.rst → docs/aiplatform/definition_v1/types.rst b/docs/definition_v1/types.rst → docs/aiplatform/definition_v1/types.rst
diff --git a/docs/definition_v1beta1/types.rst → docs/aiplatform/definition_v1beta1/types.rst b/docs/definition_v1beta1/types.rst → docs/aiplatform/definition_v1beta1/types.rst
diff --git a/docs/instance_v1/types.rst → docs/aiplatform/instance_v1/types.rst b/docs/instance_v1/types.rst → docs/aiplatform/instance_v1/types.rst
diff --git a/docs/instance_v1beta1/types.rst → docs/aiplatform/instance_v1beta1/types.rst b/docs/instance_v1beta1/types.rst → docs/aiplatform/instance_v1beta1/types.rst
diff --git a/docs/params_v1/types.rst → docs/aiplatform/params_v1/types.rst b/docs/params_v1/types.rst → docs/aiplatform/params_v1/types.rst
diff --git a/docs/params_v1beta1/types.rst → docs/aiplatform/params_v1beta1/types.rst b/docs/params_v1beta1/types.rst → docs/aiplatform/params_v1beta1/types.rst
diff --git a/docs/prediction_v1/types.rst → docs/aiplatform/prediction_v1/types.rst b/docs/prediction_v1/types.rst → docs/aiplatform/prediction_v1/types.rst
diff --git a/docs/prediction_v1beta1/types.rst → docs/aiplatform/prediction_v1beta1/types.rst b/docs/prediction_v1beta1/types.rst → docs/aiplatform/prediction_v1beta1/types.rst
diff --git a/docs/aiplatform.rst → docs/aiplatform/services.rst b/docs/aiplatform.rst → docs/aiplatform/services.rst
@@ -3,4 +3,4 @@ Google Cloud Aiplatform SDK
 
 .. automodule:: google.cloud.aiplatform
     :members:
-    :show-inheritance:
+    :show-inheritance:
diff --git a/docs/aiplatform/types.rst b/docs/aiplatform/types.rst
@@ -0,0 +1,13 @@
+Types for Google Cloud Aiplatform SDK API
+===========================================
+.. toctree::
+    :maxdepth: 2
+
+    instance_v1
+    instance_v1beta1
+    params_v1
+    params_v1beta1
+    prediction_v1
+    prediction_v1beta1
+    definition_v1
+    definition_v1beta1
diff --git a/docs/index.rst b/docs/index.rst
@@ -7,7 +7,9 @@ API Reference
 .. toctree::
     :maxdepth: 2
 
-    aiplatform
+    aiplatform/services
+    aiplatform/types
+
     aiplatform_v1/services
     aiplatform_v1/types
 
@@ -22,4 +24,4 @@ For a list of all ``google-cloud-aiplatform`` releases:
 .. toctree::
    :maxdepth: 2
 
-   changelog
+   changelog
diff --git a/google/cloud/aiplatform/matching_engine/_protos/match_service.proto b/google/cloud/aiplatform/matching_engine/_protos/match_service.proto
@@ -0,0 +1,136 @@
+syntax = "proto3";
+
+package google.cloud.aiplatform.container.v1beta1;
+
+import "google/rpc/status.proto";
+
+// MatchService is a Google-managed service for efficient vector similarity
+// search at scale.
+service MatchService {
+  // Returns the nearest neighbors for a single query. If it is a sharded
+  // deployment, calls the other shards and aggregates the responses.
+  rpc Match(MatchRequest) returns (MatchResponse) {}
+
+  // Returns the nearest neighbors for a batch of queries. If it is a sharded
+  // deployment, calls the other shards and aggregates the responses.
+  rpc BatchMatch(BatchMatchRequest) returns (BatchMatchResponse) {}
+}
+
+// Parameters for a match query.
+message MatchRequest {
+  // The ID of the DeployedIndex that will serve the request.
+  // This MatchRequest is sent to a specific IndexEndpoint of the Control API,
+  // as per the IndexEndpoint.network. That IndexEndpoint also has
+  // IndexEndpoint.deployed_indexes, and each such index has a
+  // DeployedIndex.id field.
+  // The value of the field below must equal one of the DeployedIndex.id
+  // fields of the IndexEndpoint that is being called for this request.
+  string deployed_index_id = 1;
+
+  // The embedding values.
+  repeated float float_val = 2;
+
+  // The number of nearest neighbors to be retrieved from the database for
+  // each query. If not set, will use the default from
+  // the service configuration.
+  int32 num_neighbors = 3;
+
+  // The list of restricts.
+  repeated Namespace restricts = 4;
+
+  // Crowding is a constraint on a neighbor list produced by nearest neighbor
+  // search requiring that no more than some value k' of the k neighbors
+  // returned have the same value of crowding_attribute.
+  // It's used for improving result diversity.
+  // This field is the maximum number of matches with the same crowding tag.
+  int32 per_crowding_attribute_num_neighbors = 5;
+
+  // The number of neighbors to find via approximate search before
+  // exact reordering is performed. If not set, the default value from scam
+  // config is used; if set, this value must be > 0.
+  int32 approx_num_neighbors = 6;
+
+  // The fraction of leaves to search; setting it at query time lets the user
+  // tune search performance. Raising it increases both search accuracy and
+  // latency. Documented range is 0.0 to 1.0; if unset or 0.0, the query uses
+  // NearestNeighborSearchConfig.TreeAHConfig.leaf_nodes_to_search_percent.
+  // NOTE(review): int32 cannot hold values strictly between 0 and 1; confirm.
+  int32 leaf_nodes_to_search_percent_override = 7;
+}
+
+// Response of a match query.
+message MatchResponse {
+  message Neighbor {
+    // The id of this match.
+    string id = 1;
+
+    // The distance of this match.
+    double distance = 2;
+  }
+  // All neighbors returned for the query.
+  repeated Neighbor neighbor = 1;
+}
+
+// Parameters for a batch match query.
+message BatchMatchRequest {
+  // Batched requests against one index.
+  message BatchMatchRequestPerIndex {
+    // The ID of the DeployedIndex that will serve the request.
+    string deployed_index_id = 1;
+
+    // The requests against the index identified by the above deployed_index_id.
+    repeated MatchRequest requests = 2;
+
+    // Selects the optimal batch size to use for low-level batching. Queries
+    // within each low-level batch are executed sequentially while low-level
+    // batches are executed in parallel.
+    // This field is optional, defaults to 0 if not set. A non-positive number
+    // disables low-level batching, i.e. all queries are executed sequentially.
+    int32 low_level_batch_size = 3;
+  }
+
+  // The batch requests grouped by indexes.
+  repeated BatchMatchRequestPerIndex requests = 1;
+}
+
+// Response of a batch match query.
+message BatchMatchResponse {
+  // Batched responses for one index.
+  message BatchMatchResponsePerIndex {
+    // The ID of the DeployedIndex that produced the responses.
+    string deployed_index_id = 1;
+
+    // The match responses produced by the index identified by the above
+    // deployed_index_id. This field is set only when the query against that
+    // index succeeds.
+    repeated MatchResponse responses = 2;
+
+    // The status of the response for the batch query identified by the above
+    // deployed_index_id.
+    google.rpc.Status status = 3;
+  }
+
+  // The batched responses grouped by indexes.
+  repeated BatchMatchResponsePerIndex responses = 1;
+}
+
+// Namespace specifies the rules for determining the datapoints that are
+// eligible for each matching query; the query is an AND across namespaces.
+message Namespace {
+  // The string name of the namespace that this proto is specifying,
+  // such as "color", "shape", "geo", or "tags".
+  string name = 1;
+
+  // The allowed tokens in the namespace.
+  repeated string allow_tokens = 2;
+
+  // The denied tokens in the namespace.
+  // The denied tokens have exactly the same format as the token fields, but
+  // represent a negation. When a token is denied, then matches will be
+  // excluded whenever the other datapoint has that token.
+  //
+  // For example, if a query specifies {color: red, blue, !purple}, then that
+  // query will match datapoints that are red or blue, but if those points are
+  // also purple, then they will be excluded even if they are red/blue.
+  repeated string deny_tokens = 3;
+}
diff --git a/tests/system/aiplatform/test_dataset.py b/tests/system/aiplatform/test_dataset.py
@@ -131,7 +131,7 @@ def setup_method(self):
 
     @pytest.fixture()
     def storage_client(self):
-        yield storage.Client(project=e2e_base._PROJECT)
+        yield storage.Client(project=_TEST_PROJECT)
 
     @pytest.fixture()
     def staging_bucket(self, storage_client):
@@ -174,7 +174,7 @@ def test_get_new_dataset_and_import(self, dataset_gapic_client):
 
         try:
             text_dataset = aiplatform.TextDataset.create(
-                display_name=f"temp_sdk_integration_test_create_text_dataset_{uuid.uuid4()}",
+                display_name=self._make_display_name(key="get_new_dataset_and_import"),
             )
 
             my_dataset = aiplatform.TextDataset(dataset_name=text_dataset.name)
@@ -189,7 +189,6 @@ def test_get_new_dataset_and_import(self, dataset_gapic_client):
             my_dataset.import_data(
                 gcs_source=_TEST_TEXT_ENTITY_EXTRACTION_GCS_SOURCE,
                 import_schema_uri=_TEST_TEXT_ENTITY_IMPORT_SCHEMA,
-                import_request_timeout=600.0,
             )
 
             data_items_post_import = dataset_gapic_client.list_data_items(
@@ -198,8 +197,7 @@ def test_get_new_dataset_and_import(self, dataset_gapic_client):
 
             assert len(list(data_items_post_import)) == 469
         finally:
-            if text_dataset is not None:
-                text_dataset.delete()
+            text_dataset.delete()
 
     @vpcsc_config.skip_if_inside_vpcsc
     def test_create_and_import_image_dataset(self, dataset_gapic_client):
@@ -208,7 +206,9 @@ def test_create_and_import_image_dataset(self, dataset_gapic_client):
 
         try:
             img_dataset = aiplatform.ImageDataset.create(
-                display_name=f"temp_sdk_integration_create_and_import_dataset_{uuid.uuid4()}",
+                display_name=self._make_display_name(
+                    key="create_and_import_image_dataset"
+                ),
                 gcs_source=_TEST_IMAGE_OBJECT_DETECTION_GCS_SOURCE,
                 import_schema_uri=_TEST_IMAGE_OBJ_DET_IMPORT_SCHEMA,
                 create_request_timeout=None,
@@ -230,7 +230,7 @@ def test_create_tabular_dataset(self):
 
         try:
             tabular_dataset = aiplatform.TabularDataset.create(
-                display_name=f"temp_sdk_integration_create_and_import_dataset_{uuid.uuid4()}",
+                display_name=self._make_display_name(key="create_tabular_dataset"),
                 gcs_source=[_TEST_TABULAR_CLASSIFICATION_GCS_SOURCE],
                 create_request_timeout=None,
             )
@@ -250,13 +250,15 @@ def test_create_tabular_dataset(self):
                 tabular_dataset.delete()
 
     def test_create_tabular_dataset_from_dataframe(self, bigquery_dataset):
-        bq_staging_table = f"bq://{e2e_base._PROJECT}.{bigquery_dataset.dataset_id}.test_table{uuid.uuid4()}"
+        bq_staging_table = f"bq://{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.test_table{uuid.uuid4()}"
 
         try:
             tabular_dataset = aiplatform.TabularDataset.create_from_dataframe(
                 df_source=_TEST_DATAFRAME,
                 staging_path=bq_staging_table,
-                display_name=f"temp_sdk_integration_create_and_import_dataset_from_dataframe{uuid.uuid4()}",
+                display_name=self._make_display_name(
+                    key="create_and_import_dataset_from_dataframe"
+                ),
             )
 
             """Use the Dataset.create_from_dataframe() method to create a new tabular dataset.
@@ -281,12 +283,14 @@ def test_create_tabular_dataset_from_dataframe_with_provided_schema(
         created and references the BQ source."""
 
         try:
-            bq_staging_table = f"bq://{e2e_base._PROJECT}.{bigquery_dataset.dataset_id}.test_table{uuid.uuid4()}"
+            bq_staging_table = f"bq://{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.test_table{uuid.uuid4()}"
 
             tabular_dataset = aiplatform.TabularDataset.create_from_dataframe(
                 df_source=_TEST_DATAFRAME,
                 staging_path=bq_staging_table,
-                display_name=f"temp_sdk_integration_create_and_import_dataset_from_dataframe{uuid.uuid4()}",
+                display_name=self._make_display_name(
+                    key="create_and_import_dataset_from_dataframe"
+                ),
                 bq_schema=_TEST_DATAFRAME_BQ_SCHEMA,
             )