feat: support new AutoML problems; add batchPredict, exportModel methods

feat: support new AutoML problems; add batchPredict, exportModel methods This captures the following changes: - + these AutoML problem variants, and supporting fields to build and predict with these models: - Image object detection - Video classification - Text extraction - Text sentiment - Tables - + `batchPredict` method to perform batch prediction (long-running operation). - + new response metadata to show model evaluation metrics, e.g. RMS error, MAE, R-squared - + `updateDataset` method to perform updates to a dataset after it's been created - + methods to get/update/list specs for a relational table: - `getAnnotationSpec`, `getTableSpec`, `listTableSpecs`, `updateTableSpec`, `getColumnSpec`, `listColumnSpecs`, `updateColumnSpec` - + `exportModel` method to export a trained, exportable model to a GCS location - + `exportEvaluatedExamples` method to export examples on which the model was evaluated #151 automerged by dpebot
googleapis · Apr 3, 2019 · 5fcc9c9 · 5fcc9c9
1 parent c74babd
commit 5fcc9c9
Show file tree

Hide file tree

Showing 63 changed files with 9,253 additions and 499 deletions.
diff --git a/packages/google-cloud-automl/.gitignore b/packages/google-cloud-automl/.gitignore
@@ -7,3 +7,4 @@ build/
 *.lock
 .DS_Store
 package-lock.json
+__pycache__
diff --git a/packages/google-cloud-automl/protos/google/cloud/automl/v1beta1/annotation_payload.proto b/packages/google-cloud-automl/protos/google/cloud/automl/v1beta1/annotation_payload.proto
@@ -1,4 +1,4 @@
-// Copyright 2018 Google LLC
+// Copyright 2018 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -11,42 +11,64 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+//
 
 syntax = "proto3";
 
 package google.cloud.automl.v1beta1;
 
 import "google/api/annotations.proto";
 import "google/cloud/automl/v1beta1/classification.proto";
+import "google/cloud/automl/v1beta1/detection.proto";
+import "google/cloud/automl/v1beta1/tables.proto";
+import "google/cloud/automl/v1beta1/text_extraction.proto";
+import "google/cloud/automl/v1beta1/text_sentiment.proto";
 import "google/cloud/automl/v1beta1/translation.proto";
+import "google/protobuf/any.proto";
 
 option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
 option java_multiple_files = true;
 option java_package = "com.google.cloud.automl.v1beta1";
 option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
 
+
 // Contains annotation information that is relevant to AutoML.
 message AnnotationPayload {
   // Output only . Additional information about the annotation
-  // specific to the AutoML solution.
+  // specific to the AutoML domain.
   oneof detail {
     // Annotation details for translation.
     TranslationAnnotation translation = 2;
 
     // Annotation details for content or image classification.
     ClassificationAnnotation classification = 3;
+
+    // Annotation details for image object detection.
+    ImageObjectDetectionAnnotation image_object_detection = 4;
+
+    // Annotation details for video classification.
+    // Returned for Video Classification predictions.
+    VideoClassificationAnnotation video_classification = 9;
+
+    // Annotation details for text extraction.
+    TextExtractionAnnotation text_extraction = 6;
+
+    // Annotation details for text sentiment.
+    TextSentimentAnnotation text_sentiment = 7;
+
+    // Annotation details for Tables.
+    TablesAnnotation tables = 10;
   }
 
   // Output only . The resource ID of the annotation spec that
   // this annotation pertains to. The annotation spec comes from either an
   // ancestor dataset, or the dataset that was used to train the model in use.
   string annotation_spec_id = 1;
 
-  // Output only. The value of
-  // [AnnotationSpec.display_name][google.cloud.automl.v1beta1.AnnotationSpec.display_name]
-  // when the model was trained. Because this field returns a value at model
-  // training time, for different models trained using the same dataset, the
-  // returned value could be different as model owner could update the
-  // display_name between any two model training.
+  // Output only. The value of
+  // [AnnotationSpec.display_name][google.cloud.automl.v1beta1.AnnotationSpec.display_name]
+  // when the model was trained. Because this value is captured at training
+  // time, models trained from the same dataset may return different values if
+  // the model owner updated the display_name between training runs.
   string display_name = 5;
 }
diff --git a/packages/google-cloud-automl/protos/google/cloud/automl/v1beta1/classification.proto b/packages/google-cloud-automl/protos/google/cloud/automl/v1beta1/classification.proto
@@ -1,4 +1,4 @@
-// Copyright 2018 Google LLC
+// Copyright 2018 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -11,18 +11,21 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+//
 
 syntax = "proto3";
 
 package google.cloud.automl.v1beta1;
 
 import "google/api/annotations.proto";
+import "google/cloud/automl/v1beta1/temporal.proto";
 
 option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
 option java_outer_classname = "ClassificationProto";
 option java_package = "com.google.cloud.automl.v1beta1";
 option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
 
+
 // Contains annotation details specific to classification.
 message ClassificationAnnotation {
   // Output only. A confidence estimate between 0.0 and 1.0. A higher value
@@ -33,47 +36,111 @@ message ClassificationAnnotation {
   float score = 1;
 }
 
+// Contains annotation details specific to video classification.
+message VideoClassificationAnnotation {
+  // Output only. Expresses the type of video classification. Possible values:
+  //
+  // *  `segment` - Classification done on a user-specified
+  //        time segment of a video. An AnnotationSpec is reported as present
+  //        in that time segment if it is present in any part of it. Video
+  //        ML model evaluations are done only for this type of classification.
+  //
+  // *  `shot` - Shot-level classification.
+  //        AutoML Video Intelligence determines the boundaries
+  //        for each camera shot in the entire segment of the video that the
+  //        user specified in the request configuration. AutoML Video
+  //        Intelligence then returns labels and their confidence scores for
+  //        each detected shot, along with the start and end time of the shot.
+  //        WARNING: Model evaluation is not done for this classification type;
+  //        its quality depends on the training data, but no metrics are
+  //        provided to describe that quality.
+  //
+  // *  `1s_interval` - AutoML Video Intelligence returns labels and their
+  //        confidence scores for each second of the entire segment of the video
+  //        that the user specified in the request configuration.
+  //        WARNING: Model evaluation is not done for this classification type;
+  //        its quality depends on the training data, but no metrics are
+  //        provided to describe that quality.
+  string type = 1;
+
+  // Output only. The classification details of this annotation.
+  ClassificationAnnotation classification_annotation = 2;
+
+  // Output only. The time segment of the video to which the
+  // annotation applies.
+  TimeSegment time_segment = 3;
+}
+
 // Model evaluation metrics for classification problems.
-// Visible only to v1beta1
+// Note: For Video Classification these metrics only describe quality of the
+// Video Classification predictions of "segment_classification" type.
 message ClassificationEvaluationMetrics {
   // Metrics for a single confidence threshold.
   message ConfidenceMetricsEntry {
-    // Output only. The confidence threshold value used to compute the metrics.
+    // Output only. Metrics are computed with an assumption that the model
+    // never returns predictions with score lower than this value.
     float confidence_threshold = 1;
 
-    // Output only. Recall under the given confidence threshold.
+    // Output only. Metrics are computed with an assumption that the model
+    // always returns at most this many predictions (ordered by their score,
+    // descending), but they all still need to meet the confidence_threshold.
+    int32 position_threshold = 14;
+
+    // Output only. Recall (True Positive Rate) for the given confidence
+    // threshold.
     float recall = 2;
 
-    // Output only. Precision under the given confidence threshold.
+    // Output only. Precision for the given confidence threshold.
     float precision = 3;
 
+    // Output only. False Positive Rate for the given confidence threshold.
+    float false_positive_rate = 8;
+
     // Output only. The harmonic mean of recall and precision.
     float f1_score = 4;
 
-    // Output only. The recall when only considering the label that has the
-    // highest prediction score and not below the confidence threshold for each
-    // example.
+    // Output only. The Recall (True Positive Rate) when only considering the
+    // label that has the highest prediction score and not below the confidence
+    // threshold for each example.
     float recall_at1 = 5;
 
     // Output only. The precision when only considering the label that has the
-    // highest predictionscore and not below the confidence threshold for each
+    // highest prediction score and not below the confidence threshold for each
     // example.
     float precision_at1 = 6;
 
-    // Output only. The harmonic mean of
-    // [recall_at1][google.cloud.automl.v1beta1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.recall_at1]
-    // and
-    // [precision_at1][google.cloud.automl.v1beta1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.precision_at1].
+    // Output only. The False Positive Rate when only considering the label that
+    // has the highest prediction score and not below the confidence threshold
+    // for each example.
+    float false_positive_rate_at1 = 9;
+
+    // Output only. The harmonic mean of [recall_at1][google.cloud.automl.v1beta1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.recall_at1] and [precision_at1][google.cloud.automl.v1beta1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.precision_at1].
     float f1_score_at1 = 7;
+
+    // Output only. The number of model created labels that match a ground truth
+    // label.
+    int64 true_positive_count = 10;
+
+    // Output only. The number of model created labels that do not match a
+    // ground truth label.
+    int64 false_positive_count = 11;
+
+    // Output only. The number of ground truth labels that are not matched
+    // by a model created label.
+    int64 false_negative_count = 12;
+
+    // Output only. The number of labels that were not created by the model
+    // and that, had they been created, would not match a ground truth label.
+    int64 true_negative_count = 13;
   }
 
   // Confusion matrix of the model running the classification.
   message ConfusionMatrix {
     // Output only. A row in the confusion matrix.
     message Row {
       // Output only. Value of the specific cell in the confusion matrix.
-      // The number of values each row is equal to the size of
-      // annotatin_spec_id.
+      // The number of values each row has (i.e. the length of the row) is equal
+      // to the length of the annotation_spec_id field.
       repeated int32 example_count = 1;
     }
 
@@ -88,14 +155,28 @@ message ClassificationEvaluationMetrics {
     repeated Row row = 2;
   }
 
-  // Output only. The Area under precision recall curve metric.
+  // Output only. The Area Under Precision-Recall Curve metric. Micro-averaged
+  // for the overall evaluation.
   float au_prc = 1;
 
-  // Output only. The Area under precision recall curve metric based on priors.
-  float base_au_prc = 2;
+  // Output only. The Area Under Precision-Recall Curve metric based on priors.
+  // Micro-averaged for the overall evaluation.
+  // Deprecated.
+  float base_au_prc = 2 [deprecated = true];
+
+  // Output only. The Area Under Receiver Operating Characteristic curve metric.
+  // Micro-averaged for the overall evaluation.
+  float au_roc = 6;
+
+  // Output only. The Log Loss metric.
+  float log_loss = 7;
 
-  // Output only. Metrics that have confidence thresholds.
-  // Precision-recall curve can be derived from it.
+  // Output only. Metrics for each confidence_threshold in
+  // 0.00,0.05,0.10,...,0.95,0.96,0.97,0.98,0.99 and
+  // position_threshold = INT32_MAX_VALUE.
+  // Precision-recall curve is derived from them.
+  // The above metrics may also be supplied for additional values of
+  // position_threshold.
   repeated ConfidenceMetricsEntry confidence_metrics_entry = 3;
 
   // Output only. Confusion matrix of the evaluation.

diff --git a/packages/google-cloud-automl/protos/google/cloud/automl/v1beta1/column_spec.proto b/packages/google-cloud-automl/protos/google/cloud/automl/v1beta1/column_spec.proto
@@ -0,0 +1,78 @@
+// Copyright 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+syntax = "proto3";
+
+package google.cloud.automl.v1beta1;
+
+import "google/api/annotations.proto";
+import "google/cloud/automl/v1beta1/data_stats.proto";
+import "google/cloud/automl/v1beta1/data_types.proto";
+
+option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
+option java_multiple_files = true;
+option java_package = "com.google.cloud.automl.v1beta1";
+option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
+
+
+// Represents a column in a relational table. When listed, column specs are
+// returned in the same order in which they were given on import.
+// Used by:
+//   *   Tables
+message ColumnSpec {
+  // Identifies the table's column, and its correlation with the column this
+  // ColumnSpec describes.
+  message CorrelatedColumn {
+    // The column_spec_id of the correlated column, which belongs to the same
+    // table as the in-context column.
+    string column_spec_id = 1;
+
+    // Correlation between this and the in-context column.
+    CorrelationStats correlation_stats = 2;
+  }
+
+  // Output only. The resource name of the column spec.
+  // Form:
+  //
+  // `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}/tableSpecs/{table_spec_id}/columnSpecs/{column_spec_id}`
+  string name = 1;
+
+  // The data type of elements stored in the column.
+  DataType data_type = 2;
+
+  // Output only. The name of the column to show in the interface. The name can
+  // be up to 100 characters long and can consist only of ASCII Latin letters
+  // A-Z and a-z, ASCII digits 0-9, underscores (_), and forward slashes (/),
+  // and must start with a letter or a digit.
+  string display_name = 3;
+
+  // Output only. Stats of the series of values in the column.
+  // This field may be stale, see the ancestor's
+  // Dataset.tables_dataset_metadata.stats_update_time field
+  // for the timestamp at which these stats were last updated.
+  DataStats data_stats = 4;
+
+  // Output only. The 10 columns of the table most correlated with this column,
+  // ordered by the
+  // [cramers_v][google.cloud.automl.v1beta1.CorrelationStats.cramers_v] metric.
+  // This field may be stale, see the ancestor's
+  // Dataset.tables_dataset_metadata.stats_update_time field
+  // for the timestamp at which these stats were last updated.
+  repeated CorrelatedColumn top_correlated_columns = 5;
+
+  // Used to perform consistent read-modify-write updates. If not set, a blind
+  // "overwrite" update happens.
+  string etag = 6;
+}