Skip to content

Commit

Permalink
feat: support new AutoML problems; add batchPredict, exportModel methods
Browse files Browse the repository at this point in the history
feat: support new AutoML problems; add batchPredict, exportModel methods

This captures the following changes:

- \+ these AutoML problem variants, and supporting fields to build and predict with these models:
   - Image object detection
   - Video classification
   - Text extraction
   - Text sentiment
   - Tables
- \+ `batchPredict` method to perform batch prediction (long running operation).
- \+ new response metadata to show model evaluation metrics, e.g. RMS error, MAE, R-squared
- \+ `updateDataset` method to perform updates to a dataset after it's been created
- \+ methods to get/update/list specs for a relational table:
    - `getAnnotationSpec`, `getTableSpec`, `listTableSpecs`, `updateTableSpec`, `getColumnSpec`, `listColumnSpecs`, `updateColumnSpec`
- \+ `exportModel` method to export a trained, export-able model to a GCS location
- \+ `exportEvaluatedExamples` method to export examples on which the model was evaluated

#151 automerged by dpebot
  • Loading branch information
jkwlui authored and yoshi-automation committed Apr 3, 2019
1 parent c74babd commit 5fcc9c9
Show file tree
Hide file tree
Showing 63 changed files with 9,253 additions and 499 deletions.
1 change: 1 addition & 0 deletions packages/google-cloud-automl/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ build/
*.lock
.DS_Store
package-lock.json
__pycache__
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018 Google LLC
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -11,42 +11,64 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.automl.v1beta1;

import "google/api/annotations.proto";
import "google/cloud/automl/v1beta1/classification.proto";
import "google/cloud/automl/v1beta1/detection.proto";
import "google/cloud/automl/v1beta1/tables.proto";
import "google/cloud/automl/v1beta1/text_extraction.proto";
import "google/cloud/automl/v1beta1/text_sentiment.proto";
import "google/cloud/automl/v1beta1/translation.proto";
import "google/protobuf/any.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";


// Contains annotation information that is relevant to AutoML.
// Holds at most one domain-specific detail (the `detail` oneof) together with
// the ID and display name of the annotation spec the annotation pertains to.
message AnnotationPayload {
// Output only. Additional information about the annotation
// specific to the AutoML domain.
oneof detail {
// Annotation details for translation.
TranslationAnnotation translation = 2;

// Annotation details for content or image classification.
ClassificationAnnotation classification = 3;

// Annotation details for image object detection.
ImageObjectDetectionAnnotation image_object_detection = 4;

// Annotation details for video classification.
// Returned for Video Classification predictions.
VideoClassificationAnnotation video_classification = 9;

// Annotation details for text extraction.
TextExtractionAnnotation text_extraction = 6;

// Annotation details for text sentiment.
TextSentimentAnnotation text_sentiment = 7;

// Annotation details for Tables.
TablesAnnotation tables = 10;
}

// Output only. The resource ID of the annotation spec that
// this annotation pertains to. The annotation spec comes from either an
// ancestor dataset, or the dataset that was used to train the model in use.
string annotation_spec_id = 1;

// Output only. The value of
// [AnnotationSpec.display_name][google.cloud.automl.v1beta1.AnnotationSpec.display_name]
// when the model was trained. Because this field returns a value at model
// training time, the returned value can differ between models trained using
// the same dataset, since the model owner could update the display_name
// between any two model trainings.
string display_name = 5;
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018 Google LLC
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -11,18 +11,21 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.automl.v1beta1;

import "google/api/annotations.proto";
import "google/cloud/automl/v1beta1/temporal.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
option java_outer_classname = "ClassificationProto";
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";


// Contains annotation details specific to classification.
message ClassificationAnnotation {
// Output only. A confidence estimate between 0.0 and 1.0. A higher value
Expand All @@ -33,47 +36,111 @@ message ClassificationAnnotation {
float score = 1;
}

// Contains annotation details specific to video classification.
message VideoClassificationAnnotation {
// Output only. Expresses the type of video classification. Possible values:
//
// * `segment` - Classification done on a user-specified time segment of a
// video. An AnnotationSpec is reported as present in that time segment
// if it is present in any part of it. The video ML model evaluations are
// done only for this type of classification.
//
// * `shot` - Shot-level classification.
// AutoML Video Intelligence determines the boundaries
// for each camera shot in the entire segment of the video that the user
// specified in the request configuration. AutoML Video Intelligence
// then returns labels and their confidence scores for each detected
// shot, along with the start and end time of the shot.
// WARNING: Model evaluation is not done for this classification type;
// its quality depends on the training data, but no metrics are
// provided to describe that quality.
//
// * `1s_interval` - AutoML Video Intelligence returns labels and their
// confidence scores for each second of the entire segment of the video
// that the user specified in the request configuration.
// WARNING: Model evaluation is not done for this classification type;
// its quality depends on the training data, but no metrics are
// provided to describe that quality.
string type = 1;

// Output only. The classification details of this annotation.
ClassificationAnnotation classification_annotation = 2;

// Output only. The time segment of the video to which the
// annotation applies.
TimeSegment time_segment = 3;
}

// Model evaluation metrics for classification problems.
// Visible only to v1beta1
// Note: For Video Classification these metrics only describe the quality of
// the Video Classification predictions of the "segment_classification" type.
message ClassificationEvaluationMetrics {
// Metrics for a single confidence threshold.
message ConfidenceMetricsEntry {
// Output only. The confidence threshold value used to compute the metrics.
// Metrics are computed with an assumption that the model never returns
// predictions with score lower than this value.
float confidence_threshold = 1;

// Output only. Metrics are computed with an assumption that the model
// always returns at most this many predictions (ordered by their score,
// descendingly), but they all still need to meet the confidence_threshold.
int32 position_threshold = 14;

// Output only. Recall (True Positive Rate) for the given confidence
// threshold.
float recall = 2;

// Output only. Precision for the given confidence threshold.
float precision = 3;

// Output only. False Positive Rate for the given confidence threshold.
float false_positive_rate = 8;

// Output only. The harmonic mean of recall and precision.
float f1_score = 4;

// Output only. The Recall (True Positive Rate) when only considering the
// label that has the highest prediction score and not below the confidence
// threshold for each example.
float recall_at1 = 5;

// Output only. The precision when only considering the label that has the
// highest prediction score and not below the confidence threshold for each
// example.
float precision_at1 = 6;

// Output only. The False Positive Rate when only considering the label that
// has the highest prediction score and not below the confidence threshold
// for each example.
float false_positive_rate_at1 = 9;

// Output only. The harmonic mean of
// [recall_at1][google.cloud.automl.v1beta1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.recall_at1]
// and
// [precision_at1][google.cloud.automl.v1beta1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.precision_at1].
float f1_score_at1 = 7;

// Output only. The number of model created labels that match a ground truth
// label.
int64 true_positive_count = 10;

// Output only. The number of model created labels that do not match a
// ground truth label.
int64 false_positive_count = 11;

// Output only. The number of ground truth labels that are not matched
// by a model created label.
int64 false_negative_count = 12;

// Output only. The number of labels that were not created by the model
// and that, had they been created, would not have matched a ground truth
// label.
int64 true_negative_count = 13;
}

// Confusion matrix of the model running the classification.
message ConfusionMatrix {
// Output only. A row in the confusion matrix.
message Row {
// Output only. Value of the specific cell in the confusion matrix.
// The number of values each row has (i.e. the length of the row) is equal
// to the length of the annotation_spec_id field.
repeated int32 example_count = 1;
}

Expand All @@ -88,14 +155,28 @@ message ClassificationEvaluationMetrics {
repeated Row row = 2;
}

// Output only. The Area under precision recall curve metric.
// Output only. The Area Under Precision-Recall Curve metric. Micro-averaged
// for the overall evaluation.
float au_prc = 1;

// Output only. The Area under precision recall curve metric based on priors.
float base_au_prc = 2;
// Output only. The Area Under Precision-Recall Curve metric based on priors.
// Micro-averaged for the overall evaluation.
// Deprecated.
float base_au_prc = 2 [deprecated = true];

// Output only. The Area Under Receiver Operating Characteristic curve metric.
// Micro-averaged for the overall evaluation.
float au_roc = 6;

// Output only. The Log Loss metric.
float log_loss = 7;

// Output only. Metrics that have confidence thresholds.
// Precision-recall curve can be derived from it.
// Output only. Metrics for each confidence_threshold in
// 0.00,0.05,0.10,...,0.95,0.96,0.97,0.98,0.99 and
// position_threshold = INT32_MAX_VALUE.
// Precision-recall curve is derived from them.
// The above metrics may also be supplied for additional values of
// position_threshold.
repeated ConfidenceMetricsEntry confidence_metrics_entry = 3;

// Output only. Confusion matrix of the evaluation.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.automl.v1beta1;

import "google/api/annotations.proto";
import "google/cloud/automl/v1beta1/data_stats.proto";
import "google/cloud/automl/v1beta1/data_types.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1beta1;automl";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";


// A representation of a column in a relational table. When listing them,
// column specs are returned in the same order in which they were given on
// import.
// Used by:
// * Tables
message ColumnSpec {
// Identifies the table's column, and its correlation with the column this
// ColumnSpec describes.
message CorrelatedColumn {
// The column_spec_id of the correlated column, which belongs to the same
// table as the in-context column.
string column_spec_id = 1;

// Correlation between this and the in-context column.
CorrelationStats correlation_stats = 2;
}

// Output only. The resource name of the column specs.
// Form:
//
// `projects/{project_id}/locations/{location_id}/datasets/{dataset_id}/tableSpecs/{table_spec_id}/columnSpecs/{column_spec_id}`
string name = 1;

// The data type of elements stored in the column.
DataType data_type = 2;

// Output only. The name of the column to show in the interface. The name can
// be up to 100 characters long and can consist only of ASCII Latin letters
// A-Z and a-z, ASCII digits 0-9, underscores(_), and forward slashes(/), and
// must start with a letter or a digit.
string display_name = 3;

// Output only. Stats of the series of values in the column.
// This field may be stale, see the ancestor's
// Dataset.tables_dataset_metadata.stats_update_time field
// for the timestamp at which these stats were last updated.
DataStats data_stats = 4;

// Output only. The top 10 columns of the table most correlated with this
// column, ordered by the
// [cramers_v][google.cloud.automl.v1beta1.CorrelationStats.cramers_v] metric.
// This field may be stale, see the ancestor's
// Dataset.tables_dataset_metadata.stats_update_time field
// for the timestamp at which these stats were last updated.
repeated CorrelatedColumn top_correlated_columns = 5;

// Used to perform consistent read-modify-write updates. If not set, a blind
// "overwrite" update happens.
string etag = 6;
}
Loading

0 comments on commit 5fcc9c9

Please sign in to comment.