-
Notifications
You must be signed in to change notification settings - Fork 350
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Moved transformations code out of column_names_dataset
- Loading branch information
Showing 3 changed files with 116 additions and 93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
107 changes: 107 additions & 0 deletions
107
google/cloud/aiplatform/utils/column_transformations_utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# Copyright 2021 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
from google.cloud.aiplatform import base | ||
|
||
from google.cloud.aiplatform.datasets.column_names_dataset import _ColumnNamesDataset | ||
from typing import Dict, List, Optional, Union, Tuple | ||
|
||
import warnings | ||
|
||
|
||
def get_default_column_transformations(
    dataset: _ColumnNamesDataset, target_column: str,
) -> Tuple[List[Dict[str, Dict[str, str]]], List[str]]:
    """Get default column transformations from the column names, while omitting the target column.

    Args:
        dataset (_ColumnNamesDataset):
            Required. The dataset whose ``column_names`` are read.
        target_column (str):
            Required. The name of the column values of which the Model is to predict.
            This column is left out of both returned lists.

    Returns:
        Tuple[List[Dict[str, Dict[str, str]]], List[str]]:
            A two-item tuple. The first item is the list of default
            transformations, one ``{"auto": {"column_name": <name>}}`` entry per
            non-target column. The second item is the list of those column
            names, in the same order.
    """
    # Every column except the target gets a transformation.
    column_names = [
        column_name
        for column_name in dataset.column_names
        if column_name != target_column
    ]
    column_transformations = [
        {"auto": {"column_name": column_name}} for column_name in column_names
    ]

    return (column_transformations, column_names)
|
||
|
||
def validate_and_get_column_transformations(
    column_specs: Optional[Dict[str, str]],
    column_transformations: Optional[Union[Dict, List[Dict]]],
) -> Optional[Union[Dict, List[Dict]]]:
    """Validates column specs and transformations, then returns processed transformations.

    Args:
        column_specs (Dict[str, str]):
            Optional. Alternative to column_transformations where the keys of the dict
            are column names and their respective values are one of
            AutoMLTabularTrainingJob.column_data_types.
            When creating transformation for BigQuery Struct column, the column
            should be flattened using "." as the delimiter. Only columns with no child
            should have a transformation.
            If an input column has no transformations on it, such a column is
            ignored by the training, except for the targetColumn, which should have
            no transformations defined on.
            Only one of column_transformations or column_specs should be passed.
        column_transformations (Union[Dict, List[Dict]]):
            Optional. Transformations to apply to the input columns (i.e. columns other
            than the targetColumn). Each transformation may produce multiple
            result values from the column's value, and all are used for training.
            When creating transformation for BigQuery Struct column, the column
            should be flattened using "." as the delimiter. Only columns with no child
            should have a transformation.
            If an input column has no transformations on it, such a column is
            ignored by the training, except for the targetColumn, which should have
            no transformations defined on.
            Only one of column_transformations or column_specs should be passed.
            Consider using column_specs as column_transformations will be deprecated eventually.

    Returns:
        Optional[Union[Dict, List[Dict]]]:
            ``column_transformations`` unchanged when it was passed; otherwise a
            list with one ``{<transformation>: {"column_name": <name>}}`` entry
            per item of ``column_specs``; ``None`` when neither was passed.

    Raises:
        ValueError: If both column_transformations and column_specs are passed.
    """
    # user populated transformations
    if column_transformations is not None and column_specs is not None:
        raise ValueError(
            "Both column_transformations and column_specs were passed. Only one is allowed."
        )

    if column_transformations is not None:
        # DeprecationWarning is hidden by default outside __main__, so force it
        # to be shown. Scope the filter so the process-wide warning
        # configuration is restored after the warning is emitted.
        with warnings.catch_warnings():
            warnings.simplefilter("always", DeprecationWarning)
            warnings.warn(
                "consider using column_specs instead. column_transformations will be deprecated in the future.",
                DeprecationWarning,
                stacklevel=2,
            )

        return column_transformations

    if column_specs is not None:
        return [
            {transformation: {"column_name": column_name}}
            for column_name, transformation in column_specs.items()
        ]

    return None