@@ -333,7 +333,8 @@ def _ingest_row(
333333 (
334334 FeatureValue (
335335 feature_name = data_frame .columns [index - 1 ],
336- value_as_string_list = IngestionManagerPandas ._covert_feature_value_to_string_list (
336+ value_as_string_list
337+ = IngestionManagerPandas ._covert_feature_value_to_string_list (
337338 row [index ]
338339 ),
339340 )
@@ -373,7 +374,8 @@ def _is_feature_collection_type(
373374 feature_name (str): name of the feature.
374375 feature_definitions (Dict[str, Dict[Any, Any]]): dictionary of feature definitions.
375376 where the key is the feature name and the value is the FeatureDefinition.
376- The FeatureDefinition contains the data type of the feature and the type of collection.
377+ The FeatureDefinition contains the data type of the feature and
378+ the type of collection.
377379 If the feature is not a collection type, the value of the CollectionType attribute
378380 is None.
379381
@@ -383,22 +385,28 @@ def _is_feature_collection_type(
383385 feature_definition = feature_definitions .get (feature_name )
384386 if feature_definition is not None :
385387 return feature_definition .get ("CollectionType" ) is not None
388+ return None
386389
387390 @staticmethod
388391 def _feature_value_is_not_none (
389392 feature_value : Any ,
390393 ):
391394 """Check if the feature value is not None.
392395
393- For Collection Type feature, we want to keep this check simple, where if the value is not None,
396+ For Collection Type feature, we want to keep this check simple,
397+ where if the value is not None,
394398 we convert and pass it to PutRecord, instead of relying on Pandas.notna(obj).all().
395399
396- Also, we don't want to skip the collection attribute with partial None values, when calling PutRecord. Since,
397- vector value can have some dimensions as None. Instead, we want to let PutRecord either accept or fail the
398- entire record based on the service side implementation. As of this change the service fails any partial None
400+ Also, we don't want to skip the collection attribute with partial None values,
401+ when calling PutRecord, since a
402+ vector value can have some dimensions as None. Instead,
403+ we want to let PutRecord either accept or fail the
404+ entire record based on the service side implementation.
405+ As of this change the service fails any partial None
399406 collection types.
400407
401- For the Scalar values (non Collection) we want to still use pd.notna() to keep the behavior same.
408+ For the Scalar values (non Collection) we want to still use pd.notna()
409+ to keep the behavior same.
402410
403411 Args:
404412 feature_value (Any): feature value.
@@ -422,7 +430,8 @@ def _covert_feature_value_to_string_list(feature_value: List[Any]):
422430 """
423431 if not is_list_like (feature_value ):
424432 raise ValueError (
425- f"Invalid feature value, feature value: { feature_value } for a collection type feature"
433+ f"Invalid feature value, feature value: { feature_value } "
434+ f" for a collection type feature"
426435 f" must be an Array, but instead was { type (feature_value )} "
427436 )
428437 return [str (value ) if value is not None else None for value in feature_value ]
@@ -996,18 +1005,25 @@ def load_feature_definitions(
9961005 No feature definitions will be loaded if the given data_frame contains
9971006 unsupported dtypes.
9981007
999- For IN_MEMORY online_storage_type all collection type columns within DataFrame will be inferred as a List,
1000- instead of a String. Due to performance limitations, only first 1,000 values of the column will be sampled,
1001- when inferring collection Type. Customers can manually update the inferred collection type as needed.
1008+ For IN_MEMORY online_storage_type all collection type columns within DataFrame
1009+ will be inferred as a List,
1010+ instead of a String. Due to performance limitations,
1011+ only the first 1,000 values of the column will be sampled
1012+ when inferring collection Type.
1013+ Customers can manually update the inferred collection type as needed.
10021014
10031015 Args:
10041016 data_frame (DataFrame): A Pandas DataFrame containing features.
10051017 online_storage_type (OnlineStoreStorageTypeEnum):
1006- Optional. Online storage type for the feature group. The value can be either STANDARD or IN_MEMORY
1018+ Optional. Online storage type for the feature group.
1019+ The value can be either STANDARD or IN_MEMORY.
10071020 If not specified, STANDARD will be used by default.
1008- If specified as IN_MEMORY, we will infer any collection type column within DataFrame as a List
1009- instead of a String. All, collection types (List, Set and Vector) will be inferred as List.
1010- We will only sample the first 1,000 values of the column when inferring collection Type.
1021+ If specified as IN_MEMORY,
1022+ we will infer any collection type column within DataFrame as a List instead of a
1023+ String.
1024+ All collection types (List, Set and Vector) will be inferred as List.
1025+ We will only sample the first 1,000 values of the column when inferring
1026+ collection Type.
10111027
10121028
10131029
@@ -1157,7 +1173,8 @@ def ingest(
11571173 feature_group_name = self .name ,
11581174 feature_definitions = feature_definition_dict ,
11591175 sagemaker_session = self .sagemaker_session ,
1160- sagemaker_fs_runtime_client_config = self .sagemaker_session .sagemaker_featurestore_runtime_client .meta .config ,
1176+ sagemaker_fs_runtime_client_config
1177+ = self .sagemaker_session .sagemaker_featurestore_runtime_client .meta .config ,
11611178 max_workers = max_workers ,
11621179 max_processes = max_processes ,
11631180 profile_name = profile_name ,
@@ -1169,6 +1186,7 @@ def ingest(
11691186
11701187 def _get_feature_definition_dict (self ) -> Dict [str , Dict [Any , Any ]]:
11711188 """Get a dictionary of feature definitions with Feature Name as Key.
1189+
11721190 We are converting the FeatureDefinitions into a dictionary for faster lookups.
11731191
11741192 Returns:
0 commit comments