-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Open
Labels
Description
Expected Behavior
Entity SQL as documented in Example: entity SQL query for generating training data
works.
entity_sql = f"""
SELECT
entity_id,
event_timestamp
FROM {feature_store.get_data_source("customer_credit_risk_feature_source").get_table_query_string()}
"""
print(entity_sql)
-----
SELECT
entity_id,
event_timestamp
FROM (SELECT * FROM credit.customer_credit_risk_features)
training_df = feature_store.get_historical_features(
entity_df=entity_sql,
features=customer_credit_risk_features,
).to_df()
Current Behavior
---------------------------------------------------------------------------
SyntaxError Traceback (most recent call last)
File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:2664, in SQLiteDatabase.execute(self, sql, params)
2663 try:
-> 2664 cur.execute(sql, *args)
2665 return cur
File ~/venv/feast/lib/python3.11/site-packages/psycopg/cursor.py:97, in Cursor.execute(self, query, params, prepare, binary)
96 except e._NO_TRACEBACK as ex:
---> 97 raise ex.with_traceback(None)
98 return self
SyntaxError: subquery in FROM must have an alias
LINE 5: FROM (SELECT * FROM credit.customer_credit_risk_features...
^
HINT: For example, FROM (SELECT ...) [AS] foo.
The above exception was the direct cause of the following exception:
DatabaseError Traceback (most recent call last)
Cell In[55], line 1
----> 1 training_df = feature_store.get_historical_features(
2 entity_df=entity_sql,
3 features=customer_credit_risk_features,
4 ).to_df()
File ~/venv/feast/lib/python3.11/site-packages/feast/feature_store.py:1207, in FeatureStore.get_historical_features(self, entity_df, features, full_feature_names, start_date, end_date)
1204 if end_date is not None:
1205 kwargs["end_date"] = end_date
-> 1207 job = provider.get_historical_features(
1208 self.config,
1209 feature_views,
1210 _feature_refs,
1211 entity_df,
1212 self._registry,
1213 self.project,
1214 full_feature_names,
1215 **kwargs,
1216 )
1218 return job
File ~/venv/feast/lib/python3.11/site-packages/feast/infra/passthrough_provider.py:467, in PassthroughProvider.get_historical_features(self, config, feature_views, feature_refs, entity_df, registry, project, full_feature_names, **kwargs)
456 def get_historical_features(
457 self,
458 config: RepoConfig,
(...) 465 **kwargs,
466 ) -> RetrievalJob:
--> 467 job = self.offline_store.get_historical_features(
468 config=config,
469 feature_views=feature_views,
470 feature_refs=feature_refs,
471 entity_df=entity_df,
472 registry=registry,
473 project=project,
474 full_feature_names=full_feature_names,
475 **kwargs,
476 )
478 return job
File ~/venv/feast/lib/python3.11/site-packages/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py:170, in PostgreSQLOfflineStore.get_historical_features(config, feature_views, feature_refs, entity_df, registry, project, full_feature_names, **kwargs)
160 start_date = make_tzaware(start_date)
162 entity_df = pd.DataFrame(
163 {
164 "event_timestamp": pd.date_range(
(...) 167 }
168 )
--> 170 entity_schema = _get_entity_schema(entity_df, config)
172 entity_df_event_timestamp_col = (
173 offline_utils.infer_event_timestamp_from_entity_df(entity_schema)
174 )
176 entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range(
177 entity_df,
178 entity_df_event_timestamp_col,
179 config,
180 )
File ~/venv/feast/lib/python3.11/site-packages/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py:503, in _get_entity_schema(entity_df, config)
501 elif isinstance(entity_df, str):
502 df_query = f"({entity_df}) AS sub"
--> 503 return get_query_schema(config.offline_store, df_query)
504 else:
505 raise InvalidEntityType(type(entity_df))
File ~/venv/feast/lib/python3.11/site-packages/feast/infra/utils/postgres/connection_utils.py:116, in get_query_schema(config, sql_query)
114 with _get_conn(config) as conn:
115 conn.read_only = True
--> 116 df = pd.read_sql(
117 f"SELECT * FROM {sql_query} LIMIT 0",
118 conn,
119 )
120 return dict(zip(df.columns, df.dtypes))
File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:708, in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize, dtype_backend, dtype)
706 with pandasSQL_builder(con) as pandas_sql:
707 if isinstance(pandas_sql, SQLiteDatabase):
--> 708 return pandas_sql.read_query(
709 sql,
710 index_col=index_col,
711 params=params,
712 coerce_float=coerce_float,
713 parse_dates=parse_dates,
714 chunksize=chunksize,
715 dtype_backend=dtype_backend,
716 dtype=dtype,
717 )
719 try:
720 _is_table_name = pandas_sql.has_table(sql)
File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:2728, in SQLiteDatabase.read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype, dtype_backend)
2717 def read_query(
2718 self,
2719 sql,
(...) 2726 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
2727 ) -> DataFrame | Iterator[DataFrame]:
-> 2728 cursor = self.execute(sql, params)
2729 columns = [col_desc[0] for col_desc in cursor.description]
2731 if chunksize is not None:
File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:2676, in SQLiteDatabase.execute(self, sql, params)
2673 raise ex from inner_exc
2675 ex = DatabaseError(f"Execution failed on sql '{sql}': {exc}")
-> 2676 raise ex from exc
DatabaseError: Execution failed on sql 'SELECT * FROM (
SELECT
entity_id,
event_timestamp
FROM (SELECT * FROM credit.customer_credit_risk_features)
) AS sub LIMIT 0': subquery in FROM must have an alias
LINE 5: FROM (SELECT * FROM credit.customer_credit_risk_features...
^
HINT: For example, FROM (SELECT ...) [AS] foo.
Steps to reproduce
Specifications
- Version: 0.53
- Platform: MacOSX
- Subsystem: Python 3.11
Possible Solution
entity_sql = f"""
SELECT
entity_id,
event_timestamp
FROM {feature_store.get_data_source("customer_credit_risk_feature_source").get_table_query_string()} AS ENTITY_SQL
"""
print(entity_sql)