Skip to content

Entity SQL does not work as documented with PostgreSQL Offline Store #5605

@oonisim

Description

@oonisim

Expected Behavior

Entity SQL as documented in Example: entity SQL query for generating training data
works.

entity_sql = f"""
    SELECT
        entity_id,
        event_timestamp
    FROM {feature_store.get_data_source("customer_credit_risk_feature_source").get_table_query_string()} 
"""
print(entity_sql)
-----
    SELECT
        entity_id,
        event_timestamp
    FROM (SELECT * FROM credit.customer_credit_risk_features) 
training_df = feature_store.get_historical_features(
    entity_df=entity_sql,
    features=customer_credit_risk_features,
).to_df()

Current Behavior

---------------------------------------------------------------------------
SyntaxError                               Traceback (most recent call last)
File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:2664, in SQLiteDatabase.execute(self, sql, params)
   2663 try:
-> 2664     cur.execute(sql, *args)
   2665     return cur

File ~/venv/feast/lib/python3.11/site-packages/psycopg/cursor.py:97, in Cursor.execute(self, query, params, prepare, binary)
     96 except e._NO_TRACEBACK as ex:
---> 97     raise ex.with_traceback(None)
     98 return self

SyntaxError: subquery in FROM must have an alias
LINE 5:     FROM (SELECT * FROM credit.customer_credit_risk_features...
                 ^
HINT:  For example, FROM (SELECT ...) [AS] foo.

The above exception was the direct cause of the following exception:

DatabaseError                             Traceback (most recent call last)
Cell In[55], line 1
----> 1 training_df = feature_store.get_historical_features(
      2     entity_df=entity_sql,
      3     features=customer_credit_risk_features,
      4 ).to_df()

File ~/venv/feast/lib/python3.11/site-packages/feast/feature_store.py:1207, in FeatureStore.get_historical_features(self, entity_df, features, full_feature_names, start_date, end_date)
   1204 if end_date is not None:
   1205     kwargs["end_date"] = end_date
-> 1207 job = provider.get_historical_features(
   1208     self.config,
   1209     feature_views,
   1210     _feature_refs,
   1211     entity_df,
   1212     self._registry,
   1213     self.project,
   1214     full_feature_names,
   1215     **kwargs,
   1216 )
   1218 return job

File ~/venv/feast/lib/python3.11/site-packages/feast/infra/passthrough_provider.py:467, in PassthroughProvider.get_historical_features(self, config, feature_views, feature_refs, entity_df, registry, project, full_feature_names, **kwargs)
    456 def get_historical_features(
    457     self,
    458     config: RepoConfig,
   (...)    465     **kwargs,
    466 ) -> RetrievalJob:
--> 467     job = self.offline_store.get_historical_features(
    468         config=config,
    469         feature_views=feature_views,
    470         feature_refs=feature_refs,
    471         entity_df=entity_df,
    472         registry=registry,
    473         project=project,
    474         full_feature_names=full_feature_names,
    475         **kwargs,
    476     )
    478     return job

File ~/venv/feast/lib/python3.11/site-packages/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py:170, in PostgreSQLOfflineStore.get_historical_features(config, feature_views, feature_refs, entity_df, registry, project, full_feature_names, **kwargs)
    160         start_date = make_tzaware(start_date)
    162     entity_df = pd.DataFrame(
    163         {
    164             "event_timestamp": pd.date_range(
   (...)    167         }
    168     )
--> 170 entity_schema = _get_entity_schema(entity_df, config)
    172 entity_df_event_timestamp_col = (
    173     offline_utils.infer_event_timestamp_from_entity_df(entity_schema)
    174 )
    176 entity_df_event_timestamp_range = _get_entity_df_event_timestamp_range(
    177     entity_df,
    178     entity_df_event_timestamp_col,
    179     config,
    180 )

File ~/venv/feast/lib/python3.11/site-packages/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py:503, in _get_entity_schema(entity_df, config)
    501 elif isinstance(entity_df, str):
    502     df_query = f"({entity_df}) AS sub"
--> 503     return get_query_schema(config.offline_store, df_query)
    504 else:
    505     raise InvalidEntityType(type(entity_df))

File ~/venv/feast/lib/python3.11/site-packages/feast/infra/utils/postgres/connection_utils.py:116, in get_query_schema(config, sql_query)
    114 with _get_conn(config) as conn:
    115     conn.read_only = True
--> 116     df = pd.read_sql(
    117         f"SELECT * FROM {sql_query} LIMIT 0",
    118         conn,
    119     )
    120     return dict(zip(df.columns, df.dtypes))

File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:708, in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize, dtype_backend, dtype)
    706 with pandasSQL_builder(con) as pandas_sql:
    707     if isinstance(pandas_sql, SQLiteDatabase):
--> 708         return pandas_sql.read_query(
    709             sql,
    710             index_col=index_col,
    711             params=params,
    712             coerce_float=coerce_float,
    713             parse_dates=parse_dates,
    714             chunksize=chunksize,
    715             dtype_backend=dtype_backend,
    716             dtype=dtype,
    717         )
    719     try:
    720         _is_table_name = pandas_sql.has_table(sql)

File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:2728, in SQLiteDatabase.read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize, dtype, dtype_backend)
   2717 def read_query(
   2718     self,
   2719     sql,
   (...)   2726     dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
   2727 ) -> DataFrame | Iterator[DataFrame]:
-> 2728     cursor = self.execute(sql, params)
   2729     columns = [col_desc[0] for col_desc in cursor.description]
   2731     if chunksize is not None:

File ~/venv/feast/lib/python3.11/site-packages/pandas/io/sql.py:2676, in SQLiteDatabase.execute(self, sql, params)
   2673     raise ex from inner_exc
   2675 ex = DatabaseError(f"Execution failed on sql '{sql}': {exc}")
-> 2676 raise ex from exc

DatabaseError: Execution failed on sql 'SELECT * FROM (
    SELECT
        entity_id,
        event_timestamp
    FROM (SELECT * FROM credit.customer_credit_risk_features) 
) AS sub LIMIT 0': subquery in FROM must have an alias
LINE 5:     FROM (SELECT * FROM credit.customer_credit_risk_features...
                 ^
HINT:  For example, FROM (SELECT ...) [AS] foo.

Steps to reproduce

Specifications

  • Version: 0.53
  • Platform: MacOSX
  • Subsystem: Python 3.11

Possible Solution

entity_sql = f"""
    SELECT
        entity_id,
        event_timestamp
    FROM {feature_store.get_data_source("customer_credit_risk_feature_source").get_table_query_string()} AS ENTITY_SQL
"""
print(entity_sql)

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions