-
Notifications
You must be signed in to change notification settings - Fork 181
feat(ibis): Add Athena connector #1207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
a3f1a6e
athena support
douenergy 2cbed79
exclude athena in all test
douenergy a2effbd
v3 test
douenergy d321e3c
wren core serd
douenergy 7dd1b5b
staging dir
douenergy 969e7e9
add glue test case
douenergy b47eaba
remove unrelated link
douenergy efea7e2
closing athena cursor
douenergy 02f1f72
closing other datasource cursor
douenergy File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,138 @@ | ||
| import re | ||
| from contextlib import closing | ||
|
|
||
| import pandas as pd | ||
|
|
||
| from app.model import AthenaConnectionInfo | ||
| from app.model.data_source import DataSource | ||
| from app.model.metadata.dto import ( | ||
| Column, | ||
| Constraint, | ||
| RustWrenEngineColumnType, | ||
| Table, | ||
| TableProperties, | ||
| ) | ||
| from app.model.metadata.metadata import Metadata | ||
|
|
||
|
|
||
class AthenaMetadata(Metadata):
    """Metadata reader for AWS Athena backed by ``information_schema``.

    Lists the tables (with their columns) of the schema named in the
    connection info and maps the reported column types to
    ``RustWrenEngineColumnType`` values.
    """

    # Lookup table from the lower-cased base type name (length/precision
    # arguments already stripped) to the engine column type. Built once at
    # class creation instead of on every column.
    _COLUMN_TYPE_MAPPING = {
        # String Types (ignore Binary and Spatial Types for now)
        "char": RustWrenEngineColumnType.CHAR,
        "varchar": RustWrenEngineColumnType.VARCHAR,
        "tinytext": RustWrenEngineColumnType.TEXT,
        "text": RustWrenEngineColumnType.TEXT,
        "mediumtext": RustWrenEngineColumnType.TEXT,
        "longtext": RustWrenEngineColumnType.TEXT,
        "enum": RustWrenEngineColumnType.VARCHAR,
        "set": RustWrenEngineColumnType.VARCHAR,
        # Integer Types
        "bit": RustWrenEngineColumnType.TINYINT,
        "tinyint": RustWrenEngineColumnType.TINYINT,
        "smallint": RustWrenEngineColumnType.SMALLINT,
        "mediumint": RustWrenEngineColumnType.INTEGER,
        "int": RustWrenEngineColumnType.INTEGER,
        "integer": RustWrenEngineColumnType.INTEGER,
        "bigint": RustWrenEngineColumnType.BIGINT,
        # boolean
        "bool": RustWrenEngineColumnType.BOOL,
        "boolean": RustWrenEngineColumnType.BOOL,
        # Decimal
        "float": RustWrenEngineColumnType.FLOAT4,
        "double": RustWrenEngineColumnType.DOUBLE,
        "decimal": RustWrenEngineColumnType.DECIMAL,
        "numeric": RustWrenEngineColumnType.NUMERIC,
        # Date and Time Types
        "date": RustWrenEngineColumnType.DATE,
        "datetime": RustWrenEngineColumnType.TIMESTAMP,
        # NOTE(review): plain "timestamp" is mapped to TIMESTAMPTZ, kept
        # as-is from the original mapping — confirm this is intended.
        "timestamp": RustWrenEngineColumnType.TIMESTAMPTZ,
        # JSON Type
        "json": RustWrenEngineColumnType.JSON,
    }

    def __init__(self, connection_info: AthenaConnectionInfo):
        super().__init__(connection_info)
        self.connection = DataSource.athena.get_connection(connection_info)

    def get_table_list(self) -> list[Table]:
        """Return every table of the configured schema with its columns.

        Returns:
            One ``Table`` per distinct ``schema.table`` name, in the order the
            query returns them (sorted by table name), each carrying the
            columns reported for it.
        """
        schema_name = self.connection_info.schema_name.get_secret_value()
        # The schema name is user-supplied and ends up inside a SQL string
        # literal; double any single quote so it cannot terminate the literal.
        escaped_schema_name = schema_name.replace("'", "''")

        sql = f"""
            SELECT
                t.table_catalog,
                t.table_schema,
                t.table_name,
                c.column_name,
                c.ordinal_position,
                c.is_nullable,
                c.data_type
            FROM
                information_schema.tables AS t
            JOIN
                information_schema.columns AS c
                ON t.table_catalog = c.table_catalog
                AND t.table_schema = c.table_schema
                AND t.table_name = c.table_name
            WHERE t.table_schema = '{escaped_schema_name}'
            ORDER BY t.table_name
            """

        # We need to use raw_sql here because using the sql method causes Ibis
        # to *create view* first, which does not work with information_schema
        # queries. `closing` guarantees the cursor is released.
        with closing(self.connection.raw_sql(sql)) as cursor:
            response = pd.DataFrame(
                cursor.fetchall(), columns=[col[0] for col in cursor.description]
            ).to_dict(orient="records")

        # Keyed by "schema.table"; dict insertion order preserves query order.
        unique_tables: dict[str, Table] = {}

        for column_metadata in response:
            # generate unique table name
            table_name = self._format_athena_compact_table_name(
                column_metadata["table_schema"], column_metadata["table_name"]
            )
            # init table if not exists
            if table_name not in unique_tables:
                unique_tables[table_name] = self._build_table(column_metadata)

            # add column to table
            unique_tables[table_name].columns.append(
                self._build_column(column_metadata)
            )

        return list(unique_tables.values())

    def get_constraints(self) -> list[Constraint]:
        """Return no constraints."""
        # Athena doesn't support foreign key constraints
        return []

    def get_version(self) -> str:
        """Return a fixed descriptive string; no version query is issued."""
        return "AWS Athena - Follow AWS service versioning"

    def _build_column(self, row) -> Column:
        """Build a ``Column`` from one ``information_schema`` result row."""
        return Column(
            name=row["column_name"],
            type=self._transform_column_type(row["data_type"]),
            notNull=row["is_nullable"].lower() == "no",
            description="",  # Athena doesn't provide column descriptions in information_schema
            properties=None,
        )

    def _build_table(self, row) -> Table:
        """Build a column-less ``Table`` from one result row."""
        return Table(
            name=self._format_athena_compact_table_name(
                row["table_schema"], row["table_name"]
            ),
            description="",  # Athena doesn't provide table descriptions in information_schema
            columns=[],
            properties=TableProperties(
                schema=row["table_schema"],
                catalog=row["table_catalog"],
                table=row["table_name"],
            ),
            primaryKey="",
        )

    def _format_athena_compact_table_name(self, schema: str, table: str) -> str:
        """Return the ``schema.table`` name used to identify a table."""
        return f"{schema}.{table}"

    def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType:
        """Map a reported column type to a ``RustWrenEngineColumnType``.

        Any parenthesised arguments (e.g. ``varchar(255)``, ``decimal(10,2)``)
        are stripped and the lookup is case-insensitive. Types missing from the
        mapping yield ``RustWrenEngineColumnType.UNKNOWN``.
        """
        base_type = re.sub(r"\(.*\)", "", data_type).strip()
        return self._COLUMN_TYPE_MAPPING.get(
            base_type.lower(), RustWrenEngineColumnType.UNKNOWN
        )
douenergy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.