62 | 62 |
63 | 63 | FIELD_ILLEGAL_CHARACTERS = re.compile(r"[^\w]+")
64 | 64 |
| 65 | +TABLE_VALUED_ALIAS_ALIASES = "bigquery_table_valued_alias_aliases"
| 66 | +
65 | 67 |
66 | 68 | def assert_(cond, message="Assertion failed"):  # pragma: NO COVER
67 | 69 |     if not cond:

@@ -114,39 +116,41 @@ def format_label(self, label, name=None):

114 | 116 |
115 | 117 |
116 | 118 | _type_map = {
117 | | -    "STRING": types.String,
118 | | -    "BOOL": types.Boolean,
| 119 | +    "ARRAY": types.ARRAY,
| 120 | +    "BIGNUMERIC": types.Numeric,
119 | 121 |     "BOOLEAN": types.Boolean,
120 | | -    "INT64": types.Integer,
121 | | -    "INTEGER": types.Integer,
| 122 | +    "BOOL": types.Boolean,
| 123 | +    "BYTES": types.BINARY,
| 124 | +    "DATETIME": types.DATETIME,
| 125 | +    "DATE": types.DATE,
122 | 126 |     "FLOAT64": types.Float,
123 | 127 |     "FLOAT": types.Float,
| 128 | +    "INT64": types.Integer,
| 129 | +    "INTEGER": types.Integer,
| 130 | +    "NUMERIC": types.Numeric,
| 131 | +    "RECORD": types.JSON,
| 132 | +    "STRING": types.String,
124 | 133 |     "TIMESTAMP": types.TIMESTAMP,
125 | | -    "DATETIME": types.DATETIME,
126 | | -    "DATE": types.DATE,
127 | | -    "BYTES": types.BINARY,
128 | 134 |     "TIME": types.TIME,
129 | | -    "RECORD": types.JSON,
130 | | -    "NUMERIC": types.Numeric,
131 | | -    "BIGNUMERIC": types.Numeric,
132 | 135 | }
133 | 136 |
134 | 137 | # By convention, dialect-provided types are spelled with all upper case.
135 | | -STRING = _type_map["STRING"]
136 | | -BOOL = _type_map["BOOL"]
| 138 | +ARRAY = _type_map["ARRAY"]
| 139 | +BIGNUMERIC = _type_map["NUMERIC"]
137 | 140 | BOOLEAN = _type_map["BOOLEAN"]
138 | | -INT64 = _type_map["INT64"]
139 | | -INTEGER = _type_map["INTEGER"]
| 141 | +BOOL = _type_map["BOOL"]
| 142 | +BYTES = _type_map["BYTES"]
| 143 | +DATETIME = _type_map["DATETIME"]
| 144 | +DATE = _type_map["DATE"]
140 | 145 | FLOAT64 = _type_map["FLOAT64"]
141 | 146 | FLOAT = _type_map["FLOAT"]
| 147 | +INT64 = _type_map["INT64"]
| 148 | +INTEGER = _type_map["INTEGER"]
| 149 | +NUMERIC = _type_map["NUMERIC"]
| 150 | +RECORD = _type_map["RECORD"]
| 151 | +STRING = _type_map["STRING"]
142 | 152 | TIMESTAMP = _type_map["TIMESTAMP"]
143 | | -DATETIME = _type_map["DATETIME"]
144 | | -DATE = _type_map["DATE"]
145 | | -BYTES = _type_map["BYTES"]
146 | 153 | TIME = _type_map["TIME"]
147 | | -RECORD = _type_map["RECORD"]
148 | | -NUMERIC = _type_map["NUMERIC"]
149 | | -BIGNUMERIC = _type_map["NUMERIC"]
150 | 154 |
151 | 155 | try:
152 | 156 |     _type_map["GEOGRAPHY"] = GEOGRAPHY
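
For readers unfamiliar with how these names are consumed, here is a small, self-contained sketch (standard SQLAlchemy only) of what the upper-case exports above resolve to and how they might appear in a table definition. The `events` table and its columns are purely hypothetical and not part of this commit.

from sqlalchemy import Column, MetaData, Table, types

# Mirrors a few entries of the _type_map / exports added in the diff above.
STRING = types.String
BIGNUMERIC = types.Numeric   # BIGNUMERIC and NUMERIC both resolve to types.Numeric
RECORD = types.JSON          # RECORD maps to types.JSON in this commit
TIMESTAMP = types.TIMESTAMP

metadata = MetaData()
events = Table(
    "events",                # hypothetical table
    metadata,
    Column("name", STRING),
    Column("amount", BIGNUMERIC),
    Column("payload", RECORD),
    Column("created_at", TIMESTAMP),
)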
@@ -246,6 +250,56 @@ def visit_insert(self, insert_stmt, asfrom=False, **kw):

246 | 250 |             insert_stmt, asfrom=False, **kw
247 | 251 |         )
248 | 252 |
| 253 | +    def visit_table_valued_alias(self, element, **kw):
| 254 | +        # When using table-valued functions, like UNNEST, BigQuery requires a
| 255 | +        # FROM for any table referenced in the function, including expressions
| 256 | +        # in function arguments.
| 257 | +        #
| 258 | +        # For example, given SQLAlchemy code:
| 259 | +        #
| 260 | +        #     print(
| 261 | +        #         select([func.unnest(foo.c.objects).alias('foo_objects').column])
| 262 | +        #         .compile(engine))
| 263 | +        #
| 264 | +        # Left to its own devices, SQLAlchemy would output:
| 265 | +        #
| 266 | +        #     SELECT `foo_objects`
| 267 | +        #     FROM unnest(`foo`.`objects`) AS `foo_objects`
| 268 | +        #
| 269 | +        # But BigQuery doesn't understand the `foo` reference unless
| 270 | +        # we add a reference to `foo` in the FROM:
| 271 | +        #
| 272 | +        #     SELECT foo_objects
| 273 | +        #     FROM `foo`, UNNEST(`foo`.`objects`) as foo_objects
| 274 | +        #
| 275 | +        # This is tricky because:
| 276 | +        # 1. We have to find the table references.
| 277 | +        # 2. We can't practically know whether there's already a FROM for a table.
| 278 | +        #
| 279 | +        # We leverage visit_column to find a table reference. Whenever we find
| 280 | +        # one, we create an alias for it, so as not to conflict with an existing
| 281 | +        # reference if one is present.
| 282 | +        #
| 283 | +        # This requires communicating between this function and visit_column.
| 284 | +        # We do this by sticking a dictionary in the keyword arguments.
| 285 | +        # This dictionary:
| 286 | +        # a. Tells visit_column that it's a table-valued alias expression, and
| 287 | +        # b. Gives it a place to record the aliases it creates.
| 288 | +        #
| 289 | +        # This function creates aliases in the FROM list for any aliases recorded
| 290 | +        # by visit_column.
| 291 | +
| 292 | +        kw[TABLE_VALUED_ALIAS_ALIASES] = {}
| 293 | +        ret = super().visit_table_valued_alias(element, **kw)
| 294 | +        aliases = kw.pop(TABLE_VALUED_ALIAS_ALIASES)
| 295 | +        if aliases:
| 296 | +            aliases = ", ".join(
| 297 | +                f"{self.preparer.quote(tablename)} {self.preparer.quote(alias)}"
| 298 | +                for tablename, alias in aliases.items()
| 299 | +            )
| 300 | +            ret = f"{aliases}, {ret}"
| 301 | +        return ret
| 302 | +
249 | 303 |     def visit_column(
250 | 304 |         self,
251 | 305 |         column,
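
The comment in visit_table_valued_alias is easier to follow with a runnable statement. Below is a minimal sketch of that scenario: the `foo` table with its ARRAY column `objects` is hypothetical, and importing the dialect as `sqlalchemy_bigquery.BigQueryDialect` is an assumption about the installed package name, not something this commit establishes.

from sqlalchemy import Column, MetaData, Table, func, select, types

metadata = MetaData()
foo = Table("foo", metadata, Column("objects", types.ARRAY(types.String)))

# Same construct as in the comment: a table-valued UNNEST aliased as
# `foo_objects`, selecting its single column.
foo_objects = func.unnest(foo.c.objects).alias("foo_objects")
stmt = select(foo_objects.column)

# Compiled with the BigQuery dialect, the FROM clause should now also name
# `foo` (under a generated alias) so the reference inside the UNNEST argument
# resolves.
from sqlalchemy_bigquery import BigQueryDialect  # assumed import path

print(stmt.compile(dialect=BigQueryDialect()))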
@@ -281,6 +335,13 @@ def visit_column(

281 | 335 |             tablename = table.name
282 | 336 |             if isinstance(tablename, elements._truncated_label):
283 | 337 |                 tablename = self._truncated_identifier("alias", tablename)
| 338 | +            elif TABLE_VALUED_ALIAS_ALIASES in kwargs:
| 339 | +                aliases = kwargs[TABLE_VALUED_ALIAS_ALIASES]
| 340 | +                if tablename not in aliases:
| 341 | +                    aliases[tablename] = self.anon_map[
| 342 | +                        f"{TABLE_VALUED_ALIAS_ALIASES} {tablename}"
| 343 | +                    ]
| 344 | +                tablename = aliases[tablename]
284 | 345 |
285 | 346 |             return self.preparer.quote(tablename) + "." + name
286 | 347 |
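
The two halves above cooperate only through the dictionary stored under TABLE_VALUED_ALIAS_ALIASES in the keyword arguments. The toy sketch below (plain Python, not compiler code; all names and SQL fragments are illustrative) shows the shape of that handshake: the outer visit seeds the dict, the inner visit records the table it rewrote, and the outer visit prepends the corresponding FROM entry.

def visit_table_valued_alias_toy(render_inner, **kw):
    kw["aliases"] = {}                  # plays the role of TABLE_VALUED_ALIAS_ALIASES
    ret = render_inner(**kw)            # inner visits may record table aliases here
    aliases = kw.pop("aliases")
    if aliases:
        froms = ", ".join(f"`{table}` `{alias}`" for table, alias in aliases.items())
        ret = f"{froms}, {ret}"         # prepend the extra FROM entries
    return ret

def visit_column_toy(**kw):
    aliases = kw.get("aliases")
    if aliases is not None:
        # First reference to `foo`: invent an alias and remember it, much as
        # the real code does via self.anon_map.
        aliases.setdefault("foo", "foo_1")
        return "unnest(`foo_1`.`objects`) AS `foo_objects`"
    return "unnest(`foo`.`objects`) AS `foo_objects`"

print(visit_table_valued_alias_toy(visit_column_toy))
# `foo` `foo_1`, unnest(`foo_1`.`objects`) AS `foo_objects`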