Skip to content

Commit d118238

Browse files
author
Jim Fulton
authored
feat: Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types (#180)
1 parent fe0591a commit d118238

File tree

10 files changed

+241
-95
lines changed

10 files changed

+241
-95
lines changed

pybigquery/_helpers.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,6 @@ def substitute_re_method(r, flags=0, repl=None):
6969

7070
r = re.compile(r, flags)
7171

72-
if isinstance(repl, str):
73-
return lambda self, s: r.sub(repl, s)
74-
7572
@functools.wraps(repl)
7673
def sub(self, s, *args, **kw):
7774
def repl_(m):
@@ -80,3 +77,8 @@ def repl_(m):
8077
return r.sub(repl_, s)
8178

8279
return sub
80+
81+
82+
def substitute_string_re_method(r, *, repl, flags=0):
83+
r = re.compile(r, flags)
84+
return lambda self, s: r.sub(repl, s)

pybigquery/sqlalchemy_bigquery.py

+90-35
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ def format_label(self, label, name=None):
122122
"BYTES": types.BINARY,
123123
"TIME": types.TIME,
124124
"RECORD": types.JSON,
125-
"NUMERIC": types.DECIMAL,
126-
"BIGNUMERIC": types.DECIMAL,
125+
"NUMERIC": types.Numeric,
126+
"BIGNUMERIC": types.Numeric,
127127
}
128128

129129
STRING = _type_map["STRING"]
@@ -158,23 +158,33 @@ def get_insert_default(self, column): # pragma: NO COVER
158158
elif isinstance(column.type, String):
159159
return str(uuid.uuid4())
160160

161-
__remove_type_from_empty_in = _helpers.substitute_re_method(
162-
r" IN UNNEST\(\[ ("
163-
r"(?:NULL|\(NULL(?:, NULL)+\))\)"
164-
r" (?:AND|OR) \(1 !?= 1"
165-
r")"
166-
r"(?:[:][A-Z0-9]+)?"
167-
r" \]\)",
168-
re.IGNORECASE,
169-
r" IN(\1)",
161+
__remove_type_from_empty_in = _helpers.substitute_string_re_method(
162+
r"""
163+
\sIN\sUNNEST\(\[\s # ' IN UNNEST([ '
164+
(
165+
(?:NULL|\(NULL(?:,\sNULL)+\))\) # '(NULL)' or '((NULL, NULL, ...))'
166+
\s(?:AND|OR)\s\(1\s!?=\s1 # ' and 1 != 1' or ' or 1 = 1'
167+
)
168+
(?:[:][A-Z0-9]+)? # Maybe ':TYPE' (e.g. ':INT64')
169+
\s\]\) # Close: ' ])'
170+
""",
171+
flags=re.IGNORECASE | re.VERBOSE,
172+
repl=r" IN(\1)",
170173
)
171174

172175
@_helpers.substitute_re_method(
173-
r" IN UNNEST\(\[ "
174-
r"(%\([^)]+_\d+\)s(?:, %\([^)]+_\d+\)s)*)?" # Placeholders. See below.
175-
r":([A-Z0-9]+)" # Type
176-
r" \]\)",
177-
re.IGNORECASE,
176+
r"""
177+
\sIN\sUNNEST\(\[\s # ' IN UNNEST([ '
178+
( # Placeholders. See below.
179+
%\([^)]+_\d+\)s # Placeholder '%(foo_1)s'
180+
(?:,\s # 0 or more placeholders
181+
%\([^)]+_\d+\)s
182+
)*
183+
)?
184+
:([A-Z0-9]+) # Type ':TYPE' (e.g. ':INT64')
185+
\s\]\) # Close: ' ])'
186+
""",
187+
flags=re.IGNORECASE | re.VERBOSE,
178188
)
179189
def __distribute_types_to_expanded_placeholders(self, m):
180190
# If we have an in parameter, it sometimes gets expanded to 0 or more
@@ -282,10 +292,20 @@ def group_by_clause(self, select, **kw):
282292
"EXPANDING" if __sqlalchemy_version_info < (1, 4) else "POSTCOMPILE"
283293
)
284294

285-
__in_expanding_bind = _helpers.substitute_re_method(
286-
fr" IN \((\[" fr"{__expandng_text}" fr"_[^\]]+\](:[A-Z0-9]+)?)\)$",
287-
re.IGNORECASE,
288-
r" IN UNNEST([ \1 ])",
295+
__in_expanding_bind = _helpers.substitute_string_re_method(
296+
fr"""
297+
\sIN\s\( # ' IN ('
298+
(
299+
\[ # Expanding placeholder
300+
{__expandng_text} # e.g. [EXPANDING_foo_1]
301+
_[^\]]+ #
302+
\]
303+
(:[A-Z0-9]+)? # type marker (e.g. ':INT64')
304+
)
305+
\)$ # close w ending )
306+
""",
307+
flags=re.IGNORECASE | re.VERBOSE,
308+
repl=r" IN UNNEST([ \1 ])",
289309
)
290310

291311
def visit_in_op_binary(self, binary, operator_, **kw):
@@ -360,6 +380,18 @@ def visit_notendswith_op_binary(self, binary, operator, **kw):
360380

361381
__expanded_param = re.compile(fr"\(\[" fr"{__expandng_text}" fr"_[^\]]+\]\)$").match
362382

383+
__remove_type_parameter = _helpers.substitute_string_re_method(
384+
r"""
385+
(STRING|BYTES|NUMERIC|BIGNUMERIC) # Base type
386+
\( # Dimensions e.g. '(42)', '(4, 2)':
387+
\s*\d+\s* # First dimension
388+
(?:,\s*\d+\s*)* # Remaining dimensions
389+
\)
390+
""",
391+
repl=r"\1",
392+
flags=re.VERBOSE | re.IGNORECASE,
393+
)
394+
363395
def visit_bindparam(
364396
self,
365397
bindparam,
@@ -397,6 +429,7 @@ def visit_bindparam(
397429
if bq_type[-1] == ">" and bq_type.startswith("ARRAY<"):
398430
# Values get arrayified at a lower level.
399431
bq_type = bq_type[6:-1]
432+
bq_type = self.__remove_type_parameter(bq_type)
400433

401434
assert_(param != "%s", f"Unexpected param: {param}")
402435

@@ -429,6 +462,10 @@ def visit_FLOAT(self, type_, **kw):
429462
visit_REAL = visit_FLOAT
430463

431464
def visit_STRING(self, type_, **kw):
465+
if (type_.length is not None) and isinstance(
466+
kw.get("type_expression"), Column
467+
): # column def
468+
return f"STRING({type_.length})"
432469
return "STRING"
433470

434471
visit_CHAR = visit_NCHAR = visit_STRING
@@ -438,17 +475,29 @@ def visit_ARRAY(self, type_, **kw):
438475
return "ARRAY<{}>".format(self.process(type_.item_type, **kw))
439476

440477
def visit_BINARY(self, type_, **kw):
478+
if type_.length is not None:
479+
return f"BYTES({type_.length})"
441480
return "BYTES"
442481

443482
visit_VARBINARY = visit_BINARY
444483

445484
def visit_NUMERIC(self, type_, **kw):
446-
if (type_.precision is not None and type_.precision > 38) or (
447-
type_.scale is not None and type_.scale > 9
448-
):
449-
return "BIGNUMERIC"
485+
if (type_.precision is not None) and isinstance(
486+
kw.get("type_expression"), Column
487+
): # column def
488+
if type_.scale is not None:
489+
suffix = f"({type_.precision}, {type_.scale})"
490+
else:
491+
suffix = f"({type_.precision})"
450492
else:
451-
return "NUMERIC"
493+
suffix = ""
494+
495+
return (
496+
"BIGNUMERIC"
497+
if (type_.precision is not None and type_.precision > 38)
498+
or (type_.scale is not None and type_.scale > 9)
499+
else "NUMERIC"
500+
) + suffix
452501

453502
visit_DECIMAL = visit_NUMERIC
454503

@@ -800,18 +849,16 @@ def _get_columns_helper(self, columns, cur_columns):
800849
"""
801850
results = []
802851
for col in columns:
803-
results += [
804-
SchemaField(
805-
name=".".join(col.name for col in cur_columns + [col]),
806-
field_type=col.field_type,
807-
mode=col.mode,
808-
description=col.description,
809-
fields=col.fields,
810-
)
811-
]
852+
results += [col]
812853
if col.field_type == "RECORD":
813854
cur_columns.append(col)
814-
results += self._get_columns_helper(col.fields, cur_columns)
855+
fields = [
856+
SchemaField.from_api_repr(
857+
dict(f.to_api_repr(), name=f"{col.name}.{f.name}")
858+
)
859+
for f in col.fields
860+
]
861+
results += self._get_columns_helper(fields, cur_columns)
815862
cur_columns.pop()
816863
return results
817864

@@ -829,13 +876,21 @@ def get_columns(self, connection, table_name, schema=None, **kw):
829876
)
830877
coltype = types.NullType
831878

879+
if col.field_type.endswith("NUMERIC"):
880+
coltype = coltype(precision=col.precision, scale=col.scale)
881+
elif col.field_type == "STRING" or col.field_type == "BYTES":
882+
coltype = coltype(col.max_length)
883+
832884
result.append(
833885
{
834886
"name": col.name,
835887
"type": types.ARRAY(coltype) if col.mode == "REPEATED" else coltype,
836888
"nullable": col.mode == "NULLABLE" or col.mode == "REPEATED",
837889
"comment": col.description,
838890
"default": None,
891+
"precision": col.precision,
892+
"scale": col.scale,
893+
"max_length": col.max_length,
839894
}
840895
)
841896

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def readme():
6767
install_requires=[
6868
"google-api-core>=1.23.0", # Work-around bug in cloud core deps.
6969
"google-auth>=1.24.0,<2.0dev", # Work around pip wack.
70-
"google-cloud-bigquery>=2.16.1",
70+
"google-cloud-bigquery>=2.17.0",
7171
"sqlalchemy>=1.2.0,<1.5.0dev",
7272
"future",
7373
],

testing/constraints-3.6.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@
66
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
77
sqlalchemy==1.2.0
88
google-auth==1.24.0
9-
google-cloud-bigquery==2.16.1
9+
google-cloud-bigquery==2.17.0
1010
google-api-core==1.23.0

tests/sqlalchemy_dialect_compliance/test_dialect_compliance.py

-8
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ def literal(value):
7171

7272
else:
7373
from sqlalchemy.testing.suite import (
74-
ComponentReflectionTestExtra as _ComponentReflectionTestExtra,
7574
FetchLimitOffsetTest as _FetchLimitOffsetTest,
7675
RowCountTest as _RowCountTest,
7776
)
@@ -107,13 +106,6 @@ def test_limit_render_multiple_times(self, connection):
107106
# over the backquotes that we add everywhere. XXX Why do we do that?
108107
del PostCompileParamsTest
109108

110-
class ComponentReflectionTestExtra(_ComponentReflectionTestExtra):
111-
@pytest.mark.skip("BQ types don't have parameters like precision and length")
112-
def test_numeric_reflection(self):
113-
pass
114-
115-
test_varchar_reflection = test_numeric_reflection
116-
117109
class TimestampMicrosecondsTest(_TimestampMicrosecondsTest):
118110

119111
data = datetime.datetime(2012, 10, 15, 12, 57, 18, 396, tzinfo=pytz.UTC)

tests/system/test_sqlalchemy_bigquery.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
{"name": "timestamp", "type": types.TIMESTAMP(), "nullable": True, "default": None},
9393
{"name": "string", "type": types.String(), "nullable": True, "default": None},
9494
{"name": "float", "type": types.Float(), "nullable": True, "default": None},
95-
{"name": "numeric", "type": types.DECIMAL(), "nullable": True, "default": None},
95+
{"name": "numeric", "type": types.Numeric(), "nullable": True, "default": None},
9696
{"name": "boolean", "type": types.Boolean(), "nullable": True, "default": None},
9797
{"name": "date", "type": types.DATE(), "nullable": True, "default": None},
9898
{"name": "datetime", "type": types.DATETIME(), "nullable": True, "default": None},

0 commit comments

Comments
 (0)