Skip to content

Commit b7b6000

Browse files
author
Jim Fulton
authored
feat: Add support for SQLAlchemy 1.4 (#177)
1 parent 9dd3cf4 commit b7b6000

19 files changed

+499
-148
lines changed

noxfile.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@
2828
BLACK_PATHS = ["docs", "pybigquery", "tests", "noxfile.py", "setup.py"]
2929

3030
DEFAULT_PYTHON_VERSION = "3.8"
31-
SYSTEM_TEST_PYTHON_VERSIONS = ["3.9"]
31+
32+
# We're using two Python versions to test with sqlalchemy 1.3 and 1.4.
33+
SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.9"]
3234
UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
3335

3436
CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()
@@ -47,6 +49,7 @@
4749

4850
# Error if a python version is missing
4951
nox.options.error_on_missing_interpreters = True
52+
nox.options.stop_on_first_error = True
5053

5154

5255
@nox.session(python=DEFAULT_PYTHON_VERSION)

pybigquery/_helpers.py

+22
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
# license that can be found in the LICENSE file or at
55
# https://opensource.org/licenses/MIT.
66

7+
import functools
8+
import re
9+
710
from google.api_core import client_info
811
import google.auth
912
from google.cloud import bigquery
@@ -58,3 +61,22 @@ def create_bigquery_client(
5861
location=location,
5962
default_query_job_config=default_query_job_config,
6063
)
64+
65+
66+
def substitute_re_method(r, flags=0, repl=None):
    """Build a method that performs a regular-expression substitution.

    Can be called directly with a replacement, or used as a decorator by
    omitting ``repl``; the decorated function then becomes the replacement
    callback.

    Args:
        r: Regular-expression pattern, compiled with ``re.compile``.
        flags: ``re`` flags passed to ``re.compile``.
        repl: Replacement.  A string is handed to ``pattern.sub`` as the
            replacement template.  Any other non-``None`` value is treated
            as a callable and invoked as ``repl(self, match, *args, **kw)``
            for every match.  ``None`` selects the decorator form.

    Returns:
        When ``repl`` is ``None``, a decorator expecting the replacement
        callable.  Otherwise a function ``(self, s, ...)`` that returns ``s``
        with every match of the pattern substituted.
    """
    if repl is None:
        # Decorator form: defer until the replacement function is supplied.
        return lambda f: substitute_re_method(r, flags, f)

    pattern = re.compile(r, flags)

    if isinstance(repl, str):
        # Fixed replacement template; ``self`` is accepted but unused.
        return lambda self, s: pattern.sub(repl, s)

    @functools.wraps(repl)
    def sub(self, s, *args, **kw):
        # Bind ``self`` and any extra arguments so that ``re`` can call the
        # replacement with just the match object.
        def repl_(m):
            return repl(self, m, *args, **kw)

        return pattern.sub(repl_, s)

    return sub

pybigquery/parse_url.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def parse_boolean(bool_string):
4444

4545

4646
def parse_url(url): # noqa: C901
47-
query = url.query
47+
query = dict(url.query) # need mutable query.
4848

4949
# use_legacy_sql (legacy)
5050
if "use_legacy_sql" in query:

pybigquery/requirements.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,14 @@ def comment_reflection(self):
154154
def unicode_ddl(self):
155155
"""Target driver must support some degree of non-ascii symbol
156156
names.
157+
158+
However:
159+
160+
Must contain only letters (a-z, A-Z), numbers (0-9), or underscores (_)
161+
162+
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#column_name_and_column_schema
157163
"""
158-
return supported()
164+
return unsupported()
159165

160166
@property
161167
def datetime_literals(self):
@@ -219,6 +225,14 @@ def order_by_label_with_expression(self):
219225
"""
220226
return supported()
221227

228+
@property
def sql_expression_limit_offset(self):
    """Target database can render LIMIT and/or OFFSET with a complete
    SQL expression, such as one that uses the addition operator.

    Reported as unsupported.
    """
    return unsupported()
235+
222236

223237
class WithSchemas(Requirements):
224238
"""

pybigquery/sqlalchemy_bigquery.py

+77-33
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from google.cloud.bigquery.table import TableReference
3535
from google.api_core.exceptions import NotFound
3636

37+
import sqlalchemy
3738
import sqlalchemy.sql.sqltypes
3839
import sqlalchemy.sql.type_api
3940
from sqlalchemy.exc import NoSuchTableError
@@ -57,6 +58,11 @@
5758
FIELD_ILLEGAL_CHARACTERS = re.compile(r"[^\w]+")
5859

5960

61+
def assert_(cond, message="Assertion failed"):  # pragma: NO COVER
    """Raise ``AssertionError(message)`` when ``cond`` is falsy.

    Implemented with an explicit ``raise`` rather than the ``assert``
    statement.

    Args:
        cond: Value whose truthiness is checked.
        message: Text carried by the raised ``AssertionError``.

    Raises:
        AssertionError: If ``cond`` is falsy.
    """
    if not cond:
        raise AssertionError(message)
64+
65+
6066
class BigQueryIdentifierPreparer(IdentifierPreparer):
6167
"""
6268
Set containing everything
@@ -152,15 +158,25 @@ def get_insert_default(self, column): # pragma: NO COVER
152158
elif isinstance(column.type, String):
153159
return str(uuid.uuid4())
154160

155-
def pre_exec(
156-
self,
157-
in_sub=re.compile(
158-
r" IN UNNEST\(\[ "
159-
r"(%\([^)]+_\d+\)s(?:, %\([^)]+_\d+\)s)*)?" # Placeholders. See below.
160-
r":([A-Z0-9]+)" # Type
161-
r" \]\)"
162-
).sub,
163-
):
161+
__remove_type_from_empty_in = _helpers.substitute_re_method(
162+
r" IN UNNEST\(\[ ("
163+
r"(?:NULL|\(NULL(?:, NULL)+\))\)"
164+
r" (?:AND|OR) \(1 !?= 1"
165+
r")"
166+
r"(?:[:][A-Z0-9]+)?"
167+
r" \]\)",
168+
re.IGNORECASE,
169+
r" IN(\1)",
170+
)
171+
172+
@_helpers.substitute_re_method(
173+
r" IN UNNEST\(\[ "
174+
r"(%\([^)]+_\d+\)s(?:, %\([^)]+_\d+\)s)*)?" # Placeholders. See below.
175+
r":([A-Z0-9]+)" # Type
176+
r" \]\)",
177+
re.IGNORECASE,
178+
)
179+
def __distribute_types_to_expanded_placeholders(self, m):
164180
# If we have an in parameter, it sometimes gets expanded to 0 or more
165181
# parameters and we need to move the type marker to each
166182
# parameter.
@@ -171,29 +187,29 @@ def pre_exec(
171187
# suffixes reflect that when an array parameter is expanded,
172188
# numeric suffixes are added. For example, a placeholder like
173189
# `%(foo)s` gets expanded to `%(foo_0)s, %(foo_1)s, ...`.
190+
placeholders, type_ = m.groups()
191+
if placeholders:
192+
placeholders = placeholders.replace(")", f":{type_})")
193+
else:
194+
placeholders = ""
195+
return f" IN UNNEST([ {placeholders} ])"
174196

175-
def repl(m):
176-
placeholders, type_ = m.groups()
177-
if placeholders:
178-
placeholders = placeholders.replace(")", f":{type_})")
179-
else:
180-
placeholders = ""
181-
return f" IN UNNEST([ {placeholders} ])"
182-
183-
self.statement = in_sub(repl, self.statement)
197+
def pre_exec(self):
    """Rewrite ``self.statement`` in two passes before execution.

    The statement is first passed through ``__remove_type_from_empty_in``;
    the result is then passed through
    ``__distribute_types_to_expanded_placeholders`` and stored back on
    ``self.statement``.
    """
    without_empty_in_types = self.__remove_type_from_empty_in(self.statement)
    self.statement = self.__distribute_types_to_expanded_placeholders(
        without_empty_in_types
    )
184201

185202

186203
class BigQueryCompiler(SQLCompiler):
187204

188205
compound_keywords = SQLCompiler.compound_keywords.copy()
189-
compound_keywords[selectable.CompoundSelect.UNION] = "UNION ALL"
206+
compound_keywords[selectable.CompoundSelect.UNION] = "UNION DISTINCT"
207+
compound_keywords[selectable.CompoundSelect.UNION_ALL] = "UNION ALL"
190208

191-
def __init__(self, dialect, statement, column_keys=None, inline=False, **kwargs):
209+
def __init__(self, dialect, statement, *args, **kwargs):
192210
if isinstance(statement, Column):
193211
kwargs["compile_kwargs"] = util.immutabledict({"include_table": False})
194-
super(BigQueryCompiler, self).__init__(
195-
dialect, statement, column_keys, inline, **kwargs
196-
)
212+
super(BigQueryCompiler, self).__init__(dialect, statement, *args, **kwargs)
197213

198214
def visit_insert(self, insert_stmt, asfrom=False, **kw):
199215
# The (internal) documentation for `inline` is confusing, but
@@ -260,24 +276,37 @@ def group_by_clause(self, select, **kw):
260276
# no way to tell sqlalchemy that, so it works harder than
261277
# necessary and makes us do the same.
262278

263-
_in_expanding_bind = re.compile(r" IN \((\[EXPANDING_\w+\](:[A-Z0-9]+)?)\)$")
279+
# Keep only the purely numeric dotted components.  Calling int() on every
# component fails for pre-release or development versions such as
# "1.4.0b1", whose last component is not a plain integer.
__sqlalchemy_version_info = tuple(
    int(digits) for digits in re.findall(r"(?:^|\.)(\d+)", sqlalchemy.__version__)
)
264280

265-
def _unnestify_in_expanding_bind(self, in_text):
266-
return self._in_expanding_bind.sub(r" IN UNNEST([ \1 ])", in_text)
281+
__expandng_text = (
282+
"EXPANDING" if __sqlalchemy_version_info < (1, 4) else "POSTCOMPILE"
283+
)
284+
285+
__in_expanding_bind = _helpers.substitute_re_method(
286+
fr" IN \((\[" fr"{__expandng_text}" fr"_[^\]]+\](:[A-Z0-9]+)?)\)$",
287+
re.IGNORECASE,
288+
r" IN UNNEST([ \1 ])",
289+
)
267290

268291
def visit_in_op_binary(self, binary, operator_, **kw):
    """Render an ``IN`` comparison, then apply ``__in_expanding_bind``.

    The text produced by ``_generate_generic_binary`` with the `` IN ``
    operator string is passed through the class's ``__in_expanding_bind``
    substitution and returned.
    """
    in_text = self._generate_generic_binary(binary, " IN ", **kw)
    return self.__in_expanding_bind(in_text)
272295

273296
def visit_empty_set_expr(self, element_types):
274297
return ""
275298

276-
def visit_notin_op_binary(self, binary, operator, **kw):
277-
return self._unnestify_in_expanding_bind(
278-
self._generate_generic_binary(binary, " NOT IN ", **kw)
299+
def visit_not_in_op_binary(self, binary, operator, **kw):
    """Render a parenthesized ``NOT IN`` comparison.

    The text produced by ``_generate_generic_binary`` with the `` NOT IN ``
    operator string is passed through ``__in_expanding_bind`` and the result
    is wrapped in parentheses.
    """
    not_in_text = self._generate_generic_binary(binary, " NOT IN ", **kw)
    return "(" + self.__in_expanding_bind(not_in_text) + ")"
280307

308+
visit_notin_op_binary = visit_not_in_op_binary # before 1.4
309+
281310
############################################################################
282311

283312
############################################################################
@@ -327,6 +356,10 @@ def visit_notendswith_op_binary(self, binary, operator, **kw):
327356

328357
############################################################################
329358

359+
__placeholder = re.compile(r"%\(([^\]:]+)(:[^\]:]+)?\)s$").match
360+
361+
__expanded_param = re.compile(fr"\(\[" fr"{__expandng_text}" fr"_[^\]]+\]\)$").match
362+
330363
def visit_bindparam(
331364
self,
332365
bindparam,
@@ -365,8 +398,20 @@ def visit_bindparam(
365398
# Values get arrayified at a lower level.
366399
bq_type = bq_type[6:-1]
367400

368-
assert param != "%s"
369-
return param.replace(")", f":{bq_type})")
401+
assert_(param != "%s", f"Unexpected param: {param}")
402+
403+
if bindparam.expanding:
404+
assert_(self.__expanded_param(param), f"Unexpected param: {param}")
405+
param = param.replace(")", f":{bq_type})")
406+
407+
else:
408+
m = self.__placeholder(param)
409+
if m:
410+
name, type_ = m.groups()
411+
assert_(type_ is None)
412+
param = f"%({name}:{bq_type})s"
413+
414+
return param
370415

371416

372417
class BigQueryTypeCompiler(GenericTypeCompiler):
@@ -541,7 +586,6 @@ class BigQueryDialect(DefaultDialect):
541586
supports_unicode_statements = True
542587
supports_unicode_binds = True
543588
supports_native_decimal = True
544-
returns_unicode_strings = True
545589
description_encoding = None
546590
supports_native_boolean = True
547591
supports_simple_order_by_label = True

setup.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,10 @@ def readme():
6565
],
6666
platforms="Posix; MacOS X; Windows",
6767
install_requires=[
68-
"sqlalchemy>=1.2.0,<1.4.0dev",
69-
"google-auth>=1.24.0,<2.0dev", # Work around pip wack.
70-
"google-cloud-bigquery>=2.15.0",
7168
"google-api-core>=1.23.0", # Work-around bug in cloud core deps.
69+
"google-auth>=1.24.0,<2.0dev", # Work around pip wack.
70+
"google-cloud-bigquery>=2.16.1",
71+
"sqlalchemy>=1.2.0,<1.5.0dev",
7272
"future",
7373
],
7474
python_requires=">=3.6, <3.10",

testing/constraints-3.6.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@
66
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
77
sqlalchemy==1.2.0
88
google-auth==1.24.0
9-
google-cloud-bigquery==2.15.0
9+
google-cloud-bigquery==2.16.1
1010
google-api-core==1.23.0

testing/constraints-3.8.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
sqlalchemy==1.3.24

testing/constraints-3.9.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
sqlalchemy>=1.4.13

tests/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,8 @@
2020
from sqlalchemy.dialects import registry
2121

2222
registry.register("bigquery", "pybigquery.sqlalchemy_bigquery", "BigQueryDialect")
23+
24+
# sqlalchemy's dialect-testing machinery wants an entry like this. It is wack. :(
25+
registry.register(
26+
"bigquery.bigquery", "pybigquery.sqlalchemy_bigquery", "BigQueryDialect"
27+
)

tests/sqlalchemy_dialect_compliance/conftest.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919

2020
import contextlib
2121
import random
22+
import re
2223
import traceback
2324

24-
import sqlalchemy
2525
from sqlalchemy.testing import config
2626
from sqlalchemy.testing.plugin.pytestplugin import * # noqa
2727
from sqlalchemy.testing.plugin.pytestplugin import (
@@ -35,23 +35,28 @@
3535
pybigquery.sqlalchemy_bigquery.BigQueryDialect.preexecute_autoincrement_sequences = True
3636
google.cloud.bigquery.dbapi.connection.Connection.rollback = lambda self: None
3737

38+
_where = re.compile(r"\s+WHERE\s+", re.IGNORECASE).search
3839

3940
# BigQuery requires delete statements to have where clauses. Other
4041
# databases don't and sqlalchemy doesn't include where clauses when
4142
# cleaning up test data. So we add one when we see a delete without a
4243
# where clause when tearing down tests. We only do this during tear
4344
# down, by inspecting the stack, because we don't want to hide bugs
4445
# outside of test house-keeping.
45-
def visit_delete(self, delete_stmt, *args, **kw):
46-
if delete_stmt._whereclause is None and "teardown" in set(
47-
f.name for f in traceback.extract_stack()
48-
):
49-
delete_stmt._whereclause = sqlalchemy.true()
5046

51-
return super(pybigquery.sqlalchemy_bigquery.BigQueryCompiler, self).visit_delete(
47+
48+
def visit_delete(self, delete_stmt, *args, **kw):
    """Compile a DELETE, appending ``WHERE true`` during test teardown.

    The statement is compiled by the parent of ``BigQueryCompiler``.  If the
    resulting SQL has no WHERE clause and some frame on the current call
    stack has "teardown" in its function name (case-insensitive),
    `` WHERE true`` is appended; otherwise the SQL is returned unchanged.
    """
    parent = super(pybigquery.sqlalchemy_bigquery.BigQueryCompiler, self)
    text = parent.visit_delete(delete_stmt, *args, **kw)

    if _where(text):
        return text

    # Only patch statements issued from test clean-up code, so that a missing
    # WHERE clause elsewhere is not hidden.
    in_teardown = any(
        "teardown" in frame.name.lower() for frame in traceback.extract_stack()
    )
    if in_teardown:
        text += " WHERE true"

    return text
59+
5560

5661
pybigquery.sqlalchemy_bigquery.BigQueryCompiler.visit_delete = visit_delete
5762

0 commit comments

Comments
 (0)