Skip to content

Commit e0f1496

Browse files
authored
feat: Feature/list tables page size (#174)
1 parent c8e2cfe commit e0f1496

File tree

8 files changed

+92
-14
lines changed

8 files changed

+92
-14
lines changed

README.rst

+11-1
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,15 @@ By default, ``arraysize`` is set to ``5000``. ``arraysize`` is used to set the b
148148
149149
engine = create_engine('bigquery://project', arraysize=1000)
150150
151+
Page size for dataset.list_tables
152+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
153+
154+
By default, ``list_tables_page_size`` is set to ``1000``. ``list_tables_page_size`` is used to set the ``max_results`` parameter for the `dataset.list_tables`_ operation. To change it, pass ``list_tables_page_size`` to ``create_engine()``:
155+
156+
.. _`dataset.list_tables`: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
157+
.. code-block:: python
158+
159+
engine = create_engine('bigquery://project', list_tables_page_size=100)
151160
152161
Adding a Default Dataset
153162
^^^^^^^^^^^^^^^^^^^^^^^^
@@ -180,7 +189,7 @@ Connection String Parameters
180189

181190
There are many situations where you can't call ``create_engine`` directly, such as when using tools like `Flask SQLAlchemy <http://flask-sqlalchemy.pocoo.org/2.3/>`_. For situations like these, or for situations where you want the ``Client`` to have a `default_query_job_config <https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.client.Client.html#google.cloud.bigquery.client.Client>`_, you can pass many arguments in the query of the connection string.
182191

183-
The ``credentials_path``, ``credentials_info``, ``location``, and ``arraysize`` parameters are used by this library, and the rest are used to create a `QueryJobConfig <https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig>`_
192+
The ``credentials_path``, ``credentials_info``, ``location``, ``arraysize``, and ``list_tables_page_size`` parameters are used by this library, and the rest are used to create a `QueryJobConfig <https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery/generated/google.cloud.bigquery.job.QueryJobConfig.html#google.cloud.bigquery.job.QueryJobConfig>`_
184193

185194
Note that if you want to use query strings, it will be more reliable if you use three slashes, so ``'bigquery:///?a=b'`` will work reliably, but ``'bigquery://?a=b'`` might be interpreted as having a "database" of ``?a=b``, depending on the system being used to parse the connection string.
186195

@@ -193,6 +202,7 @@ Here are examples of all the supported arguments. Any not present are either for
193202
'credentials_path=/some/path/to.json' '&'
194203
'location=some-location' '&'
195204
'arraysize=1000' '&'
205+
'list_tables_page_size=100' '&'
196206
'clustering_fields=a,b,c' '&'
197207
'create_disposition=CREATE_IF_NEEDED' '&'
198208
'destination=different-project.different-dataset.table' '&'

pybigquery/parse_url.py

+30-2
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def parse_url(url): # noqa: C901
6868
dataset_id = url.database or None
6969
arraysize = None
7070
credentials_path = None
71+
list_tables_page_size = None
7172

7273
# location
7374
if "location" in query:
@@ -85,6 +86,16 @@ def parse_url(url): # noqa: C901
8586
except ValueError:
8687
raise ValueError("invalid int in url query arraysize: " + str_arraysize)
8788

89+
if "list_tables_page_size" in query:
90+
str_list_tables_page_size = query.pop("list_tables_page_size")
91+
try:
92+
list_tables_page_size = int(str_list_tables_page_size)
93+
except ValueError:
94+
raise ValueError(
95+
"invalid int in url query list_tables_page_size: "
96+
+ str_list_tables_page_size
97+
)
98+
8899
# if only these "non-config" values were present, the dict will now be empty
89100
if not query:
90101
# if a dataset_id exists, we need to return a job_config that isn't None
@@ -97,9 +108,18 @@ def parse_url(url): # noqa: C901
97108
arraysize,
98109
credentials_path,
99110
QueryJobConfig(),
111+
list_tables_page_size,
100112
)
101113
else:
102-
return project_id, location, dataset_id, arraysize, credentials_path, None
114+
return (
115+
project_id,
116+
location,
117+
dataset_id,
118+
arraysize,
119+
credentials_path,
120+
None,
121+
list_tables_page_size,
122+
)
103123

104124
job_config = QueryJobConfig()
105125

@@ -239,4 +259,12 @@ def parse_url(url): # noqa: C901
239259
"invalid write_disposition in url query: " + query["write_disposition"]
240260
)
241261

242-
return project_id, location, dataset_id, arraysize, credentials_path, job_config
262+
return (
263+
project_id,
264+
location,
265+
dataset_id,
266+
arraysize,
267+
credentials_path,
268+
job_config,
269+
list_tables_page_size,
270+
)

pybigquery/sqlalchemy_bigquery.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,7 @@ def __init__(
657657
credentials_path=None,
658658
location=None,
659659
credentials_info=None,
660+
list_tables_page_size=1000,
660661
*args,
661662
**kwargs,
662663
):
@@ -666,6 +667,7 @@ def __init__(
666667
self.credentials_info = credentials_info
667668
self.location = location
668669
self.dataset_id = None
670+
self.list_tables_page_size = list_tables_page_size
669671

670672
@classmethod
671673
def dbapi(cls):
@@ -694,9 +696,11 @@ def create_connect_args(self, url):
694696
arraysize,
695697
credentials_path,
696698
default_query_job_config,
699+
list_tables_page_size,
697700
) = parse_url(url)
698701

699-
self.arraysize = self.arraysize or arraysize
702+
self.arraysize = arraysize or self.arraysize
703+
self.list_tables_page_size = list_tables_page_size or self.list_tables_page_size
700704
self.location = location or self.location
701705
self.credentials_path = credentials_path or self.credentials_path
702706
self.dataset_id = dataset_id
@@ -737,7 +741,9 @@ def _get_table_or_view_names(self, connection, table_type, schema=None):
737741
continue
738742

739743
try:
740-
tables = client.list_tables(dataset.reference)
744+
tables = client.list_tables(
745+
dataset.reference, page_size=self.list_tables_page_size
746+
)
741747
for table in tables:
742748
if table_type == table.table_type:
743749
result.append(get_table_name(table))

setup.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,9 @@ def readme():
6565
],
6666
platforms="Posix; MacOS X; Windows",
6767
install_requires=[
68-
"google-api-core>=1.23.0", # Work-around bug in cloud core deps.
69-
"google-auth>=1.24.0,<2.0dev", # Work around pip wack.
70-
"google-cloud-bigquery>=2.17.0",
68+
"google-api-core>=1.30.0", # Work-around bug in cloud core deps.
69+
"google-auth>=1.25.0,<2.0dev", # Work around pip wack.
70+
"google-cloud-bigquery>=2.19.0",
7171
"sqlalchemy>=1.2.0,<1.5.0dev",
7272
"future",
7373
],

testing/constraints-3.6.txt

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55
#
66
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
77
sqlalchemy==1.2.0
8-
google-auth==1.24.0
9-
google-cloud-bigquery==2.17.0
10-
google-api-core==1.23.0
8+
google-auth==1.25.0
9+
google-cloud-bigquery==2.19.0
10+
google-api-core==1.30.0

tests/unit/fauxdbi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ def list_datasets(self):
462462
google.cloud.bigquery.Dataset("myproject.yourdataset"),
463463
]
464464

465-
def list_tables(self, dataset):
465+
def list_tables(self, dataset, page_size):
466466
with contextlib.closing(self.connection.connection.cursor()) as cursor:
467467
cursor.execute("select * from sqlite_master")
468468
return [

tests/unit/test_engine.py

+12
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,15 @@ def test_set_arraysize(faux_conn, metadata):
5252

5353
# Because we gave a false array size, the array size wasn't set on the cursor:
5454
assert conn.connection.test_data["arraysize"] == 42
55+
56+
57+
def test_arraysize_querystring_takes_precedence_over_default(faux_conn, metadata):
58+
arraysize = 42
59+
engine = sqlalchemy.create_engine(
60+
f"bigquery://myproject/mydataset?arraysize={arraysize}"
61+
)
62+
sqlalchemy.Table("t", metadata, sqlalchemy.Column("c", sqlalchemy.Integer))
63+
conn = engine.connect()
64+
metadata.create_all(engine)
65+
66+
assert conn.connection.test_data["arraysize"] == arraysize

tests/unit/test_parse_url.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def url_with_everything():
5050
"?credentials_path=/some/path/to.json"
5151
"&location=some-location"
5252
"&arraysize=1000"
53+
"&list_tables_page_size=5000"
5354
"&clustering_fields=a,b,c"
5455
"&create_disposition=CREATE_IF_NEEDED"
5556
"&destination=different-project.different-dataset.table"
@@ -72,12 +73,14 @@ def test_basic(url_with_everything):
7273
arraysize,
7374
credentials_path,
7475
job_config,
76+
list_tables_page_size,
7577
) = parse_url(url_with_everything)
7678

7779
assert project_id == "some-project"
7880
assert location == "some-location"
7981
assert dataset_id == "some-dataset"
8082
assert arraysize == 1000
83+
assert list_tables_page_size == 5000
8184
assert credentials_path == "/some/path/to.json"
8285
assert isinstance(job_config, QueryJobConfig)
8386

@@ -136,6 +139,7 @@ def test_all_values(url_with_everything, param, value, default):
136139
"param, value",
137140
[
138141
("arraysize", "not-int"),
142+
("list_tables_page_size", "not-int"),
139143
("create_disposition", "not-attribute"),
140144
("destination", "not.fully-qualified"),
141145
("dry_run", "not-bool"),
@@ -167,25 +171,43 @@ def test_empty_with_non_config():
167171
"bigquery:///?location=some-location&arraysize=1000&credentials_path=/some/path/to.json"
168172
)
169173
)
170-
project_id, location, dataset_id, arraysize, credentials_path, job_config = url
174+
(
175+
project_id,
176+
location,
177+
dataset_id,
178+
arraysize,
179+
credentials_path,
180+
job_config,
181+
list_tables_page_size,
182+
) = url
171183

172184
assert project_id is None
173185
assert location == "some-location"
174186
assert dataset_id is None
175187
assert arraysize == 1000
176188
assert credentials_path == "/some/path/to.json"
177189
assert job_config is None
190+
assert list_tables_page_size is None
178191

179192

180193
def test_only_dataset():
181194
url = parse_url(make_url("bigquery:///some-dataset"))
182-
project_id, location, dataset_id, arraysize, credentials_path, job_config = url
195+
(
196+
project_id,
197+
location,
198+
dataset_id,
199+
arraysize,
200+
credentials_path,
201+
job_config,
202+
list_tables_page_size,
203+
) = url
183204

184205
assert project_id is None
185206
assert location is None
186207
assert dataset_id == "some-dataset"
187208
assert arraysize is None
188209
assert credentials_path is None
210+
assert list_tables_page_size is None
189211
assert isinstance(job_config, QueryJobConfig)
190212
# we can't actually test that the dataset is on the job_config,
191213
# since we take care of that afterwards, when we have a client to fill in the project

0 commit comments

Comments
 (0)