From 9cd7554d38058546114683d5f63b937255fbf81f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 27 Jul 2021 23:21:27 +0200 Subject: [PATCH 01/35] deps!: BigQuery Storage and pyarrow are required dependencies (#776) * process: make BQ Storage and pyarrow required * Make pyarrow required in _pandas_helpers.py * Make pyarrow required in client.py * Make pyarrow required in table.py * Make pyarrow required in job/query.py * Make pyarrow required in DBAPI tests * Make pyarrow required in snippets tests * Make BQ storage required in client.py * Make BQ storage required in table.py * Make BQ storage required in DB API tests * Make BQ storage required in magics.py * Make BQ storage required in test__helpers.py * Make BQ storage required in test__pandas_helpers.py * Make BQ storage required in test_query_pandas.py * Make method signatures compatible again The annotations caused a mismatch * Remove checks for minimum BQ Storage version Since this is now a required dependency, there should not be any more pip quirks that used to allow installing BQ Storage as an extra, but without always respecting its minimum version pin. * Remove LegacyBigQueryStorageError Since it will be released in a major version bump, we can make this a breaking change, i.e. without deprecation. * Bump minimum pyarrow version to 3.0.0 * Remove unneeded pytest.importorskip for BQ Storage * Remove pyarrow version checks in pandas helpers tests * Conditionally skip pandas tests where needed * Remove unneeded conditional pyarrow version paths * Cover schema autodetect failed code path in test * fix bad merge Co-authored-by: Tim Swast --- docs/snippets.py | 4 - google/cloud/bigquery/__init__.py | 3 - google/cloud/bigquery/_helpers.py | 26 --- google/cloud/bigquery/_pandas_helpers.py | 122 ++++------ google/cloud/bigquery/client.py | 67 +----- google/cloud/bigquery/exceptions.py | 21 -- google/cloud/bigquery/job/query.py | 11 +- google/cloud/bigquery/magics/magics.py | 11 - google/cloud/bigquery/table.py | 52 +---- setup.py | 24 +- testing/constraints-3.6.txt | 2 +- tests/system/test_client.py | 38 +-- tests/system/test_pandas.py | 35 ++- tests/unit/job/test_query_pandas.py | 27 +-- tests/unit/test__helpers.py | 37 --- tests/unit/test__pandas_helpers.py | 145 ++---------- tests/unit/test_client.py | 281 +++++------------------ tests/unit/test_dbapi__helpers.py | 6 - tests/unit/test_dbapi_connection.py | 22 +- tests/unit/test_dbapi_cursor.py | 26 +-- tests/unit/test_magics.py | 79 +------ tests/unit/test_table.py | 155 +------------ 22 files changed, 196 insertions(+), 998 deletions(-) delete mode 100644 google/cloud/bigquery/exceptions.py diff --git a/docs/snippets.py b/docs/snippets.py index 3f9b9a88c..82e07901e 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -30,10 +30,6 @@ import pandas except (ImportError, AttributeError): pandas = None -try: - import pyarrow -except (ImportError, AttributeError): - pyarrow = None from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 222aadcc9..59bb08ce5 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -42,7 +42,6 @@ from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from 
google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -171,8 +170,6 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", - # Custom exceptions - "LegacyBigQueryStorageError", ] diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index bf0f80e22..3d83ddee9 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -28,8 +28,6 @@ from google.cloud._helpers import _to_bytes import packaging.version -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -41,7 +39,6 @@ re.VERBOSE, ) -_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") @@ -75,29 +72,6 @@ def is_read_session_optional(self) -> bool: """ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION - def verify_version(self): - """Verify that a recent enough version of BigQuery Storage extra is - installed. - - The function assumes that google-cloud-bigquery-storage extra is - installed, and should thus be used in places where this assumption - holds. - - Because `pip` can install an outdated version of this extra despite the - constraints in `setup.py`, the calling code can use this helper to - verify the version compatibility at runtime. - - Raises: - LegacyBigQueryStorageError: - If the google-cloud-bigquery-storage package is outdated. - """ - if self.installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {self.installed_version})." - ) - raise LegacyBigQueryStorageError(msg) - BQ_STORAGE_VERSIONS = BQStorageVersions() diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index b381fa5f7..817930ddd 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,18 +20,13 @@ import queue import warnings -from packaging import version - try: import pandas except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.parquet -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow +import pyarrow.parquet try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -106,63 +101,52 @@ def pyarrow_timestamp(): return pyarrow.timestamp("us", tz="UTC") -if pyarrow: - # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py - # When modifying it be sure to update it there as well. 
- BQ_TO_ARROW_SCALARS = { - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, - } - ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False - -else: # pragma: NO COVER - BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER - ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER +# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py +# When modifying it be sure to update it there as well. +BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, +} +ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. 
+ pyarrow.decimal128(38, scale=9).id: "NUMERIC", + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", +} def bq_to_arrow_struct_data_type(field): @@ -346,13 +330,6 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # If schema detection was not successful for all columns, also try with # pyarrow, if available. if unknown_type_fields: - if not pyarrow: - msg = u"Could not determine the type of columns: {}".format( - ", ".join(field.name for field in unknown_type_fields) - ) - warnings.warn(msg) - return None # We cannot detect the schema in full. - # The augment_schema() helper itself will also issue unknown type # warnings if detection still fails for any of the fields. bq_schema_out = augment_schema(dataframe, bq_schema_out) @@ -494,9 +471,6 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN serializing method. Defaults to "SNAPPY". https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table """ - if pyarrow is None: - raise ValueError("pyarrow is required for BigQuery schema conversion.") - bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 742ecac2e..acae2fe36 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,19 +27,11 @@ import json import math import os -import packaging.version import tempfile from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid import warnings -try: - import pyarrow - - _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload @@ -53,18 +45,14 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # pytype: disable=import-error -try: - from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( - DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, - ) -except ImportError: - DEFAULT_BQSTORAGE_CLIENT_INFO = None +from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, +) from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -72,7 +60,6 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -121,9 +108,6 @@ # 
https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 -# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 -_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) - class Project(object): """Wrapper for resource describing a BigQuery project. @@ -483,17 +467,10 @@ def _ensure_bqstorage_client( ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to a missing or outdated dependency - `google-cloud-bigquery-storage`, raise a warning and return ``None``. - - If the `bqstorage_client` argument is not ``None``, still perform the version - check and return the argument back to the caller if the check passes. If it - fails, raise a warning and return ``None``. - Args: bqstorage_client: - An existing BigQuery Storage client instance to check for version - compatibility. If ``None``, a new instance is created and returned. + An existing BigQuery Storage client instance. If ``None``, a new + instance is created and returned. client_options: Custom options used with a new BigQuery Storage client instance if one is created. @@ -504,20 +481,7 @@ def _ensure_bqstorage_client( Returns: A BigQuery Storage API client. """ - try: - from google.cloud import bigquery_storage - except ImportError: - warnings.warn( - "Cannot create BigQuery Storage client, the dependency " - "google-cloud-bigquery-storage is not installed." - ) - return None - - try: - BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return None + from google.cloud import bigquery_storage if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( @@ -2496,7 +2460,7 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. - Indexes are not loaded. Requires the :mod:`pyarrow` library. + Indexes are not loaded. By default, this method uses the parquet source format. To override this, supply a value for @@ -2526,9 +2490,6 @@ def load_table_from_dataframe( google.cloud.bigquery.job.LoadJob: A new load job. Raises: - ValueError: - If a usable parquet engine cannot be found. This method - requires :mod:`pyarrow` to be installed. TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. @@ -2556,10 +2517,6 @@ def load_table_from_dataframe( ) ) - if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: - # pyarrow is now the only supported parquet engine. - raise ValueError("This method requires pyarrow to be installed") - if location is None: location = self.location @@ -2615,16 +2572,6 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: - if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: - msg = ( - "Loading dataframe data in PARQUET format with pyarrow " - f"{_PYARROW_VERSION} can result in data corruption. It is " - "therefore *strongly* advised to use a different pyarrow " - "version or a different source format. 
" - "See: https://github.com/googleapis/python-bigquery/issues/781" - ) - warnings.warn(msg, category=RuntimeWarning) - if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py deleted file mode 100644 index 6e5c27eb1..000000000 --- a/google/cloud/bigquery/exceptions.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class BigQueryError(Exception): - """Base class for all custom exceptions defined by the BigQuery client.""" - - -class LegacyBigQueryStorageError(BigQueryError): - """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 2cb7ee28e..f4a78a9da 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1342,7 +1342,7 @@ def result( def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, ) -> "pyarrow.Table": @@ -1373,8 +1373,7 @@ def to_arrow( BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. Reading from a specific partition or snapshot is not currently supported by this method. @@ -1399,10 +1398,6 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) @@ -1417,7 +1412,7 @@ def to_arrow( # that should only exist here in the QueryJob method. def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2b8c2928e..dce911232 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -766,17 +766,6 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError as err: - customized_error = ImportError( - "The default BigQuery Storage API client cannot be used, install " - "the missing google-cloud-bigquery-storage and pyarrow packages " - "to use it. 
Alternatively, use the classic REST API by specifying " - "the --use_rest_api magic option." - ) - raise customized_error from err - try: from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 18d969a3f..a058dca91 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -30,10 +30,7 @@ except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator @@ -41,7 +38,6 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers -from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -50,21 +46,17 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER + from google.cloud import bigquery_storage + # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas - import pyarrow - from google.cloud import bigquery_storage _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) -_NO_PYARROW_ERROR = ( - "The pyarrow library is not installed, please install " - "pyarrow to use the to_arrow() function." -) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -1559,17 +1551,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False - try: - from google.cloud import bigquery_storage # noqa: F401 - except ImportError: - return False - - try: - _helpers.BQ_STORAGE_VERSIONS.verify_version() - except LegacyBigQueryStorageError as exc: - warnings.warn(str(exc)) - return False - return True def _get_next_page_response(self): @@ -1641,7 +1622,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): def to_arrow( self, progress_bar_type: str = None, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a @@ -1670,8 +1651,7 @@ def to_arrow( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. This API is a billable API. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1692,14 +1672,8 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. - Raises: - ValueError: If the :mod:`pyarrow` library cannot be imported. - .. 
versionadded:: 1.17.0 """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1743,7 +1717,7 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, ) -> "pandas.DataFrame": @@ -1754,8 +1728,7 @@ def to_dataframe_iterable( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1818,7 +1791,7 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, @@ -1831,8 +1804,7 @@ def to_dataframe( A BigQuery Storage API client. If supplied, use the faster BigQuery Storage API to fetch rows from BigQuery. - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. + This method requires ``google-cloud-bigquery-storage`` library. This method only exposes a subset of the capabilities of the BigQuery Storage API. For full access to all features @@ -1886,9 +1858,7 @@ def to_dataframe( Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. + If the :mod:`pandas` library cannot be imported. """ if pandas is None: @@ -1974,8 +1944,6 @@ def to_arrow( Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. """ - if pyarrow is None: - raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) def to_dataframe( diff --git a/setup.py b/setup.py index 0ca19b576..91458bb78 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.29.0, <3.0.0dev", "proto-plus >= 1.10.0", + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 @@ -42,22 +43,14 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "pyarrow >= 3.0.0, < 5.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. 
- # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 5.0dev", - ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], + # Keep the no-op bqstorage extra for backward compatibility. + # See: https://github.com/googleapis/python-bigquery/issues/757 + "bqstorage": [], + "pandas": ["pandas>=0.23.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", @@ -69,11 +62,6 @@ all_extras = [] for extra in extras: - # Exclude this extra from all to avoid overly strict dependencies on core - # libraries such as pyarrow. - # https://github.com/googleapis/python-bigquery/issues/563 - if extra in {"bignumeric_type"}: - continue all_extras.extend(extras[extra]) extras["all"] = all_extras diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..ce012f0d7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0 pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 -pyarrow==1.0.0 +pyarrow==3.0.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index baa2b6ad8..4970ef281 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -30,24 +30,15 @@ import psutil import pytest -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - try: import fastavro # to parse BQ storage client results except ImportError: # pragma: NO COVER fastavro = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - pyarrow = None +import pyarrow +import pyarrow.types from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest @@ -66,6 +57,7 @@ from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums +from google.cloud import bigquery_storage from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient @@ -1602,10 +1594,6 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -1664,9 +1652,6 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -1869,6 +1854,11 @@ def test_query_w_query_params(self): "expected": pi_numeric, "query_parameters": [pi_numeric_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, { "sql": "SELECT @truthy", "expected": truthy, @@ -1939,14 +1929,6 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] - if _BIGNUMERIC_SUPPORT: - 
examples.append( - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - } - ) for example in examples: jconfig = QueryJobConfig() @@ -2333,10 +2315,6 @@ def test_create_table_rows_fetch_nested_schema(self): def _fetch_dataframe(self, query): return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ddf5eaf43..4b1828c86 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -26,15 +26,11 @@ import pytz from google.cloud import bigquery -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from google.cloud import bigquery_storage from . import helpers -bigquery_storage = pytest.importorskip( - "google.cloud.bigquery_storage", minversion="2.0.0" -) pandas = pytest.importorskip("pandas", minversion="0.23.0") -pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version @@ -184,12 +180,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -211,12 +206,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -292,12 +286,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -335,6 +328,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -349,17 +350,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ], ), ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - 
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c537802f4..e5105974f 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -17,20 +17,16 @@ import json import mock +import pyarrow import pytest try: import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None + +from google.cloud import bigquery_storage + try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -83,9 +79,6 @@ def test__contains_order_by(query, expected): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "query", ( @@ -151,7 +144,6 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class @@ -238,7 +230,6 @@ def test_to_arrow(): ] -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class @@ -273,7 +264,6 @@ def test_to_arrow_max_results_no_progress_bar(): assert tbl.num_rows == 2 -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_query_plan(): from google.cloud.bigquery import table @@ -330,7 +320,6 @@ def test_to_arrow_w_tqdm_w_query_plan(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_pending_status(): from google.cloud.bigquery import table @@ -383,7 +372,6 @@ def test_to_arrow_w_tqdm_w_pending_status(): ) -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_wo_query_plan(): from google.cloud.bigquery import table @@ -485,9 +473,6 @@ def test_to_dataframe_ddl_query(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -538,9 +523,6 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -643,7 +625,6 @@ def test_to_dataframe_column_dtypes(): assert df.date.dtype.name == "object" -@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(pandas is None, reason="Requires 
`pandas`") def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index af026ccbe..aaafdb0f7 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,50 +19,13 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None - -@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): def _object_under_test(self): from google.cloud.bigquery import _helpers return _helpers.BQStorageVersions() - def _call_fut(self): - from google.cloud.bigquery import _helpers - - _helpers.BQ_STORAGE_VERSIONS._installed_version = None - return _helpers.BQ_STORAGE_VERSIONS.verify_version() - - def test_raises_no_error_w_recent_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): - try: - self._call_fut() - except LegacyBigQueryStorageError: # pragma: NO COVER - self.fail("Legacy error raised with a non-legacy dependency version.") - - def test_raises_error_w_legacy_bqstorage(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): - with self.assertRaises(LegacyBigQueryStorageError): - self._call_fut() - - def test_raises_error_w_unknown_bqstorage_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: - del fake_module.__version__ - error_pattern = r"version found: 0.0.0" - with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): - self._call_fut() - def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0ba671cd9..85c507b2a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -29,27 +29,18 @@ import pandas.testing except ImportError: # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types -except ImportError: # pragma: NO COVER - # Mock out pyarrow when missing, because methods from pyarrow.types are - # used in test parameterization. 
- pyarrow = mock.Mock() + +import pyarrow +import pyarrow.types import pytest import pytz from google import api_core from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT -try: - from google.cloud import bigquery_storage +from google.cloud import bigquery_storage - _helpers.BQ_STORAGE_VERSIONS.verify_version() -except ImportError: # pragma: NO COVER - bigquery_storage = None PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -60,11 +51,6 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") -skip_if_no_bignumeric = pytest.mark.skipif( - not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", -) - - @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -120,7 +106,6 @@ def all_(*functions): return functools.partial(do_all, functions) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -153,9 +138,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - pytest.param( - "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, - ), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -234,11 +217,10 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - pytest.param( + ( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), - marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -294,7 +276,6 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -302,7 +283,6 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -312,6 +292,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -321,9 +302,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -335,6 +313,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), 
pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -343,8 +322,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected = pyarrow.struct(expected) assert pyarrow.types.is_struct(actual) @@ -353,7 +330,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -363,6 +339,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -372,9 +349,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -386,6 +360,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -394,8 +369,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected_value_type = pyarrow.struct(expected) assert pyarrow.types.is_list(actual) @@ -404,7 +377,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -441,7 +413,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - pytest.param( + ( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -449,7 +421,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], - marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, 
None, False, None]), ("BOOL", [False, None, True, None]), @@ -502,7 +473,6 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -537,7 +507,6 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] series = pandas.Series(rows) @@ -548,7 +517,6 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -560,7 +528,6 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -582,7 +549,6 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -599,7 +565,6 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -861,7 +826,6 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( schema.SchemaField("str_index", "STRING"), @@ -928,7 +892,6 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -938,6 +901,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", 
mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -946,8 +910,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - if _BIGNUMERIC_SUPPORT: - bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) data = { "field01": ["hello", "world"], @@ -957,6 +919,10 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], "field09": [True, False], "field10": [False, True], "field11": [ @@ -971,11 +937,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ], "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], } - if _BIGNUMERIC_SUPPORT: - data["field08"] = [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ] dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) @@ -987,7 +948,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -1020,7 +980,6 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1042,15 +1001,6 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): - monkeypatch.setattr(module_under_test, "pyarrow", None) - with pytest.raises(ValueError) as exc_context: - module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) - assert "pyarrow is required" in str(exc_context.value) - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1062,7 +1012,6 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1074,7 +1023,6 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema = 
(schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -1094,34 +1042,6 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): - dataframe = pandas.DataFrame( - data=[ - {"id": 10, "status": u"FOO", "execution_date": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, - ] - ) - - no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) - - with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: - detected_schema = module_under_test.dataframe_to_bq_schema( - dataframe, bq_schema=[] - ) - - assert detected_schema is None - - # a warning should also be issued - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - msg = str(expected_warnings[0]) - assert "execution_date" in msg and "created_at" in msg - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1151,7 +1071,6 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1176,7 +1095,6 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1210,11 +1128,8 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) - if _BIGNUMERIC_SUPPORT: - current_schema += ( - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1236,20 +1151,16 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) - if _BIGNUMERIC_SUPPORT: - expected_schema += ( - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), - ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def 
test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1285,8 +1196,9 @@ def test_augment_schema_type_detection_fails(module_under_test): assert "struct_field" in warning_msg and "struct_field_2" in warning_msg -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): + pandas = pytest.importorskip("pandas") + dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, {"name": "field02", "type": "BOOL", "mode": "NULLABLE"}, @@ -1314,9 +1226,6 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage_stream_includes_read_session( monkeypatch, module_under_test ): @@ -1347,8 +1256,7 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( - bigquery_storage is None - or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1388,9 +1296,6 @@ def test__download_table_bqstorage_stream_omits_read_session( (7, {"max_queue_size": None}, 7, 0), # infinite queue size ], ) -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__download_table_bqstorage( module_under_test, stream_count, @@ -1440,7 +1345,6 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1476,7 +1380,6 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test): assert col.to_pylist() == [2.2, 22.22, 222.222] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1511,7 +1414,6 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test): assert col.to_pylist() == ["2.2", "22.22", "222.222"] -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1539,7 +1441,6 @@ def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 535685511..0dc9c3f55 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,7 +27,6 @@ import warnings import mock -import packaging import requests import pytest import pytz @@ -47,22 +46,14 @@ ) except (ImportError, AttributeError): # pragma: NO COVER opentelemetry = None -try: - import pyarrow -except (ImportError, AttributeError): # pragma: NO COVER - pyarrow = None import 
google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers +from google.cloud import bigquery_storage from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None -from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -605,9 +596,6 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() @@ -630,55 +618,6 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) - def test_ensure_bqstorage_client_missing_dependency(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning - for warning in warned - if "not installed" in str(warning) - and "google-cloud-bigquery-storage" in str(warning) - ] - assert matching_warnings, "Missing dependency warning not raised." - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_ensure_bqstorage_client_obsolete_dependency(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client() - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." 
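# The dependency-check tests deleted above all follow one recipe: record emitted
# warnings with ``warnings.catch_warnings(record=True)``, then filter them for an
# expected substring. A minimal, self-contained sketch of that recipe (the helper
# function and the warning message below are illustrative, not taken from the patch):
import warnings


def _outdated_dependency_helper():
    warnings.warn("BQ Storage too old", UserWarning, stacklevel=2)


with warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")  # make sure nothing gets filtered out
    _outdated_dependency_helper()

matching_warnings = [w for w in warned if "BQ Storage too old" in str(w.message)]
assert matching_warnings, "Obsolete dependency warning not raised."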
- - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -690,29 +629,6 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_ensure_bqstorage_client_existing_client_check_fails(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - mock_storage_client = mock.sentinel.mock_storage_client - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) - - self.assertIsNone(bqstorage_client) - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -6679,7 +6595,6 @@ def test_load_table_from_file_w_invalid_job_config(self): assert "Expected an instance of LoadJobConfig" in err_msg @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6766,7 +6681,6 @@ def test_load_table_from_dataframe(self): assert "description" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6811,7 +6725,6 @@ def test_load_table_from_dataframe_w_client_location(self): assert sent_config.source_format == job.SourceFormat.PARQUET @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6866,7 +6779,6 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6922,7 +6834,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): from google.cloud.bigquery import job @@ -6942,7 
+6853,6 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7019,7 +6929,65 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + client = self._make_client() + + df_data = [ + [[{"name": "n1.1", "value": 1.1}, {"name": "n1.2", "value": 1.2}]], + [[{"name": "n2.1", "value": 2.1}, {"name": "n2.2", "value": 2.2}]], + ] + dataframe = pandas.DataFrame(df_data, columns=["col_record_list"]) + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + + with load_patch as load_table_from_file, get_table_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION + ) + + # There should be a warning that schema detection failed. + expected_warnings = [ + warning + for warning in warned + if "schema could not be detected" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass( + expected_warnings[0].category, + (DeprecationWarning, PendingDeprecationWarning), + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + timeout=None, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema is None + + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7081,7 +7049,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): assert sent_schema == expected_sent_schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -7120,7 +7087,6 @@ def test_load_table_from_dataframe_unknown_table(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7168,7 +7134,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) - 
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7213,7 +7178,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7273,7 +7237,6 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7357,7 +7320,6 @@ def test_load_table_from_dataframe_w_partial_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7394,63 +7356,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_partial_schema_missing_types(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES - from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField - - client = self._make_client() - df_data = collections.OrderedDict( - [ - ("string_col", ["abc", "def", "ghi"]), - ("unknown_col", [b"jkl", None, b"mno"]), - ] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - - schema = (SchemaField("string_col", "STRING"),) - job_config = job.LoadJobConfig(schema=schema) - with pyarrow_patch, load_patch as load_table_from_file, warnings.catch_warnings( - record=True - ) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION - ) - - load_table_from_file.assert_called_once_with( - client, - mock.ANY, - self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - size=mock.ANY, - job_id=mock.ANY, - job_id_prefix=None, - location=self.LOCATION, - project=None, - job_config=mock.ANY, - timeout=None, - ) - - assert warned # there should be at least one warning - unknown_col_warnings = [ - warning for warning in warned if "unknown_col" in str(warning) - ] - assert unknown_col_warnings - assert unknown_col_warnings[0].category == UserWarning - - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema is None - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7483,72 +7388,6 @@ def 
test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args.kwargs.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): - client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - to_parquet_patch = mock.patch.object( - dataframe, "to_parquet", wraps=dataframe.to_parquet - ) - - with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: - with pytest.raises(ValueError): - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - location=self.LOCATION, - parquet_compression="gzip", - ) - - def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): - pytest.importorskip("pandas", reason="Requires `pandas`") - pytest.importorskip("pyarrow", reason="Requires `pyarrow`") - - client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) - - pyarrow_version_patch = mock.patch( - "google.cloud.bigquery.client._PYARROW_VERSION", - packaging.version.parse("2.0.0"), # A known bad version of pyarrow. - ) - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - - with load_patch, get_table_patch, pyarrow_version_patch: - with warnings.catch_warnings(record=True) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION, - ) - - expected_warnings = [ - warning for warning in warned if "pyarrow" in str(warning).lower() - ] - assert len(expected_warnings) == 1 - assert issubclass(expected_warnings[0].category, RuntimeWarning) - msg = str(expected_warnings[0].message) - assert "pyarrow 2.0.0" in msg - assert "data corruption" in msg - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. 
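# The custom-compression test above checks that ``parquet_compression`` is forwarded
# to the parquet writer. With pyarrow now a hard dependency, the serialization step
# amounts to roughly the following sketch (not the client's exact code path; the
# DataFrame contents and output path are made up for illustration):
import pandas
import pyarrow
import pyarrow.parquet

dataframe = pandas.DataFrame({"id": [1, 2], "age": [100, 60]})
arrow_table = pyarrow.Table.from_pandas(dataframe, preserve_index=False)
pyarrow.parquet.write_table(arrow_table, "/tmp/example.parquet", compression="LZ4")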
diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index b33203354..4afc47b6c 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -21,11 +21,6 @@ import pytest -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - import google.cloud._helpers from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import _helpers @@ -215,7 +210,6 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ dict( diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 0576cad38..6b3a99439 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -17,10 +17,7 @@ import mock -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage class TestConnection(unittest.TestCase): @@ -40,8 +37,6 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - # Assumption: bigquery_storage exists. It's the test's responisbility to - # not use this helper or skip itself if bqstroage is not installed. mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -58,9 +53,6 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -89,9 +81,6 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -107,9 +96,6 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -142,9 +128,6 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -165,9 +148,6 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 026810aaf..f075bb6f7 100644 --- 
a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -18,18 +18,8 @@ import pytest - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.api_core import exceptions - -try: - from google.cloud import bigquery_storage -except ImportError: # pragma: NO COVER - bigquery_storage = None +from google.cloud import bigquery_storage from tests.unit.helpers import _to_pyarrow @@ -279,10 +269,6 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -334,9 +320,6 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -358,9 +341,6 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -390,10 +370,6 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index d030482cc..bb3a8d1fd 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -76,19 +76,6 @@ def ipython_ns_cleanup(): del ip.user_ns[name] -@pytest.fixture(scope="session") -def missing_bq_storage(): - """Provide a patcher that can make the bigquery storage import to fail.""" - - def fail_if(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - return maybe_fail_import(predicate=fail_if) - - @pytest.fixture(scope="session") def missing_grpcio_lib(): """Provide a patcher that can make the gapic library import to fail.""" @@ -324,9 +311,6 @@ def test__make_bqstorage_client_false(): assert got is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -338,53 +322,6 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) -def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - with pytest.raises(ImportError) as 
exc_context, missing_bq_storage: - magics._make_bqstorage_client(test_client, True, {}) - - error_msg = str(exc_context.value) - assert "google-cloud-bigquery-storage" in error_msg - assert "pyarrow" in error_msg - - -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) -def test__make_bqstorage_client_true_obsolete_dependency(): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - test_client = bigquery.Client( - project="test_project", credentials=credentials_mock, location="test_location" - ) - - patcher = mock.patch( - "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - got = magics._make_bqstorage_client(test_client, True, {}) - - assert got is None - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." - - -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( @@ -440,9 +377,6 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -603,10 +537,9 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -667,10 +600,9 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) def test_bigquery_magic_with_rest_client_requested(monkeypatch): + pandas = pytest.importorskip("pandas") + ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -887,9 +819,6 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 37650cd27..533ed610f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -19,36 +19,23 @@ import warnings import mock -import pkg_resources +import pyarrow +import pyarrow.types import pytest import pytz import google.api_core.exceptions -from test_utils.imports import maybe_fail_import -try: - from 
google.cloud import bigquery_storage - from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, - ) -except ImportError: # pragma: NO COVER - bigquery_storage = None - big_query_read_grpc_transport = None +from google.cloud import bigquery_storage +from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, +) try: import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None -try: - import pyarrow - import pyarrow.types - - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) -except ImportError: # pragma: NO COVER - pyarrow = None - PYARROW_VERSION = pkg_resources.parse_version("0.0.1") - try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -57,9 +44,6 @@ from google.cloud.bigquery.dataset import DatasetReference -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - - def _mock_client(): from google.cloud.bigquery import client @@ -1619,13 +1603,6 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) - @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - def test_to_arrow_error_if_pyarrow_is_none(self): - row_iterator = self._make_one() - with self.assertRaises(ValueError): - row_iterator.to_arrow() - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): row_iterator = self._make_one() tbl = row_iterator.to_arrow() @@ -1862,49 +1839,6 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) - def test__validate_bqstorage_returns_false_if_missing_dependency(self): - iterator = self._make_one(first_page_response=None) # not cached - - def fail_bqstorage_import(name, globals, locals, fromlist, level): - # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage" in name or ( - fromlist is not None and "bigquery_storage" in fromlist - ) - - no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) - - with no_bqstorage: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): - from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError - - iterator = self._make_one(first_page_response=None) # not cached - - patcher = mock.patch( - "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", - side_effect=LegacyBigQueryStorageError("BQ Storage too old"), - ) - with patcher, warnings.catch_warnings(record=True) as warned: - result = iterator._validate_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - - self.assertFalse(result) - - matching_warnings = [ - warning for warning in warned if "BQ Storage too old" in str(warning) - ] - assert matching_warnings, "Obsolete dependency warning not raised." 
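# With pyarrow and google-cloud-bigquery-storage unconditionally importable, the
# import-failure and version-check fallbacks deleted above are gone and
# ``RowIterator.to_arrow()`` simply works. A usage sketch (the project and table IDs
# are placeholders, and application default credentials plus pandas are assumed):
from google.cloud import bigquery

client = bigquery.Client(project="my-project")
rows = client.list_rows("my-project.my_dataset.my_table", max_results=100)

arrow_table = rows.to_arrow(create_bqstorage_client=False)  # plain REST download
print(arrow_table.num_rows, arrow_table.to_pandas().shape)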
- - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -1986,7 +1920,6 @@ def test_to_arrow(self): ], ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): from google.cloud.bigquery.schema import SchemaField @@ -2019,7 +1952,6 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): from google.cloud.bigquery.schema import SchemaField @@ -2062,7 +1994,6 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2101,10 +2032,6 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_max_results_w_create_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2141,10 +2068,6 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._ensure_bqstorage_client.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2222,10 +2145,6 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2253,7 +2172,6 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField @@ -2280,10 +2198,6 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2320,7 +2234,6 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @mock.patch("tqdm.tqdm_notebook") @@ -2456,10 +2369,6 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2573,7 +2482,6 @@ def test_to_dataframe(self): self.assertEqual(df.age.dtype.name, "int64") @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -2588,9 +2496,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) - tzinfo = None - if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = datetime.timezone.utc + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows @@ -2604,7 +2510,6 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -2963,9 +2868,6 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2994,9 +2896,6 @@ def test_to_dataframe_w_bqstorage_creates_client(self): 
bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3022,11 +2921,7 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3048,10 +2943,6 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3103,10 +2994,6 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3183,10 +3070,6 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3237,11 +3120,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): from google.cloud.bigquery import schema @@ -3316,10 +3195,6 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3435,9 +3310,6 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertTrue(df.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from 
google.cloud.bigquery import table as mut @@ -3454,9 +3326,6 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3474,9 +3343,6 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3495,10 +3361,6 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3983,9 +3845,6 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) @pytest.mark.parametrize( "table_path", ( From 9319eb1ef492999b9007c43f1e128d402334eaaa Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 28 Jul 2021 17:28:59 +0200 Subject: [PATCH 02/35] chore: merge recent changes from master (#823) * chore: protect v3.x.x branch (#816) * chore: protect v3.x.x branch In preparation for breaking changes. * force pattern to be a string * simplify branch name * fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815) That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set. * feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. (#817) PiperOrigin-RevId: 387137741 Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25 * chore: release 2.23.0 (#819) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast Co-authored-by: gcf-owl-bot[bot] <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- .github/sync-repo-settings.yaml | 2 +- CHANGELOG.md | 18 +++ google/cloud/bigquery/table.py | 30 +++- google/cloud/bigquery/version.py | 2 +- google/cloud/bigquery_v2/types/model.py | 104 ++++++++++-- .../bigquery_v2/types/table_reference.py | 12 ++ tests/unit/test_table.py | 152 +++++++++++++++++- 7 files changed, 298 insertions(+), 22 deletions(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index b18fb9c29..2697f214c 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,7 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. 
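# The commit message above narrows the warning: when ``max_results`` is set, the
# download now silently falls back to the REST endpoint, and the
# "Cannot use bqstorage_client" warning fires only when a BQ Storage client was
# explicitly passed in. A sketch of the user-visible difference (IDs are
# placeholders; application default credentials and pandas are assumed):
import warnings

from google.cloud import bigquery
from google.cloud import bigquery_storage

client = bigquery.Client(project="my-project")
bqstorage_client = bigquery_storage.BigQueryReadClient()

with warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")
    rows = client.list_rows("my-project.my_dataset.my_table", max_results=10)
    rows.to_dataframe(bqstorage_client=bqstorage_client)  # explicit client -> warns
    rows2 = client.list_rows("my-project.my_dataset.my_table", max_results=10)
    rows2.to_dataframe(create_bqstorage_client=True)  # no explicit client -> silent

assert any("max_results" in str(w.message) for w in warned)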
# Defaults to `master` -- pattern: master +- pattern: '{master,v3}' requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dbc5d4da..966a8744a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) + + +### Features + +* Update proto definitions for bigquery/v2 to support new proto fields for BQML. ([#817](https://www.github.com/googleapis/python-bigquery/issues/817)) ([fe7a902](https://www.github.com/googleapis/python-bigquery/commit/fe7a902e8b3e723ace335c9b499aea6d180a025b)) + + +### Bug Fixes + +* no longer raise a warning in `to_dataframe` if `max_results` set ([#815](https://www.github.com/googleapis/python-bigquery/issues/815)) ([3c1be14](https://www.github.com/googleapis/python-bigquery/commit/3c1be149e76b1d1d8879fdcf0924ddb1c1839e94)) +* retry ChunkedEncodingError by default ([#802](https://www.github.com/googleapis/python-bigquery/issues/802)) ([419d36d](https://www.github.com/googleapis/python-bigquery/commit/419d36d6b1887041e5795dbc8fc808890e91ab11)) + + +### Documentation + +* correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107)) + ### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a058dca91..8433e37fa 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1544,11 +1544,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False if self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) return False return True @@ -1585,6 +1580,25 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows + def _maybe_warn_max_results( + self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], + ): + """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set. + + This helper method should be used directly in the relevant top-level public + methods, so that the warning is issued for the correct line in user code. + + Args: + bqstorage_client: + The BigQuery Storage client intended to use for downloading result rows. + """ + if bqstorage_client is not None and self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the REST endpoint.", + stacklevel=3, + ) + def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): @@ -1674,6 +1688,8 @@ def to_arrow( .. 
versionadded:: 1.17.0 """ + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1763,6 +1779,8 @@ def to_dataframe_iterable( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, @@ -1866,6 +1884,8 @@ def to_dataframe( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index dbc524478..416bf20ed 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.22.1" +__version__ = "2.23.0" diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index 17e101d25..706418401 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -96,6 +96,8 @@ class Model(proto.Message): Output only. Label columns that were used to train this model. The output of the model will have a `predicted_` prefix to these columns. + best_trial_id (int): + The best trial_id across all training runs. """ class ModelType(proto.Enum): @@ -113,6 +115,7 @@ class ModelType(proto.Enum): ARIMA = 11 AUTOML_REGRESSOR = 12 AUTOML_CLASSIFIER = 13 + ARIMA_PLUS = 19 class LossType(proto.Enum): r"""Loss metric to evaluate model training performance.""" @@ -151,6 +154,7 @@ class DataFrequency(proto.Enum): WEEKLY = 5 DAILY = 6 HOURLY = 7 + PER_MINUTE = 8 class HolidayRegion(proto.Enum): r"""Type of supported holiday regions for time series forecasting @@ -285,7 +289,7 @@ class RegressionMetrics(proto.Message): median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. r_squared (google.protobuf.wrappers_pb2.DoubleValue): - R^2 score. + R^2 score. This corresponds to r2_score in ML.EVALUATE. """ mean_absolute_error = proto.Field( @@ -528,7 +532,7 @@ class ClusteringMetrics(proto.Message): Mean of squared distances between each sample to its cluster centroid. clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): - [Beta] Information for all clusters. + Information for all clusters. """ class Cluster(proto.Message): @@ -697,10 +701,29 @@ class ArimaSingleModelForecastingMetrics(proto.Message): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. 
seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -711,9 +734,19 @@ class ArimaSingleModelForecastingMetrics(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=3,) time_series_id = proto.Field(proto.STRING, number=4,) + time_series_ids = proto.RepeatedField(proto.STRING, number=9,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", @@ -901,7 +934,7 @@ class TrainingRun(proto.Message): """ class TrainingOptions(proto.Message): - r""" + r"""Options used in model training. Attributes: max_iterations (int): The maximum number of iterations in training. @@ -972,8 +1005,9 @@ class TrainingOptions(proto.Message): num_clusters (int): Number of clusters for clustering models. model_uri (str): - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. + Google Cloud Storage URI from which the model + was imported. Only applicable for imported + models. optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. @@ -1030,8 +1064,11 @@ class TrainingOptions(proto.Message): If a valid value is specified, then holiday effects modeling is enabled. time_series_id_column (str): - The id column that will be used to indicate - different time series to forecast in parallel. + The time series id column that was used + during ARIMA model training. + time_series_id_columns (Sequence[str]): + The time series id columns that were used + during ARIMA model training. horizon (int): The number of periods ahead that need to be forecasted. @@ -1042,6 +1079,15 @@ class TrainingOptions(proto.Message): output feature name is A.b. auto_arima_max_order (int): The max value of non-seasonal p and q. + decompose_time_series (google.protobuf.wrappers_pb2.BoolValue): + If true, perform decompose time series and + save the results. + clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, clean spikes and dips in the input + time series. + adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, detect step changes and make data + adjustment in the input time series. 
""" max_iterations = proto.Field(proto.INT64, number=1,) @@ -1120,9 +1166,19 @@ class TrainingOptions(proto.Message): proto.ENUM, number=42, enum="Model.HolidayRegion", ) time_series_id_column = proto.Field(proto.STRING, number=43,) + time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) horizon = proto.Field(proto.INT64, number=44,) preserve_input_structs = proto.Field(proto.BOOL, number=45,) auto_arima_max_order = proto.Field(proto.INT64, number=46,) + decompose_time_series = proto.Field( + proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + ) + clean_spikes_and_dips = proto.Field( + proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + ) + adjust_step_changes = proto.Field( + proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue, + ) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. @@ -1218,10 +1274,29 @@ class ArimaModelInfo(proto.Message): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. 
""" non_seasonal_order = proto.Field( @@ -1237,11 +1312,21 @@ class ArimaModelInfo(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=4,) time_series_id = proto.Field(proto.STRING, number=5,) + time_series_ids = proto.RepeatedField(proto.STRING, number=10,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + ) arima_model_info = proto.RepeatedField( proto.MESSAGE, @@ -1319,6 +1404,7 @@ class ArimaModelInfo(proto.Message): label_columns = proto.RepeatedField( proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, ) + best_trial_id = proto.Field(proto.INT64, number=19,) class GetModelRequest(proto.Message): diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py index a0a8ee4c9..d56e5b09f 100644 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -36,11 +36,23 @@ class TableReference(proto.Message): maximum length is 1,024 characters. Certain operations allow suffixing of the table ID with a partition decorator, such as ``sample_table$20190123``. + project_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + dataset_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + table_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. 
""" project_id = proto.Field(proto.STRING, number=1,) dataset_id = proto.Field(proto.STRING, number=2,) table_id = proto.Field(proto.STRING, number=3,) + project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) + dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) + table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 533ed610f..20336b227 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -15,6 +15,7 @@ import datetime import logging import time +import types import unittest import warnings @@ -1839,6 +1840,15 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_max_results_set(self): + iterator = self._make_one( + max_results=10, first_page_response=None # not cached + ) + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2032,7 +2042,7 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") - def test_to_arrow_max_results_w_create_bqstorage_warning(self): + def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2046,6 +2056,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2056,7 +2067,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_arrow(create_bqstorage_client=True) + row_iterator.to_arrow(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2066,6 +2077,45 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_arrow(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() def test_to_arrow_w_bqstorage(self): @@ -2285,7 +2335,6 @@ def test_to_arrow_w_pyarrow_none(self): 
@unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2328,7 +2377,6 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_with_dtypes(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2436,6 +2484,57 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + + iterator_schema = [ + schema.SchemaField("name", "STRING", mode="REQUIRED"), + schema.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + row_iterator = mut.RowIterator( + _mock_client(), + api_request, + path, + iterator_schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=iterator_schema, + max_results=25, + ) + + with warnings.catch_warnings(record=True) as warned: + dfs = row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client) + + # Was a warning emitted? + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + assert len(matches) == 1, "User warning was not emitted." + assert __file__ in str(matches[0]), "Warning emitted with incorrect stacklevel" + + # Basic check of what we got as a result. 
+ dataframes = list(dfs) + assert len(dataframes) == 2 + assert isinstance(dataframes[0], pandas.DataFrame) + assert isinstance(dataframes[1], pandas.DataFrame) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField @@ -2831,7 +2930,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_max_results_w_create_bqstorage_warning(self): + def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2845,6 +2944,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2855,7 +2955,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_dataframe(create_bqstorage_client=True) + row_iterator.to_dataframe(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2865,6 +2965,46 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_dataframe(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") From e26d879ca7e6885b01a3e560f743c5feb9d3f557 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 5 Aug 2021 17:24:45 +0200 Subject: [PATCH 03/35] chore: sync v3 with master (#851) (no issue) Just a regular update of the `v3` branch. 
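
Besides routine dependency and changelog updates, the sync also picks up the fix that lets `insert_rows()` accept float column values passed as strings again (#824). A minimal usage sketch of the resulting coercion, based on the private `_float_to_json` helper and its unit tests in the diff below (behaviour shown is after the fix):

    from google.cloud.bigquery._helpers import _float_to_json

    assert _float_to_json(None) is None
    assert _float_to_json(1.23) == 1.23
    assert _float_to_json("1.23") == 1.23                  # strings are parsed again
    assert _float_to_json(float("nan")).lower() == "nan"   # non-finite values serialize as strings
    assert _float_to_json("-inf").lower() == "-inf"
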
--- .github/CODEOWNERS | 2 +- .github/sync-repo-settings.yaml | 15 +++++++++- CHANGELOG.md | 14 +++++++++ google/cloud/bigquery/_helpers.py | 12 ++++---- google/cloud/bigquery/version.py | 2 +- samples/geography/requirements.txt | 4 +-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 8 +++--- setup.py | 2 +- tests/system/test_pandas.py | 40 +++++++++++++++++--------- tests/unit/test__helpers.py | 24 ++++++++++++++++ 11 files changed, 95 insertions(+), 30 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ae570eb01..76112476b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 2697f214c..8634a3043 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,20 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` -- pattern: '{master,v3}' +- pattern: master + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true + requiredStatusCheckContexts: + - 'Kokoro' + - 'Kokoro snippets-3.8' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' +- pattern: v3 + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' diff --git a/CHANGELOG.md b/CHANGELOG.md index 966a8744a..0c08e7910 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) + + +### Dependencies + +* expand pyarrow pins to support 5.x releases ([#833](https://www.github.com/googleapis/python-bigquery/issues/833)) ([80e3a61](https://www.github.com/googleapis/python-bigquery/commit/80e3a61c60419fb19b70b664c6415cd01ba82f5b)) + +### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) + + +### Bug Fixes + +* `insert_rows()` accepts float column values as strings again ([#824](https://www.github.com/googleapis/python-bigquery/issues/824)) ([d9378af](https://www.github.com/googleapis/python-bigquery/commit/d9378af13add879118a1d004529b811f72c325d6)) + ## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 3d83ddee9..cb2ce40a3 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import decimal import math import re +from typing import Union from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -312,14 +313,15 @@ def _int_to_json(value): return value -def _float_to_json(value): +def _float_to_json(value) -> Union[None, str, float]: """Coerce 'value' to an JSON-compatible representation.""" if value is None: return None - elif math.isnan(value) or math.isinf(value): - return str(value) - else: - return float(value) + + if isinstance(value, str): + value = float(value) + + return str(value) if (math.isnan(value) or math.isinf(value)) else float(value) def 
_decimal_to_json(value): diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 416bf20ed..0460e7bb9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.0" +__version__ = "2.23.2" diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3a83eda64..5aa967b24 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.1 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery==2.23.2 +google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9e9d4e40f..b8dee50d0 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==0.3.0 +google-cloud-testutils==1.0.0 pytest==6.2.4 mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ffa689a9e..4f2eaf90b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ -google-cloud-bigquery==2.22.1 -google-cloud-bigquery-storage==2.6.0 -google-auth-oauthlib==0.4.4 +google-cloud-bigquery==2.23.2 +google-cloud-bigquery-storage==2.6.2 +google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.1 +pyarrow==5.0.0 pytz==2021.1 diff --git a/setup.py b/setup.py index 91458bb78..5205b5365 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", - "pyarrow >= 3.0.0, < 5.0dev", + "pyarrow >= 3.0.0, < 6.0dev", "requests >= 2.18.0, < 3.0.0dev", ] extras = { diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 4b1828c86..6a96dff62 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -21,6 +21,7 @@ import io import operator +import google.api_core.retry import pkg_resources import pytest import pytz @@ -37,6 +38,10 @@ PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") +class MissingDataError(Exception): + pass + + def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): """Test that a DataFrame with dtypes that map well to BigQuery types can be uploaded without specifying a schema. @@ -657,19 +662,6 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): ) for errors in chunk_errors: assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - bigquery_client.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] expected = [ # Pandas often represents NULL values as NaN. Convert to None for # easier comparison. 
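
The hunk below wraps the verification query in a retry loop so rows still in the streaming buffer have time to become visible. The same pattern, sketched in isolation — `MissingDataError` mirrors the module-level exception added above, while `client`, `table_id`, and `expected_count` are stand-ins for the test fixtures:

    import google.api_core.retry
    from google.cloud import bigquery


    class MissingDataError(Exception):
        """Raised while the streaming buffer has not yet surfaced every row."""


    def wait_for_rows(client: bigquery.Client, table_id: str, expected_count: int):
        # Retry only on MissingDataError; any other error surfaces immediately.
        @google.api_core.retry.Retry(
            predicate=google.api_core.retry.if_exception_type(MissingDataError)
        )
        def get_rows():
            # Query instead of list_rows() so streaming-buffer values are included.
            rows = list(client.query(f"SELECT * FROM `{table_id}`"))
            if len(rows) != expected_count:
                raise MissingDataError()
            return rows

        return get_rows()
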
@@ -677,7 +669,27 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): for data_row in dataframe.itertuples(index=False) ] - assert len(row_tuples) == len(expected) + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer "within a few seconds". + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + @google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type(MissingDataError) + ) + def get_rows(): + rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + if len(rows) != len(expected): + raise MissingDataError() + return rows + + rows = get_rows() + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] for row, expected_row in zip(row_tuples, expected): assert ( diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index aaafdb0f7..cf60cf749 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -653,21 +653,45 @@ def _call_fut(self, value): def test_w_none(self): self.assertEqual(self._call_fut(None), None) + def test_w_non_numeric(self): + with self.assertRaises(TypeError): + self._call_fut(object()) + + def test_w_integer(self): + result = self._call_fut(123) + self.assertIsInstance(result, float) + self.assertEqual(result, 123.0) + def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_float_as_string(self): + self.assertEqual(self._call_fut("1.23"), 1.23) + def test_w_nan(self): result = self._call_fut(float("nan")) self.assertEqual(result.lower(), "nan") + def test_w_nan_as_string(self): + result = self._call_fut("NaN") + self.assertEqual(result.lower(), "nan") + def test_w_infinity(self): result = self._call_fut(float("inf")) self.assertEqual(result.lower(), "inf") + def test_w_infinity_as_string(self): + result = self._call_fut("inf") + self.assertEqual(result.lower(), "inf") + def test_w_negative_infinity(self): result = self._call_fut(float("-inf")) self.assertEqual(result.lower(), "-inf") + def test_w_negative_infinity_as_string(self): + result = self._call_fut("-inf") + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From 66014c3225eb6f868a0d38877bddd50e57c5478a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Aug 2021 11:34:12 -0500 Subject: [PATCH 04/35] chore: merge changes from master (#872) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- .github/.OwlBot.lock.yaml | 2 +- .github/CODEOWNERS | 2 +- CHANGELOG.md | 22 +++ docs/reference.rst | 2 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/client.py | 112 ++++++++---- google/cloud/bigquery/enums.py | 24 +-- google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/base.py | 29 +++ google/cloud/bigquery/job/query.py | 84 +++++++-- google/cloud/bigquery/query.py | 42 +++-- google/cloud/bigquery/retry.py | 22 ++- google/cloud/bigquery/table.py | 42 ++++- google/cloud/bigquery/version.py | 2 +- samples/geography/noxfile.py | 5 +- samples/geography/requirements.txt | 4 +- samples/snippets/noxfile.py | 5 +- samples/snippets/requirements.txt | 4 +- tests/system/test_client.py | 34 ++++ tests/system/test_job_retry.py | 72 ++++++++ tests/unit/job/helpers.py | 1 + tests/unit/job/test_base.py | 14 ++ tests/unit/job/test_query.py | 29 +++ tests/unit/test_client.py | 36 ++++ tests/unit/test_job_retry.py | 247 ++++++++++++++++++++++++++ tests/unit/test_query.py | 13 ++ tests/unit/test_retry.py | 24 +++ tests/unit/test_table.py | 225 ++++++++++++++++++++--- 28 files changed, 1005 insertions(+), 97 deletions(-) create mode 100644 tests/system/test_job_retry.py create mode 100644 tests/unit/test_job_retry.py diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9ee60f7e4..649877dc4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b + digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 76112476b..6763f258c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c08e7910..83b409015 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) + + +### Features + +* add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) +* make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) +* retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) +* support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) 
([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) + + +### Bug Fixes + +* make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) + +### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) + + +### Bug Fixes + +* increase default retry deadline to 10 minutes ([#859](https://www.github.com/googleapis/python-bigquery/issues/859)) ([30770fd](https://www.github.com/googleapis/python-bigquery/commit/30770fd0575fbd5aaa70c14196a4cc54627aecd2)) + ### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) diff --git a/docs/reference.rst b/docs/reference.rst index 8a5bff9a4..d8738e67b 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -68,6 +68,7 @@ Job-Related Types job.SourceFormat job.WriteDisposition job.SchemaUpdateOption + job.TransactionInfo Dataset @@ -137,6 +138,7 @@ Query query.ArrayQueryParameter query.ScalarQueryParameter + query.ScalarQueryParameterType query.StructQueryParameter query.UDFResource diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 59bb08ce5..5529f9b2e 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -69,6 +69,7 @@ from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob +from google.cloud.bigquery.job import TransactionInfo from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference @@ -148,6 +149,7 @@ "GoogleSheetsOptions", "ParquetOptions", "ScriptOptions", + "TransactionInfo", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index acae2fe36..1a826eb55 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -73,7 +73,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -2709,7 +2709,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = "\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows) encoded_str = data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( @@ -3110,6 +3110,7 @@ def query( project: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3139,21 +3140,52 @@ def query( Project ID of the project of where to run the job. Defaults to the client's project. retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. 
This has + a reasonable default that should only be overridden + with care. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` is + provided, then the job returned by the query will not + be retryable, and an exception will be raised if a + non-``None`` (and non-default) value for ``job_retry`` + is also provided. + + Note that errors aren't detected until ``result()`` is + called on the job returned. The ``job_retry`` + specified here becomes the default ``job_retry`` for + ``result()``, where it can also be specified. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class, or if both ``job_id`` and non-``None`` non-default + ``job_retry`` are provided. """ job_id_given = job_id is not None - job_id = _make_job_id(job_id, job_id_prefix) + if ( + job_id_given + and job_retry is not None + and job_retry is not DEFAULT_JOB_RETRY + ): + raise TypeError( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." + ) + + job_id_save = job_id if project is None: project = self.project @@ -3161,8 +3193,6 @@ def query( if location is None: location = self.location - job_config = copy.deepcopy(job_config) - if self._default_query_job_config: if job_config: _verify_job_config_type( @@ -3172,6 +3202,8 @@ def query( # that is in the default, # should be filled in with the default # the incoming therefore has precedence + # + # Note that _fill_from_default doesn't mutate the receiver job_config = job_config._fill_from_default( self._default_query_job_config ) @@ -3180,34 +3212,54 @@ def query( self._default_query_job_config, google.cloud.bigquery.job.QueryJobConfig, ) - job_config = copy.deepcopy(self._default_query_job_config) + job_config = self._default_query_job_config - job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) + # Note that we haven't modified the original job_config (or + # _default_query_job_config) up to this point. + job_config_save = job_config - try: - query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.Conflict as create_exc: - # The thought is if someone is providing their own job IDs and they get - # their job ID generation wrong, this could end up returning results for - # the wrong query. We thus only try to recover if job ID was not given. 
- if job_id_given: - raise create_exc + def do_query(): + # Make a copy now, so that original doesn't get changed by the process + # below and to facilitate retry + job_config = copy.deepcopy(job_config_save) + + job_id = _make_job_id(job_id_save, job_id_prefix) + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) try: - query_job = self.get_job( - job_id, - project=project, - location=location, - retry=retry, - timeout=timeout, - ) - except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job else: return query_job - else: - return query_job + + future = do_query() + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. + if not job_id_given: + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future def insert_rows( self, diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 0da01d665..d67cebd4c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -259,23 +259,23 @@ class SqlTypeNames(str, enum.Enum): class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" - STRING = ScalarQueryParameterType("STRING") + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") BYTES = ScalarQueryParameterType("BYTES") - INTEGER = ScalarQueryParameterType("INT64") - INT64 = ScalarQueryParameterType("INT64") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") FLOAT = ScalarQueryParameterType("FLOAT64") FLOAT64 = ScalarQueryParameterType("FLOAT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - DECIMAL = ScalarQueryParameterType("NUMERIC") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BOOLEAN = ScalarQueryParameterType("BOOL") - BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - DATE = ScalarQueryParameterType("DATE") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") TIME = ScalarQueryParameterType("TIME") - DATETIME = ScalarQueryParameterType("DATETIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") class WriteDisposition(object): diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 4c16d0e20..f51311b0b 100644 --- a/google/cloud/bigquery/job/__init__.py +++ 
b/google/cloud/bigquery/job/__init__.py @@ -22,6 +22,7 @@ from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import TransactionInfo from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig @@ -81,5 +82,6 @@ "QueryPriority", "SchemaUpdateOption", "SourceFormat", + "TransactionInfo", "WriteDisposition", ] diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 20ad81c0b..e5fc592a6 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -19,6 +19,7 @@ import http import threading import typing +from typing import Dict, Optional from google.api_core import exceptions import google.api_core.future.polling @@ -88,6 +89,22 @@ def _error_result_to_exception(error_result): ) +class TransactionInfo(typing.NamedTuple): + """[Alpha] Information of a multi-statement transaction. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#TransactionInfo + + .. versionadded:: 2.24.0 + """ + + transaction_id: str + """Output only. ID of the transaction.""" + + @classmethod + def from_api_repr(cls, transaction_info: Dict[str, str]) -> "TransactionInfo": + return cls(transaction_info["transactionId"]) + + class _JobReference(object): """A reference to a job. @@ -336,6 +353,18 @@ def reservation_usage(self): for usage in usage_stats_raw ] + @property + def transaction_info(self) -> Optional[TransactionInfo]: + """Information of the multi-statement transaction if this job is part of one. + + .. versionadded:: 2.24.0 + """ + info = self._properties.get("statistics", {}).get("transactionInfo") + if info is None: + return None + else: + return TransactionInfo.from_api_repr(info) + @property def error_result(self): """Error information about the job as a whole. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index f4a78a9da..ca3ffb2bf 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -36,7 +36,7 @@ from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning @@ -1260,6 +1260,7 @@ def result( retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None, start_index: int = None, + job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. @@ -1270,9 +1271,13 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. If the job state is - ``DONE``, retrying is aborted early even if the results are not - available, as this will not change anymore. + How to retry the call that retrieves rows. This only + applies to making RPC calls. It isn't used to retry + failed jobs. This has a reasonable default that + should only be overridden with care. 
If the job state + is ``DONE``, retrying is aborted early even if the + results are not available, as this will not change + anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1280,6 +1285,16 @@ def result( applies to each individual request. start_index (Optional[int]): The zero-based index of the starting row to read. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` was + provided to the query that created this job, then the + job returned by the query will not be retryable, and + an exception will be raised if non-``None`` + non-default ``job_retry`` is also provided. Returns: google.cloud.bigquery.table.RowIterator: @@ -1295,17 +1310,66 @@ def result( Raises: google.cloud.exceptions.GoogleAPICallError: - If the job failed. + If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. + TypeError: + If Non-``None`` and non-default ``job_retry`` is + provided and the job is not retryable. """ try: - super(QueryJob, self).result(retry=retry, timeout=timeout) + retry_do_query = getattr(self, "_retry_do_query", None) + if retry_do_query is not None: + if job_retry is DEFAULT_JOB_RETRY: + job_retry = self._job_retry + else: + if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY: + raise TypeError( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." + ) + + first = True + + def do_get_result(): + nonlocal first + + if first: + first = False + else: + # Note that we won't get here if retry_do_query is + # None, because we won't use a retry. + + # The orinal job is failed. Create a new one. + job = retry_do_query() + + # If it's already failed, we might as well stop: + if job.done() and job.exception() is not None: + raise job.exception() + + # Become the new job: + self.__dict__.clear() + self.__dict__.update(job.__dict__) + + # This shouldn't be necessary, because once we have a good + # job, it should stay good,and we shouldn't have to retry. + # But let's be paranoid. :) + self._retry_do_query = retry_do_query + self._job_retry = job_retry + + super(QueryJob, self).result(retry=retry, timeout=timeout) + + # Since the job could already be "done" (e.g. got a finished job + # via client.get_job), the superclass call to done() might not + # set the self._query_results cache. + self._reload_query_results(retry=retry, timeout=timeout) + + if retry_do_query is not None and job_retry is not None: + do_get_result = job_retry(do_get_result) + + do_get_result() - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. 
- self._reload_query_results(retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index d1e9a45a5..1f449f189 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,7 +16,9 @@ from collections import OrderedDict import copy -from typing import Union +import datetime +import decimal +from typing import Optional, Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -24,6 +26,11 @@ from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +_SCALAR_VALUE_TYPE = Optional[ + Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] +] + + class UDFResource(object): """Describe a single user-defined function (UDF) resource. @@ -325,35 +332,46 @@ class ScalarQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for scalar values. Args: - name (Optional[str]): + name: Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - type_ (str): - Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + type_: + Name of parameter type. See + :class:`google.cloud.bigquery.enums.SqlTypeNames` and + :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + supported types. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. """ - def __init__(self, name, type_, value): + def __init__( + self, + name: Optional[str], + type_: Optional[Union[str, ScalarQueryParameterType]], + value: _SCALAR_VALUE_TYPE, + ): self.name = name - self.type_ = type_ + if isinstance(type_, ScalarQueryParameterType): + self.type_ = type_._type + else: + self.type_ = type_ self.value = value @classmethod - def positional(cls, type_: str, value) -> "ScalarQueryParameter": + def positional( + cls, type_: Union[str, ScalarQueryParameterType], value: _SCALAR_VALUE_TYPE + ) -> "ScalarQueryParameter": """Factory for positional paramater. Args: - type_ (str): + type_: Name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. Returns: diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 2df4de08b..e9286055c 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -32,6 +32,8 @@ auth_exceptions.TransportError, ) +_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds + def _should_retry(exc): """Predicate for determining when to retry. @@ -47,7 +49,7 @@ def _should_retry(exc): return reason in _RETRYABLE_REASONS -DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0) """The default retry object. Any method with a ``retry`` parameter will be retried automatically, @@ -56,3 +58,21 @@ def _should_retry(exc): on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. 
""" + +job_retry_reasons = "rateLimitExceeded", "backendError" + + +def _job_should_retry(exc): + if not hasattr(exc, "errors") or len(exc.errors) == 0: + return False + + reason = exc.errors[0]["reason"] + return reason in job_retry_reasons + + +DEFAULT_JOB_RETRY = retry.Retry( + predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE +) +""" +The default job retry object. +""" diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 8433e37fa..dad06deed 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -247,9 +247,16 @@ def _key(self): return (self._project, self._dataset_id, self._table_id) def __eq__(self, other): - if not isinstance(other, TableReference): + if isinstance(other, (Table, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + elif isinstance(other, TableReference): + return self._key() == other._key() + else: return NotImplemented - return self._key() == other._key() def __ne__(self, other): return not self == other @@ -1003,6 +1010,24 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) + def __eq__(self, other): + if isinstance(other, Table): + return ( + self._properties["tableReference"] + == other._properties["tableReference"] + ) + elif isinstance(other, (TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def __repr__(self): return "Table({})".format(repr(self.reference)) @@ -1221,6 +1246,19 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def __eq__(self, other): + if isinstance(other, (Table, TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0460e7bb9..84f6b4643 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.23.2" +__version__ = "2.24.0" diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5aa967b24..dfee339d4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.2 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery==2.24.0 +google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4f2eaf90b..264899dff 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-bigquery==2.23.2 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery==2.24.0 +google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 4970ef281..77832fcac 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1549,6 +1549,40 @@ def test_dml_statistics(self): assert query_job.dml_stats.updated_row_count == 0 assert query_job.dml_stats.deleted_row_count == 3 + def test_transaction_info(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = f"{Config.CLIENT.project}.{dataset_id}.test_dml_statistics" + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. 
+ sql = f""" + BEGIN TRANSACTION; + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + COMMIT TRANSACTION; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + # Transaction ID set by the server should be accessible + assert query_job.transaction_info is not None + assert query_job.transaction_info.transaction_id != "" + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/tests/system/test_job_retry.py b/tests/system/test_job_retry.py new file mode 100644 index 000000000..520545493 --- /dev/null +++ b/tests/system/test_job_retry.py @@ -0,0 +1,72 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import contextlib +import threading +import time + +import google.api_core.exceptions +import google.cloud.bigquery +import pytest + + +def thread(func): + thread = threading.Thread(target=func, daemon=True) + thread.start() + return thread + + +@pytest.mark.parametrize("job_retry_on_query", [True, False]) +def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query): + """ + Test job_retry + + See: https://github.com/googleapis/python-bigquery/issues/539 + """ + from google.api_core import exceptions + from google.api_core.retry import if_exception_type, Retry + + table_name = f"{dataset_id}.t539" + + # Without a custom retry, we fail: + with pytest.raises(google.api_core.exceptions.NotFound): + bigquery_client.query(f"select count(*) from {table_name}").result() + + retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound)) + + job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {} + job = bigquery_client.query(f"select count(*) from {table_name}", **job_retry) + job_id = job.job_id + + # We can already know that the job failed, but we're not supposed + # to find out until we call result, which is where retry happend + assert job.done() + assert job.exception() is not None + + @thread + def create_table(): + time.sleep(1) # Give the first retry attempt time to fail. 
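+        # The table is created from a separate, short-lived client inside this
+        # daemon thread, so the main thread's `job.result(...)` keeps retrying on
+        # NotFound until the table finally exists and the query can succeed.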
+ with contextlib.closing(google.cloud.bigquery.Client()) as client: + client.query(f"create table {table_name} (id int64)").result() + + job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound) + [[count]] = list(job.result(**job_retry)) + assert count == 0 + + # The job was retried, and thus got a new job id + assert job.job_id != job_id + + # Make sure we don't leave a thread behind: + create_table.join() + bigquery_client.query(f"drop table {table_name}").result() diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index ea071c5ac..c792214e7 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -162,6 +162,7 @@ def _verifyInitialReadonlyProperties(self, job): self.assertIsNone(job.created) self.assertIsNone(job.started) self.assertIsNone(job.ended) + self.assertIsNone(job.transaction_info) # derived from resource['status'] self.assertIsNone(job.error_result) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 405ad6ee5..0ac1d05b5 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -227,6 +227,20 @@ def test_script_statistics(self): self.assertEqual(stack_frame.end_column, 14) self.assertEqual(stack_frame.text, "QUERY TEXT") + def test_transaction_info(self): + from google.cloud.bigquery.job.base import TransactionInfo + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + assert job.transaction_info is None + + statistics = job._properties["statistics"] = {} + assert job.transaction_info is None + + statistics["transactionInfo"] = {"transactionId": "123-abc-xyz"} + assert isinstance(job.transaction_info, TransactionInfo) + assert job.transaction_info.transaction_id == "123-abc-xyz" + def test_num_child_jobs(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 482f7f3af..d41370520 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -128,6 +128,18 @@ def _verify_dml_stats_resource_properties(self, job, resource): else: assert job.dml_stats is None + def _verify_transaction_info_resource_properties(self, job, resource): + resource_stats = resource.get("statistics", {}) + + if "transactionInfo" in resource_stats: + resource_transaction_info = resource_stats["transactionInfo"] + job_transaction_info = job.transaction_info + assert job_transaction_info.transaction_id == resource_transaction_info.get( + "transactionId" + ) + else: + assert job.transaction_info is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -137,6 +149,7 @@ def _verify_configuration_properties(self, job, configuration): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) self._verify_dml_stats_resource_properties(job, resource) + self._verify_transaction_info_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -325,6 +338,22 @@ def test_from_api_repr_with_dml_stats(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_transaction_info(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + 
"configuration": {"query": {"query": self.QUERY}}, + "statistics": {"transactionInfo": {"transactionId": "1a2b-3c4d"}}, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 0dc9c3f55..b8ffdf295 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7614,6 +7614,42 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_unicode_emoji_data_case(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + + emoji = "\U0001F3E6" + json_row = {"emoji": emoji} + json_rows = [json_row] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json(json_rows, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=None, + ) + + sent_data_file = load_table_from_file.mock_calls[0][1][1] + + # make sure json_row's unicode characters are only encoded one time + expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}' + assert sent_data_file.getvalue() == expected_bytes + # Low-level tests @classmethod diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py new file mode 100644 index 000000000..b2095d2f2 --- /dev/null +++ b/tests/unit/test_job_retry.py @@ -0,0 +1,247 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import re + +import mock +import pytest + +import google.api_core.exceptions +import google.api_core.retry + +from .helpers import make_connection + + +# With job_retry_on_query, we're testing 4 scenarios: +# - No `job_retry` passed, retry on default rateLimitExceeded. +# - Pass NotFound retry to `query`. +# - Pass NotFound retry to `result`. +# - Pass BadRequest retry to query, with the value passed to `result` overriding. +@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"]) +@mock.patch("time.sleep") +def test_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. 
+ """ + + retry_notfound = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.NotFound + ) + ) + retry_badrequest = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.BadRequest + ) + ) + + if job_retry_on_query is None: + reason = "rateLimitExceeded" + else: + reason = "notFound" + + err = dict(reason=reason) + responses = [ + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=retry_notfound) + elif job_retry_on_query == "Both": + # This will be overridden in `result` + job_retry = dict(job_retry=retry_badrequest) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = ( + dict(job_retry=retry_notfound) + if job_retry_on_query in ("Result", "Both") + else {} + ) + result = job.result(**job_retry) + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # The job adjusts it's job id based on the id of the last attempt. + assert job.job_id != orig_job_id + assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"] + + # We had to sleep three times + assert len(sleep.mock_calls) == 3 + + # Sleeps are random, however they're more than 0 + assert min(c[1][0] for c in sleep.mock_calls) > 0 + + # They're at most 2 * (multiplier**(number of sleeps - 1)) * initial + # The default multiplier is 2 + assert max(c[1][0] for c in sleep.mock_calls) <= 8 + + # We can ask for the result again: + responses = [ + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + orig_job_id = job.job_id + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # We wouldn't (and didn't) fail, because we're dealing with a successful job. + # So the job id hasn't changed. + assert job.job_id == orig_job_id + + +# With job_retry_on_query, we're testing 4 scenarios: +# - Pass None retry to `query`. +# - Pass None retry to `result`. +@pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) +@mock.patch("time.sleep") +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. 
+ """ + err = dict(reason="rateLimitExceeded") + responses = [dict(status=dict(state="DONE", errors=[err], errorResult=err))] * 3 + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + response["jobReference"] = data["jobReference"] + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=None) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {} + with pytest.raises(google.api_core.exceptions.Forbidden): + job.result(**job_retry) + + assert job.job_id == orig_job_id + assert len(sleep.mock_calls) == 0 + + +@mock.patch("google.api_core.retry.datetime_helpers") +@mock.patch("time.sleep") +def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client): + """ + If at first you don't succeed, maybe you will later. :) + """ + conn = client._connection = make_connection() + + datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2) + + err = dict(reason="rateLimitExceeded") + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + if calls: + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) + response["jobReference"] = data["jobReference"] + return response + + conn.api_request.side_effect = api_request + + job = client.query("select 1") + orig_job_id = job.job_id + + with pytest.raises(google.api_core.exceptions.RetryError): + job.result() + + # We never got a successful job, so the job id never changed: + assert job.job_id == orig_job_id + + # We failed because we couldn't succeed after 120 seconds. + # But we can try again: + err2 = dict(reason="backendError") # We also retry on this + responses = [ + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn.api_request.side_effect = api_request + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + assert job.job_id != orig_job_id + + +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." 
+ ), + ): + client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) + + +def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): + client._connection = make_connection({}) + job = client.query("select 42", job_id=42) + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." + ), + ): + job.result(job_retry=google.api_core.retry.Retry()) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 9483fe8dd..69a6772e5 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +import decimal import unittest import mock @@ -430,6 +431,18 @@ def test_positional(self): self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) + def test_ctor_w_scalar_query_parameter_type(self): + from google.cloud.bigquery import enums + + param = self._make_one( + name="foo", + type_=enums.SqlParameterScalarTypes.BIGNUMERIC, + value=decimal.Decimal("123.456"), + ) + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "BIGNUMERIC") + self.assertEqual(param.value, decimal.Decimal("123.456")) + def test_from_api_repr_w_name(self): RESOURCE = { "name": "foo", diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 6fb7f93fd..c7c25e036 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -86,3 +86,27 @@ def test_w_unstructured_bad_gateway(self): exc = BadGateway("testing") self.assertTrue(self._call_fut(exc)) + + +def test_DEFAULT_JOB_RETRY_predicate(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.api_core.exceptions import ClientError + + assert not DEFAULT_JOB_RETRY._predicate(TypeError()) + assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail")) + assert not DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="idk")]) + ) + + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="rateLimitExceeded")]) + ) + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="backendError")]) + ) + + +def test_DEFAULT_JOB_RETRY_deadline(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + + assert DEFAULT_JOB_RETRY._deadline == 600 diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 20336b227..66543bb38 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -99,8 +99,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -108,8 +106,6 @@ def test_ctor_defaults(self): self.assertEqual(table_ref.table_id, "table_1") def test_to_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -121,7 +117,6 @@ def test_to_api_repr(self): ) def test_from_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference dataset_ref = DatasetReference("project_1", "dataset_1") @@ -188,8 +183,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.table_id, "string_table") def test___eq___wrong_type(self): - from 
google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset_ref, "table_1") other = object() @@ -197,8 +190,6 @@ def test___eq___wrong_type(self): self.assertEqual(table, mock.ANY) def test___eq___project_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_2", "dataset_1") table = self._make_one(dataset, "table_1") @@ -206,8 +197,6 @@ def test___eq___project_mismatch(self): self.assertNotEqual(table, other) def test___eq___dataset_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_1", "dataset_2") table = self._make_one(dataset, "table_1") @@ -215,24 +204,18 @@ def test___eq___dataset_mismatch(self): self.assertNotEqual(table, other) def test___eq___table_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_2") self.assertNotEqual(table, other) def test___eq___equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_1") self.assertEqual(table, other) def test___hash__set_equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -241,8 +224,6 @@ def test___hash__set_equality(self): self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -278,8 +259,6 @@ def _get_target_class(): return Table def _make_one(self, *args, **kw): - from google.cloud.bigquery.dataset import DatasetReference - if len(args) == 0: dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -565,6 +544,68 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") + def test__eq__wrong_type(self): + table = self._make_one("project_foo.dataset_bar.table_baz") + + class TableWannabe: + pass + + not_a_table = TableWannabe() + not_a_table._properties = table._properties + + assert table != not_a_table # Can't fake it. 
+ + def test__eq__same_table_basic(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + assert table_1 == table_2 + + def test__eq__same_table_multiple_properties(self): + from google.cloud.bigquery import SchemaField + + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.require_partition_filter = True + table_1.labels = {"first": "one", "second": "two"} + + table_1.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.require_partition_filter = True + table_2.labels = {"first": "one", "second": "two"} + table_2.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is table baz" + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.description = "This is also table baz" + + assert table_1 == table_2 # Still equal, only table reference is important. + + def test__eq__different_table(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") + + assert table_1 != table_2 + + def test_hashable(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is a table" + + table_1b = self._make_one("project_foo.dataset_bar.table_baz") + table_1b.description = "Metadata is irrelevant for hashes" + + assert hash(table_1) == hash(table_1b) + def test_schema_setter_non_sequence(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1527,6 +1568,148 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) + def test__eq__wrong_type(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table = self._make_one(resource) + + class FakeTableListItem: + project = "project_foo" + dataset_id = "dataset_bar" + table_id = "table_baz" + + not_a_table = FakeTableListItem() + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource) + table_2 = self._make_one(resource) + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_ref_resource = { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + + resource_1 = {"tableReference": table_ref_resource, "friendlyName": "Table One"} + table_1 = self._make_one(resource_1) + + resource_2 = {"tableReference": table_ref_resource, "friendlyName": "Table Two"} + table_2 = self._make_one(resource_2) + + assert table_1 == table_2 # Still equal, only table reference is important. 
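The equality tests above boil down to one rule: two table objects are equal when they point at the same table, regardless of metadata. A minimal sketch of that behavior (hedged: it assumes a google-cloud-bigquery build that includes this change, and the table path is a placeholder):

```python
# Sketch only: assumes google-cloud-bigquery with the Table equality/hashing
# change applied. "project_foo.dataset_bar.table_baz" is a placeholder path.
from google.cloud import bigquery

table_a = bigquery.Table("project_foo.dataset_bar.table_baz")
table_a.description = "Metadata differences are ignored for equality."
table_b = bigquery.Table("project_foo.dataset_bar.table_baz")

assert table_a == table_b              # same tableReference -> equal
assert hash(table_a) == hash(table_b)  # hashable, so usable in sets and as dict keys

# Table, TableReference, and TableListItem referring to the same table compare equal.
ref = bigquery.TableReference.from_string("project_foo.dataset_bar.table_baz")
assert table_a == ref
```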
+ + def test__eq__different_table(self): + resource_1 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource_1) + + resource_2 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_quux", + } + } + table_2 = self._make_one(resource_2) + + assert table_1 != table_2 + + def test_hashable(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_item = self._make_one(resource) + table_item_2 = self._make_one(resource) + + assert hash(table_item) == hash(table_item_2) + + +class TestTableClassesInterchangeability: + @staticmethod + def _make_table(*args, **kwargs): + from google.cloud.bigquery.table import Table + + return Table(*args, **kwargs) + + @staticmethod + def _make_table_ref(*args, **kwargs): + from google.cloud.bigquery.table import TableReference + + return TableReference(*args, **kwargs) + + @staticmethod + def _make_table_list_item(*args, **kwargs): + from google.cloud.bigquery.table import TableListItem + + return TableListItem(*args, **kwargs) + + def test_table_eq_table_ref(self): + + table = self._make_table("project_foo.dataset_bar.table_baz") + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + + assert table == table_ref + assert table_ref == table + + def test_table_eq_table_list_item(self): + table = self._make_table("project_foo.dataset_bar.table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table == table_list_item + assert table_list_item == table + + def test_table_ref_eq_table_list_item(self): + + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table_ref == table_list_item + assert table_list_item == table_ref + class TestSnapshotDefinition: @staticmethod From dcd78c7861edd922e1b6009f1e2c6e0fd1a5e995 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Aug 2021 10:32:28 -0500 Subject: [PATCH 05/35] fix!: use nullable `Int64` and `boolean` dtypes in `to_dataframe` (#786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To override this behavior, specify the types for the desired columns with the `dtype` argument. BREAKING CHANGE: uses Int64 type by default to avoid loss-of-precision in results with large integer values Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes https://issuetracker.google.com/144712110 🦕 Fixes #793 --- docs/conf.py | 1 + docs/usage/pandas.rst | 27 +++- google/cloud/bigquery/_pandas_helpers.py | 37 ++++- google/cloud/bigquery/table.py | 11 +- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/system/test_arrow.py | 5 +- tests/system/test_pandas.py | 72 ++++++++- tests/unit/job/test_query_pandas.py | 22 +-- tests/unit/test_table.py | 8 +- tests/unit/test_table_pandas.py | 192 +++++++++++++++++++++++ 11 files changed, 340 insertions(+), 39 deletions(-) create mode 100644 tests/unit/test_table_pandas.py diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 9db98dfbb..40732a298 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -14,12 +14,12 @@ First, ensure that the :mod:`pandas` library is installed by running: pip install --upgrade pandas -Alternatively, you can install the BigQuery python client library with +Alternatively, you can install the BigQuery Python client library with :mod:`pandas` by running: .. code-block:: bash - pip install --upgrade google-cloud-bigquery[pandas] + pip install --upgrade 'google-cloud-bigquery[pandas]' To retrieve query results as a :class:`pandas.DataFrame`: @@ -37,6 +37,27 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] +The following data types are used when creating a pandas DataFrame. + +.. list-table:: Pandas Data Type Mapping + :header-rows: 1 + + * - BigQuery + - pandas + - Notes + * - BOOL + - boolean + - + * - DATETIME + - datetime64[ns], object + - object is used when there are values not representable in pandas + * - FLOAT64 + - float64 + - + * - INT64 + - Int64 + - + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -45,7 +66,7 @@ As of version 1.3.0, you can use the to load data from a :class:`pandas.DataFrame` to a :class:`~google.cloud.bigquery.table.Table`. To use this function, in addition to :mod:`pandas`, you will need to install the :mod:`pyarrow` library. You can -install the BigQuery python client library with :mod:`pandas` and +install the BigQuery Python client library with :mod:`pandas` and :mod:`pyarrow` by running: .. code-block:: bash diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 817930ddd..88759bd18 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -18,6 +18,7 @@ import functools import logging import queue +from typing import Dict, Sequence import warnings try: @@ -42,15 +43,19 @@ _LOGGER = logging.getLogger(__name__) -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) - _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. 
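As a concrete illustration of the dtype mapping documented above, the following hedged sketch shows what the new defaults look like in practice (assumptions: Application Default Credentials are configured, and `my_project.my_dataset.my_table` is a placeholder for a table with nullable BOOL, INT64, and FLOAT64 columns):

```python
# Hedged sketch of the documented default dtype mapping; the table path and
# column names are placeholders, and default credentials are assumed.
from google.cloud import bigquery

client = bigquery.Client()
df = client.list_rows("my_project.my_dataset.my_table").to_dataframe()

# Nullable BOOL and INT64 columns now arrive as pandas extension dtypes, so
# NULLs become pandas.NA instead of forcing object or float64 columns.
print(df.dtypes)  # e.g. bool_col -> boolean, int64_col -> Int64, float64_col -> float64
```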
_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads +# If you update the default dtypes, also update the docs at docs/usage/pandas.rst. +_BQ_TO_PANDAS_DTYPE_NULLSAFE = { + "BOOL": "boolean", + "BOOLEAN": "boolean", + "FLOAT": "float64", + "FLOAT64": "float64", + "INT64": "Int64", + "INTEGER": "Int64", +} _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -217,6 +222,28 @@ def bq_to_arrow_schema(bq_schema): return pyarrow.schema(arrow_fields) +def bq_schema_to_nullsafe_pandas_dtypes( + bq_schema: Sequence[schema.SchemaField], +) -> Dict[str, str]: + """Return the default dtypes to use for columns in a BigQuery schema. + + Only returns default dtypes which are safe to have NULL values. This + includes Int64, which has pandas.NA values and does not result in + loss-of-precision. + + Returns: + A mapping from column names to pandas dtypes. + """ + dtypes = {} + for bq_field in bq_schema: + if bq_field.mode.upper() not in {"NULLABLE", "REQUIRED"}: + continue + field_type = bq_field.field_type.upper() + if field_type in _BQ_TO_PANDAS_DTYPE_NULLSAFE: + dtypes[bq_field.name] = _BQ_TO_PANDAS_DTYPE_NULLSAFE[field_type] + return dtypes + + def bq_to_arrow_array(series, bq_field): arrow_type = bq_to_arrow_data_type(bq_field) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index dad06deed..4054f37fe 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1933,6 +1933,13 @@ def to_dataframe( bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, ) + default_dtypes = _pandas_helpers.bq_schema_to_nullsafe_pandas_dtypes( + self.schema + ) + + # Let the user-defined dtypes override the default ones. + # https://stackoverflow.com/a/26853961/101923 + dtypes = {**default_dtypes, **dtypes} # When converting timestamp values to nanosecond precision, the result # can be out of pyarrow bounds. To avoid the error when converting to @@ -1954,7 +1961,9 @@ def to_dataframe( extra_kwargs = {"timestamp_as_object": timestamp_as_object} - df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) + df = record_batch.to_pandas( + date_as_object=date_as_object, integer_object_nulls=True, **extra_kwargs + ) for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) diff --git a/setup.py b/setup.py index 5205b5365..6fa619d37 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ # Keep the no-op bqstorage extra for backward compatibility. # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], - "pandas": ["pandas>=0.23.0"], + "pandas": ["pandas>=1.0.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index ce012f0d7..bf1f89f58 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -13,7 +13,7 @@ grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 -pandas==0.23.0 +pandas==1.0.0 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index f97488e39..7e20dfd7c 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -14,12 +14,9 @@ """System tests for Arrow connector.""" +import pyarrow import pytest -pyarrow = pytest.importorskip( - "pyarrow", minversion="3.0.0" -) # Needs decimal256 for BIGNUMERIC columns. 
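For reference, a small illustration of what the new schema-to-dtypes helper returns. Note that `_pandas_helpers` is a private module, so the import path shown here is an implementation detail used only for illustration:

```python
# Illustration only: _pandas_helpers is private, so importing it directly is an
# implementation detail rather than a supported API.
from google.cloud import bigquery
from google.cloud.bigquery import _pandas_helpers

schema = [
    bigquery.SchemaField("int64_col", "INT64"),
    bigquery.SchemaField("bool_col", "BOOL"),
    bigquery.SchemaField("string_col", "STRING"),  # no null-safe default dtype
    bigquery.SchemaField("int64_repeated", "INT64", mode="REPEATED"),  # skipped
]

dtypes = _pandas_helpers.bq_schema_to_nullsafe_pandas_dtypes(schema)
print(dtypes)  # {'int64_col': 'Int64', 'bool_col': 'boolean'}
```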
- @pytest.mark.parametrize( ("max_results", "scalars_table_name"), diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 6a96dff62..411c9bed0 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -567,7 +567,7 @@ def test_query_results_to_dataframe(bigquery_client): for _, row in df.iterrows(): for col in column_names: # all the schema fields are nullable, so None is acceptable - if not row[col] is None: + if not pandas.isna(row[col]): assert isinstance(row[col], exp_datatypes[col]) @@ -597,7 +597,7 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client): for index, row in df.iterrows(): for col in column_names: # all the schema fields are nullable, so None is acceptable - if not row[col] is None: + if not pandas.isna(row[col]): assert isinstance(row[col], exp_datatypes[col]) @@ -795,3 +795,71 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe.index) == 100 + + +@pytest.mark.parametrize( + ("max_results",), ((None,), (10,),) # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results): + df = bigquery_client.list_rows( + scalars_table, max_results=max_results, + ).to_dataframe() + + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + + # object is used by default, but we can use "datetime64[ns]" automatically + # when data is within the supported range. + # https://github.com/googleapis/python-bigquery/issues/861 + assert df.dtypes["date_col"].name == "object" + + # object is used by default, but we can use "timedelta64[ns]" automatically + # https://github.com/googleapis/python-bigquery/issues/862 + assert df.dtypes["time_col"].name == "object" + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["bignumeric_col"].name == "object" + assert df.dtypes["numeric_col"].name == "object" + + # pandas uses Python string and bytes objects. + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["string_col"].name == "object" + + +@pytest.mark.parametrize( + ("max_results",), ((None,), (10,),) # Use BQ Storage API. # Use REST API. +) +def test_list_rows_nullable_scalars_extreme_dtypes( + bigquery_client, scalars_extreme_table, max_results +): + df = bigquery_client.list_rows( + scalars_extreme_table, max_results=max_results + ).to_dataframe() + + # Extreme values are out-of-bounds for pandas datetime64 values, which use + # nanosecond precision. Values before 1677-09-21 and after 2262-04-11 must + # be represented with object. + # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations + assert df.dtypes["date_col"].name == "object" + assert df.dtypes["datetime_col"].name == "object" + assert df.dtypes["timestamp_col"].name == "object" + + # These pandas dtypes can handle the same ranges as BigQuery. 
+ assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["int64_col"].name == "Int64" + + # object is used by default, but we can use "timedelta64[ns]" automatically + # https://github.com/googleapis/python-bigquery/issues/862 + assert df.dtypes["time_col"].name == "object" + + # decimal.Decimal is used to avoid loss of precision. + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["bignumeric_col"].name == "object" + + # pandas uses Python string and bytes objects. + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["string_col"].name == "object" diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index e5105974f..c3a9d2d1a 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -20,11 +20,6 @@ import pyarrow import pytest -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None - from google.cloud import bigquery_storage try: @@ -36,6 +31,8 @@ from .helpers import _make_connection from .helpers import _make_job_resource +pandas = pytest.importorskip("pandas") + @pytest.fixture def table_read_options_kwarg(): @@ -78,7 +75,6 @@ def test__contains_order_by(query, expected): assert not mut._contains_order_by(query) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.parametrize( "query", ( @@ -413,7 +409,6 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe(): from google.cloud.bigquery.job import QueryJob as target_class @@ -452,7 +447,6 @@ def test_to_dataframe(): assert list(df) == ["name", "age"] # verify the column names -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_ddl_query(): from google.cloud.bigquery.job import QueryJob as target_class @@ -472,7 +466,6 @@ def test_to_dataframe_ddl_query(): assert len(df) == 0 -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -522,7 +515,6 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -565,7 +557,6 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -617,15 +608,14 @@ def test_to_dataframe_column_dtypes(): assert list(df) == exp_columns # verify the column names assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" - assert df.seconds.dtype.name == "int64" + assert df.seconds.dtype.name == "Int64" assert df.miles.dtype.name == "float64" assert df.km.dtype.name == "float16" assert df.payment_type.dtype.name == "object" - assert df.complete.dtype.name == "bool" + assert df.complete.dtype.name == "boolean" assert df.date.dtype.name == "object" -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -657,7 +647,6 @@ def test_to_dataframe_column_date_dtypes(): assert df.date.dtype.name == "datetime64[ns]" 
-@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("tqdm.tqdm") def test_to_dataframe_with_progress_bar(tqdm_mock): @@ -685,7 +674,6 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): tqdm_mock.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm_pending(): from google.cloud.bigquery import table @@ -741,7 +729,6 @@ def test_to_dataframe_w_tqdm_pending(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm(): from google.cloud.bigquery import table @@ -801,7 +788,6 @@ def test_to_dataframe_w_tqdm(): ) -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_dataframe_w_tqdm_max_results(): from google.cloud.bigquery import table diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 66543bb38..44d02f14c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2761,7 +2761,7 @@ def test_to_dataframe(self): self.assertEqual(len(df), 4) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names self.assertEqual(df.name.dtype.name, "object") - self.assertEqual(df.age.dtype.name, "int64") + self.assertEqual(df.age.dtype.name, "Int64") @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): @@ -3004,7 +3004,7 @@ def test_to_dataframe_w_various_types_nullable(self): self.assertTrue(row.isnull().all()) else: self.assertIsInstance(row.start_timestamp, pandas.Timestamp) - self.assertIsInstance(row.seconds, float) + self.assertIsInstance(row.seconds, int) self.assertIsInstance(row.payment_type, str) self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) @@ -3050,11 +3050,11 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(list(df), exp_columns) # verify the column names self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") - self.assertEqual(df.seconds.dtype.name, "int64") + self.assertEqual(df.seconds.dtype.name, "Int64") self.assertEqual(df.miles.dtype.name, "float64") self.assertEqual(df.km.dtype.name, "float16") self.assertEqual(df.payment_type.dtype.name, "object") - self.assertEqual(df.complete.dtype.name, "bool") + self.assertEqual(df.complete.dtype.name, "boolean") self.assertEqual(df.date.dtype.name, "object") @mock.patch("google.cloud.bigquery.table.pandas", new=None) diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py new file mode 100644 index 000000000..a223e6652 --- /dev/null +++ b/tests/unit/test_table_pandas.py @@ -0,0 +1,192 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import decimal +from unittest import mock + +import pyarrow +import pytest + +from google.cloud import bigquery + +pandas = pytest.importorskip("pandas") + + +TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data" + + +@pytest.fixture +def class_under_test(): + from google.cloud.bigquery.table import RowIterator + + return RowIterator + + +def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): + # See tests/system/test_arrow.py for the actual types we get from the API. + arrow_schema = pyarrow.schema( + [ + pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38)), + pyarrow.field("bool_col", pyarrow.bool_()), + pyarrow.field("bytes_col", pyarrow.binary()), + pyarrow.field("date_col", pyarrow.date32()), + pyarrow.field("datetime_col", pyarrow.timestamp("us", tz=None)), + pyarrow.field("float64_col", pyarrow.float64()), + pyarrow.field("int64_col", pyarrow.int64()), + pyarrow.field("numeric_col", pyarrow.decimal128(38, scale=9)), + pyarrow.field("string_col", pyarrow.string()), + pyarrow.field("time_col", pyarrow.time64("us")), + pyarrow.field( + "timestamp_col", pyarrow.timestamp("us", tz=datetime.timezone.utc) + ), + ] + ) + arrow_table = pyarrow.Table.from_pydict( + { + "bignumeric_col": [decimal.Decimal("123.456789101112131415")], + "bool_col": [True], + "bytes_col": [b"Hello,\x00World!"], + "date_col": [datetime.date(2021, 8, 9)], + "datetime_col": [datetime.datetime(2021, 8, 9, 13, 30, 44, 123456)], + "float64_col": [1.25], + "int64_col": [-7], + "numeric_col": [decimal.Decimal("-123.456789")], + "string_col": ["abcdefg"], + "time_col": [datetime.time(14, 21, 17, 123456)], + "timestamp_col": [ + datetime.datetime( + 2021, 8, 9, 13, 30, 44, 123456, tzinfo=datetime.timezone.utc + ) + ], + }, + schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("bignumeric_col", "BIGNUMERIC"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("datetime_col", "DATETIME"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("int64_col", "INT64"), + bigquery.SchemaField("numeric_col", "NUMERIC"), + bigquery.SchemaField("string_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe() + + # Check for expected dtypes. + # Keep these in sync with tests/system/test_pandas.py + assert df.dtypes["bignumeric_col"].name == "object" + assert df.dtypes["bool_col"].name == "boolean" + assert df.dtypes["bytes_col"].name == "object" + assert df.dtypes["date_col"].name == "object" + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["float64_col"].name == "float64" + assert df.dtypes["int64_col"].name == "Int64" + assert df.dtypes["numeric_col"].name == "object" + assert df.dtypes["string_col"].name == "object" + assert df.dtypes["time_col"].name == "object" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + + # Check for expected values. 
+ assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") + assert df["bool_col"][0] # True + assert df["bytes_col"][0] == b"Hello,\x00World!" + + # object is used by default, but we can use "datetime64[ns]" automatically + # when data is within the supported range. + # https://github.com/googleapis/python-bigquery/issues/861 + assert df["date_col"][0] == datetime.date(2021, 8, 9) + + assert df["datetime_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456") + assert df["float64_col"][0] == 1.25 + assert df["int64_col"][0] == -7 + assert df["numeric_col"][0] == decimal.Decimal("-123.456789") + assert df["string_col"][0] == "abcdefg" + + # Pandas timedelta64 might be a better choice for pandas time columns. Then + # they can more easily be combined with date columns to form datetimes. + # https://github.com/googleapis/python-bigquery/issues/862 + assert df["time_col"][0] == datetime.time(14, 21, 17, 123456) + + assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z") + + +def test_to_dataframe_nullable_scalars_with_custom_dtypes( + monkeypatch, class_under_test +): + """Passing in explicit dtypes is merged with default behavior.""" + arrow_schema = pyarrow.schema( + [ + pyarrow.field("int64_col", pyarrow.int64()), + pyarrow.field("other_int_col", pyarrow.int64()), + ] + ) + arrow_table = pyarrow.Table.from_pydict( + {"int64_col": [1000], "other_int_col": [-7]}, schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("int64_col", "INT64"), + bigquery.SchemaField("other_int_col", "INT64"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe(dtypes={"other_int_col": "int8"}) + + assert df.dtypes["int64_col"].name == "Int64" + assert df["int64_col"][0] == 1000 + + assert df.dtypes["other_int_col"].name == "int8" + assert df["other_int_col"][0] == -7 + + +def test_to_dataframe_arrays(monkeypatch, class_under_test): + arrow_schema = pyarrow.schema( + [pyarrow.field("int64_repeated", pyarrow.list_(pyarrow.int64()))] + ) + arrow_table = pyarrow.Table.from_pydict( + {"int64_repeated": [[-1, 0, 2]]}, schema=arrow_schema, + ) + + nullable_schema = [ + bigquery.SchemaField("int64_repeated", "INT64", mode="REPEATED"), + ] + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + mock_to_arrow = mock.Mock() + mock_to_arrow.return_value = arrow_table + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, nullable_schema) + monkeypatch.setattr(rows, "to_arrow", mock_to_arrow) + df = rows.to_dataframe() + + assert df.dtypes["int64_repeated"].name == "object" + assert tuple(df["int64_repeated"][0]) == (-1, 0, 2) From 60e73fe9c7338d3aba6b110ca1a8358ef98dee32 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Aug 2021 14:31:21 -0500 Subject: [PATCH 06/35] chore: sync v3 with master branch (#880) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: protect v3.x.x branch (#816) * chore: protect v3.x.x branch In preparation for breaking changes. 
* force pattern to be a string * simplify branch name * fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815) That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set. * feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. (#817) PiperOrigin-RevId: 387137741 Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25 * chore: release 2.23.0 (#819) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore(deps): update dependency google-cloud-bigquery to v2.23.0 (#820) * fix: `insert_rows()` accepts float column values as strings again (#824) * chore: release 2.23.1 (#825) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore: add second protection rule for v3 branch (#828) * chore(deps): update dependency google-cloud-bigquery to v2.23.1 (#827) * test: retry getting rows after streaming them in `test_insert_rows_from_dataframe` (#832) * chore(deps): update dependency pyarrow to v5 (#834) * chore(deps): update dependency google-cloud-bigquery-storage to v2.6.2 (#795) * deps: expand pyarrow pins to support 5.x releases (#833) * chore: release 2.23.2 (#835) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore(deps): update dependency google-auth-oauthlib to v0.4.5 (#839) * chore(deps): update dependency google-cloud-bigquery to v2.23.2 (#838) * chore(deps): update dependency google-cloud-testutils to v1 (#845) * chore: require CODEOWNER review and up to date branches (#846) These two lines bring the rules on this repo in line with the defaults: https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50 * chore: add api-bigquery as a samples owner (#852) * fix: increase default retry deadline to 10 minutes (#859) The backend API has a timeout of 4 minutes, so the default of 2 minutes was not allowing for any retries to happen in some cases. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #853 🦕 * process: add yoshi-python to samples CODEOWNERS (#858) Closes #857. 
* chore: release 2.23.3 (#860) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast * chore(deps): update dependency google-cloud-bigquery to v2.23.3 (#866) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.23.2` -> `==2.23.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/compatibility-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/confidence-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery: [`v2.23.3`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#2233-httpswwwgithubcomgoogleapispython-bigquerycomparev2232v2233-2021-08-06) ([Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3))
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[ ] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * feat: add support for transaction statistics (#849) * feat: add support for transaction statistics * Hoist transaction_info into base job class * Add versionadded directive to new property and class * Include new class in docs reference * chore(deps): update dependency google-cloud-bigquery-storage to v2.6.3 (#863) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.2` -> `==2.6.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/compatibility-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/confidence-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery-storage: [`v2.6.3`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#263-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev262v263-2021-08-06) ([Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3))
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[x] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * chore: fix INSTALL_LIBRARY_FROM_SOURCE in noxfile.py (#869) Source-Link: https://github.com/googleapis/synthtool/commit/6252f2cd074c38f37b44abe5e96d128733eb1b61 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc Co-authored-by: Owl Bot * feat: make the same `Table*` instances equal to each other (#867) * feat: make the same Table instances equal to each other * Table equality should ignore metadata differences * Compare instances through tableReference property * Make Table instances hashable * Make Table* classes interchangeable If these classes reference the same table, they are now considered equal. * feat: support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor (#850) Follow-up to https://github.com/googleapis/python-bigquery/pull/840/files#r679880582 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) * feat: retry failed query jobs in `result()` (#837) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #539 🦕 Previously, we only retried failed API requests. Now, we retry failed jobs (according to the predicate of the `Retry` object passed to `job.result()`). 
* fix: make unicode characters working well in load_table_from_json (#865) Co-authored-by: Tim Swast Co-authored-by: Tres Seaver * chore: release 2.24.0 (#868) :robot: I have created a release \*beep\* \*boop\* --- ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) ### Features * add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) * make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) * retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) * support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) ### Bug Fixes * make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). * chore(deps): update dependency google-cloud-bigquery to v2.24.0 (#873) * test: refactor `list_rows` tests and add test for scalars (#829) * test: refactor `list_rows` tests and add test for scalars * fix JSON formatting * add TODO for INTERVAL Arrow support * format tests * chore: drop mention of Python 2.7 from templates (#877) Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 Co-authored-by: Owl Bot * fix: remove pytz dependency and require pyarrow>=3.0.0 (#875) * fix: remove pytz dependency * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix(deps): require pyarrow>=3.0.0 * remove version check for pyarrow * require pyarrow 3.0 in pandas extra * remove _BIGNUMERIC_SUPPORT references from tests Co-authored-by: Owl Bot Co-authored-by: Dina Graves Portman Co-authored-by: Tim Swast * Update google/cloud/bigquery/table.py * tests: avoid INTERVAL columns in pandas tests Co-authored-by: Peter Lamut Co-authored-by: gcf-owl-bot[bot] <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: WhiteSource Renovate Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Jim Fulton Co-authored-by: Grimmer Co-authored-by: Tres Seaver Co-authored-by: Dina Graves Portman --- docs/snippets.py | 5 +- google/cloud/bigquery/table.py | 4 +- samples/client_query_w_timestamp_params.py | 3 +- samples/geography/noxfile.py | 6 +- samples/snippets/noxfile.py | 6 +- .../templates/install_deps.tmpl.rst | 2 +- 
tests/data/scalars.jsonl | 4 +- tests/data/scalars_extreme.jsonl | 10 +- tests/data/scalars_schema.json | 54 +++++---- tests/system/test_arrow.py | 35 +++++- tests/system/test_client.py | 53 +-------- tests/system/test_list_rows.py | 112 ++++++++++++++++++ tests/system/test_pandas.py | 54 +++++++-- tests/unit/job/test_base.py | 4 +- tests/unit/test__pandas_helpers.py | 13 +- tests/unit/test_client.py | 21 ++-- tests/unit/test_table.py | 6 +- 17 files changed, 268 insertions(+), 124 deletions(-) create mode 100644 tests/system/test_list_rows.py diff --git a/docs/snippets.py b/docs/snippets.py index 82e07901e..18cc6a3b5 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -359,7 +359,6 @@ def test_update_table_expiration(client, to_delete): # [START bigquery_update_table_expiration] import datetime - import pytz # from google.cloud import bigquery # client = bigquery.Client() @@ -371,7 +370,9 @@ def test_update_table_expiration(client, to_delete): assert table.expires is None # set table to expire 5 days from now - expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) table.expires = expiration table = client.update_table(table, ["expires"]) # API request diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 4054f37fe..7387f58c1 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -20,7 +20,6 @@ import datetime import functools import operator -import pytz import typing from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings @@ -1727,7 +1726,6 @@ def to_arrow( .. versionadded:: 1.17.0 """ self._maybe_warn_max_results(bqstorage_client) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1946,7 +1944,7 @@ def to_dataframe( # Pandas, we set the timestamp_as_object parameter to True, if necessary. types_to_check = { pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), + pyarrow.timestamp("us", tz=datetime.timezone.utc), } for column in record_batch: diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index ca8eec0b5..41a27770e 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -18,7 +18,6 @@ def client_query_w_timestamp_params(): # [START bigquery_query_params_timestamps] import datetime - import pytz from google.cloud import bigquery # Construct a BigQuery client object. @@ -30,7 +29,7 @@ def client_query_w_timestamp_params(): bigquery.ScalarQueryParameter( "ts_value", "TIMESTAMP", - datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc), ) ] ) diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. 
+ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index a0406dba8..275d64989 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. +#. Create a virtualenv. Samples are compatible with Python 3.6+. .. code-block:: bash diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl index 4419a6e9a..e06139e5c 100644 --- a/tests/data/scalars.jsonl +++ b/tests/data/scalars.jsonl @@ -1,2 +1,2 @@ -{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl index ceccd8dbc..d0a33fdba 100644 --- a/tests/data/scalars_extreme.jsonl +++ b/tests/data/scalars_extreme.jsonl @@ -1,5 +1,5 @@ -{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", 
"bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} -{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} -{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} -{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": 
"1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json index 00bd150fd..676d37d56 100644 --- a/tests/data/scalars_schema.json +++ b/tests/data/scalars_schema.json @@ -1,33 +1,33 @@ [ { "mode": "NULLABLE", - "name": "timestamp_col", - "type": "TIMESTAMP" + "name": "bool_col", + "type": "BOOLEAN" }, { "mode": "NULLABLE", - "name": "time_col", - "type": "TIME" + "name": "bignumeric_col", + "type": "BIGNUMERIC" }, { "mode": "NULLABLE", - "name": "float64_col", - "type": "FLOAT" + "name": "bytes_col", + "type": "BYTES" }, { "mode": "NULLABLE", - "name": "datetime_col", - "type": "DATETIME" + "name": "date_col", + "type": "DATE" }, { "mode": "NULLABLE", - "name": "bignumeric_col", - "type": "BIGNUMERIC" + "name": "datetime_col", + "type": "DATETIME" }, { "mode": "NULLABLE", - "name": "numeric_col", - "type": "NUMERIC" + "name": "float64_col", + "type": "FLOAT" }, { "mode": "NULLABLE", @@ -36,27 +36,37 @@ }, { "mode": "NULLABLE", - "name": "date_col", - "type": "DATE" + "name": "int64_col", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "string_col", - "type": "STRING" + "name": "interval_col", + "type": "INTERVAL" }, { "mode": "NULLABLE", - "name": "bool_col", - "type": "BOOLEAN" + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "REQUIRED", + "name": "rowindex", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "bytes_col", - "type": "BYTES" + "name": "string_col", + "type": "STRING" }, { "mode": "NULLABLE", - "name": "int64_col", - "type": "INTEGER" + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" } ] diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index 7e20dfd7c..f36dc0944 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -14,9 +14,14 @@ """System tests for Arrow connector.""" +from typing import Optional + import pyarrow import pytest +from google.cloud import bigquery +from google.cloud.bigquery import enums + @pytest.mark.parametrize( ("max_results", "scalars_table_name"), @@ -28,17 +33,35 @@ ), ) def test_list_rows_nullable_scalars_dtypes( - bigquery_client, - scalars_table, - scalars_extreme_table, - max_results, - scalars_table_name, + bigquery_client: bigquery.Client, + scalars_table: str, + scalars_extreme_table: str, + max_results: Optional[int], + scalars_table_name: str, ): table_id = scalars_table if scalars_table_name == "scalars_extreme_table": table_id = scalars_extreme_table + + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. 
+ schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, + table_id, max_results=max_results, selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 77832fcac..c6896da14 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1962,6 +1962,11 @@ def test_query_w_query_params(self): "expected": {"friends": [phred_name, bharney_name]}, "query_parameters": [with_friends_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, ] for example in examples: @@ -2406,54 +2411,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def test_list_rows_empty_table(self): - from google.cloud.bigquery.table import RowIterator - - dataset_id = _make_dataset_id("empty_table") - dataset = self.temp_dataset(dataset_id) - table_ref = dataset.table("empty_table") - table = Config.CLIENT.create_table(bigquery.Table(table_ref)) - - # It's a bit silly to list rows for an empty table, but this does - # happen as the result of a DDL query from an IPython magic command. - rows = Config.CLIENT.list_rows(table) - self.assertIsInstance(rows, RowIterator) - self.assertEqual(tuple(rows), ()) - - def test_list_rows_page_size(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - num_items = 7 - page_size = 3 - num_pages, num_last_page = divmod(num_items, page_size) - - SF = bigquery.SchemaField - schema = [SF("string_col", "STRING", mode="NULLABLE")] - to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. 
- Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size) - pages = df.pages - - for i in range(num_pages): - page = next(pages) - self.assertEqual(page.num_items, page_size) - page = next(pages) - self.assertEqual(page.num_items, num_last_page) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py new file mode 100644 index 000000000..70388059e --- /dev/null +++ b/tests/system/test_list_rows.py @@ -0,0 +1,112 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str): + from google.cloud.bigquery.table import RowIterator + + table = bigquery_client.create_table(table_id) + + # It's a bit silly to list rows for an empty table, but this does + # happen as the result of a DDL query from an IPython magic command. + rows = bigquery_client.list_rows(table) + assert isinstance(rows, RowIterator) + assert tuple(rows) == () + + +def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str): + num_items = 7 + page_size = 3 + num_pages, num_last_page = divmod(num_items, page_size) + + to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)] + bigquery_client.load_table_from_json(to_insert, table_id).result() + + df = bigquery_client.list_rows( + table_id, + selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)], + page_size=page_size, + ) + pages = df.pages + + for i in range(num_pages): + page = next(pages) + assert page.num_items == page_size + page = next(pages) + assert page.num_items == num_last_page + + +def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str): + rows = sorted( + bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"] + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"Hello, World!" + assert row["date_col"] == datetime.date(2021, 7, 21) + assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) + assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" + assert row["int64_col"] == 123456789 + assert row["numeric_col"] == decimal.Decimal("1.23456789") + assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") + assert row["float64_col"] == 1.25 + assert row["string_col"] == "Hello, World!" 
+ assert row["time_col"] == datetime.time(11, 41, 43, 76160) + assert row["timestamp_col"] == datetime.datetime( + 2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[1] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 1 + else: + assert value is None + + +def test_list_rows_scalars_extreme( + bigquery_client: bigquery.Client, scalars_extreme_table: str +): + rows = sorted( + bigquery_client.list_rows(scalars_extreme_table), + key=lambda row: row["rowindex"], + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"\r\n" + assert row["date_col"] == datetime.date(9999, 12, 31) + assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert row["geography_col"] == "POINT(-135 90)" + assert row["int64_col"] == 9223372036854775807 + assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28") + assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37") + assert row["float64_col"] == float("Inf") + assert row["string_col"] == "Hello, World" + assert row["time_col"] == datetime.time(23, 59, 59, 999999) + assert row["timestamp_col"] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[4] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 4 + else: + assert value is None diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 411c9bed0..2bd496e83 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -24,10 +24,11 @@ import google.api_core.retry import pkg_resources import pytest -import pytz from google.cloud import bigquery +from google.cloud.bigquery import enums from google.cloud import bigquery_storage + from . import helpers @@ -60,7 +61,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ( "dt_col", @@ -349,13 +350,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), ], ), ] - df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) @@ -475,10 +477,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc ), ], ), @@ -801,8 +803,25 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): ("max_results",), ((None,), (10,),) # Use BQ Storage API. # Use REST API. ) def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results): + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. 
+ schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + df = bigquery_client.list_rows( - scalars_table, max_results=max_results, + scalars_table, max_results=max_results, selected_fields=schema, ).to_dataframe() assert df.dtypes["bool_col"].name == "boolean" @@ -835,8 +854,25 @@ def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_r def test_list_rows_nullable_scalars_extreme_dtypes( bigquery_client, scalars_extreme_table, max_results ): + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + df = bigquery_client.list_rows( - scalars_extreme_table, max_results=max_results + scalars_extreme_table, max_results=max_results, selected_fields=schema, ).to_dataframe() # Extreme values are out-of-bounds for pandas datetime64 values, which use diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 0ac1d05b5..c3f7854e3 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -295,11 +295,11 @@ def test_user_email(self): @staticmethod def _datetime_and_millis(): import datetime - import pytz from google.cloud._helpers import _millis now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + microsecond=123000, + tzinfo=datetime.timezone.utc, # stats timestamps have ms precision ) return now, _millis(now) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 85c507b2a..5a792527a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -33,7 +33,6 @@ import pyarrow import pyarrow.types import pytest -import pytz from google import api_core from google.cloud.bigquery import _helpers @@ -427,10 +426,12 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ( "TIMESTAMP", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, 
tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), ], ), ( @@ -926,8 +927,8 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field09": [True, False], "field10": [False, True], "field11": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc), ], "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index b8ffdf295..bd07990b8 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -29,7 +29,6 @@ import mock import requests import pytest -import pytz import pkg_resources try: @@ -4934,16 +4933,24 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], [1.25, 2.5], ), { "score": 13, "times": [ - datetime.datetime(2018, 12, 2, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 2, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 2, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 2, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], "distances": [-1.25, -2.5], }, @@ -6884,7 +6891,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ] ) @@ -7269,7 +7276,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ("string_col", ["abc", None, "def"]), ("bytes_col", [b"abc", b"def", None]), diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 44d02f14c..0ff2c9258 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -23,7 +23,6 @@ import pyarrow import pyarrow.types import pytest -import pytz import google.api_core.exceptions @@ -898,7 +897,9 @@ def test_mview_last_refresh_time(self): } self.assertEqual( table.mview_last_refresh_time, - datetime.datetime(2020, 11, 30, 15, 57, 22, 496000, tzinfo=pytz.utc), + datetime.datetime( + 2020, 11, 30, 15, 57, 22, 496000, tzinfo=datetime.timezone.utc + ), ) def test_mview_enable_refresh(self): @@ -2779,7 +2780,6 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) tzinfo = datetime.timezone.utc - self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows self.assertEqual(list(df.columns), ["some_timestamp"]) From 2689df44101cf1d1ca98944e8804525c59f283c7 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 23 Aug 2021 10:49:49 -0400 Subject: [PATCH 07/35] feat: Destination tables are no-longer removed by create_job 
(#891) --- google/cloud/bigquery/client.py | 2 -- tests/unit/test_client.py | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 1a826eb55..93ba2aa11 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -49,7 +49,6 @@ DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, ) -from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none @@ -1937,7 +1936,6 @@ def create_job( ) elif "query" in job_config: copy_config = copy.deepcopy(job_config) - _del_sub_prop(copy_config, ["query", "destinationTable"]) query_job_config = google.cloud.bigquery.job.QueryJobConfig.from_api_repr( copy_config ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index bd07990b8..458798afa 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2594,8 +2594,6 @@ def test_delete_table_w_not_found_ok_true(self): conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) def _create_job_helper(self, job_config): - from google.cloud.bigquery import _helpers - creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) @@ -2606,8 +2604,6 @@ def _create_job_helper(self, job_config): } conn = client._connection = make_connection(RESOURCE) client.create_job(job_config=job_config) - if "query" in job_config: - _helpers._del_sub_prop(job_config, ["query", "destinationTable"]) conn.api_request.assert_called_once_with( method="POST", @@ -2732,7 +2728,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): } data_without_destination = { "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, - "configuration": {"query": {"query": query, "useLegacySql": False}}, + "configuration": configuration, } creds = _make_credentials() From eed311e3f300c3598607b7026bb2f843bd231f5a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 23 Aug 2021 16:01:49 -0400 Subject: [PATCH 08/35] chore: Simplify create_job slightly (#893) --- google/cloud/bigquery/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 93ba2aa11..e2863e6a5 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1935,11 +1935,10 @@ def create_job( source_type=source_type, ) elif "query" in job_config: - copy_config = copy.deepcopy(job_config) query_job_config = google.cloud.bigquery.job.QueryJobConfig.from_api_repr( - copy_config + job_config ) - query = _get_sub_prop(copy_config, ["query", "query"]) + query = _get_sub_prop(job_config, ["query", "query"]) return self.query( query, job_config=query_job_config, retry=retry, timeout=timeout ) From 2cb1c210b00328161c3dcb09966bc849c99537aa Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 9 Sep 2021 16:25:40 +0200 Subject: [PATCH 09/35] chore: sync v3 branch with main (#947) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: protect v3.x.x branch (#816) * chore: protect v3.x.x branch In preparation for breaking changes. 
* force pattern to be a string * simplify branch name * fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815) That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set. * feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. (#817) PiperOrigin-RevId: 387137741 Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25 * chore: release 2.23.0 (#819) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore(deps): update dependency google-cloud-bigquery to v2.23.0 (#820) * fix: `insert_rows()` accepts float column values as strings again (#824) * chore: release 2.23.1 (#825) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore: add second protection rule for v3 branch (#828) * chore(deps): update dependency google-cloud-bigquery to v2.23.1 (#827) * test: retry getting rows after streaming them in `test_insert_rows_from_dataframe` (#832) * chore(deps): update dependency pyarrow to v5 (#834) * chore(deps): update dependency google-cloud-bigquery-storage to v2.6.2 (#795) * deps: expand pyarrow pins to support 5.x releases (#833) * chore: release 2.23.2 (#835) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore(deps): update dependency google-auth-oauthlib to v0.4.5 (#839) * chore(deps): update dependency google-cloud-bigquery to v2.23.2 (#838) * chore(deps): update dependency google-cloud-testutils to v1 (#845) * chore: require CODEOWNER review and up to date branches (#846) These two lines bring the rules on this repo in line with the defaults: https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50 * chore: add api-bigquery as a samples owner (#852) * fix: increase default retry deadline to 10 minutes (#859) The backend API has a timeout of 4 minutes, so the default of 2 minutes was not allowing for any retries to happen in some cases. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #853 🦕 * process: add yoshi-python to samples CODEOWNERS (#858) Closes #857. 
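Relating to the retry-deadline bullet above (#859): because the backend API can take up to four minutes to respond, a two-minute client-side deadline left no room for retries. The snippet below is an illustrative sketch only, not code from this patch series; it shows how a caller could set their own deadline on the existing `DEFAULT_RETRY` object, with the 600-second value simply mirroring the new 10-minute default.

```python
# Hedged sketch: DEFAULT_RETRY and Retry.with_deadline() are existing public
# APIs; the 600-second value mirrors the new 10-minute default from #859.
from google.cloud import bigquery
from google.cloud.bigquery.retry import DEFAULT_RETRY

client = bigquery.Client()
patient_retry = DEFAULT_RETRY.with_deadline(600)  # seconds

# Retryable API errors keep being retried for up to 10 minutes.
table = client.get_table(
    "bigquery-public-data.samples.shakespeare", retry=patient_retry
)
print(table.num_rows)
```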
* chore: release 2.23.3 (#860) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast * chore(deps): update dependency google-cloud-bigquery to v2.23.3 (#866) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.23.2` -> `==2.23.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/compatibility-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/confidence-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery ### [`v2.23.3`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2233-httpswwwgithubcomgoogleapispython-bigquerycomparev2232v2233-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[ ] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * feat: add support for transaction statistics (#849) * feat: add support for transaction statistics * Hoist transaction_info into base job class * Add versionadded directive to new property and class * Include new class in docs reference * chore(deps): update dependency google-cloud-bigquery-storage to v2.6.3 (#863) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.2` -> `==2.6.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/compatibility-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/confidence-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery-storage ### [`v2.6.3`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​263-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev262v263-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[x] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * chore: fix INSTALL_LIBRARY_FROM_SOURCE in noxfile.py (#869) Source-Link: https://github.com/googleapis/synthtool/commit/6252f2cd074c38f37b44abe5e96d128733eb1b61 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc Co-authored-by: Owl Bot * feat: make the same `Table*` instances equal to each other (#867) * feat: make the same Table instances equal to each other * Table equality should ignore metadata differences * Compare instances through tableReference property * Make Table instances hashable * Make Table* classes interchangeable If these classes reference the same table, they are now considered equal. * feat: support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor (#850) Follow-up to https://github.com/googleapis/python-bigquery/pull/840/files#r679880582 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) * feat: retry failed query jobs in `result()` (#837) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #539 🦕 Previously, we only retried failed API requests. Now, we retry failed jobs (according to the predicate of the `Retry` object passed to `job.result()`). 
* fix: make unicode characters working well in load_table_from_json (#865) Co-authored-by: Tim Swast Co-authored-by: Tres Seaver * chore: release 2.24.0 (#868) :robot: I have created a release \*beep\* \*boop\* --- ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) ### Features * add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) * make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) * retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) * support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) ### Bug Fixes * make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). * chore(deps): update dependency google-cloud-bigquery to v2.24.0 (#873) * test: refactor `list_rows` tests and add test for scalars (#829) * test: refactor `list_rows` tests and add test for scalars * fix JSON formatting * add TODO for INTERVAL Arrow support * format tests * chore: drop mention of Python 2.7 from templates (#877) Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 Co-authored-by: Owl Bot * fix: remove pytz dependency and require pyarrow>=3.0.0 (#875) * fix: remove pytz dependency * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix(deps): require pyarrow>=3.0.0 * remove version check for pyarrow * require pyarrow 3.0 in pandas extra * remove _BIGNUMERIC_SUPPORT references from tests Co-authored-by: Owl Bot Co-authored-by: Dina Graves Portman Co-authored-by: Tim Swast * chore: release 2.24.1 (#879) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore(deps): update dependency google-cloud-bigquery to v2.24.1 (#887) * feat: Support using GeoPandas for GEOGRAPHY columns (#848) * test: Add test of datetime and time pandas load (#895) * chore: release 2.25.0 (#898) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Jim Fulton * chore(deps): update dependency numpy to v1.21.2 (#899) * chore(deps): update dependency numpy to v1.21.2 * Update samples/geography/requirements.txt Co-authored-by: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> * chore(deps): update dependency google-cloud-core to v2 (#904) * fix: use REST API in cell magic when requested (#892) Fixes #876. The `--use_rest_api` option did not work as expected and this commit fixes it. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) * fix: populate default `timeout` and retry after client-side timeout (#896) This addresses internal issue 195337762 where sometimes query job creation can take longer than expected and retrying the API call can be faster than waiting for the first query job request to fail. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #889 Towards https://github.com/googleapis/python-bigquery/issues/779 🦕 * chore(deps): update dependency google-cloud-bigquery to v2.25.0 (#907) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.24.1` -> `==2.25.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/compatibility-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/confidence-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.25.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2250-httpswwwgithubcomgoogleapispython-bigquerycomparev2241v2250-2021-08-24) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) ##### Features - Support using GeoPandas for GEOGRAPHY columns ([#​848](https://www.togithub.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) ##### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) ##### Bug Fixes - remove pytz dependency and require pyarrow>=3.0.0 ([#​875](https://www.togithub.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * chore(deps): update dependency pandas to v1.3.2 (#900) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | `==1.1.5` -> `==1.3.2` | [![age](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/compatibility-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/confidence-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
pandas-dev/pandas ### [`v1.3.2`](https://togithub.com/pandas-dev/pandas/releases/v1.3.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.1...v1.3.2) This is a patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.2/whatsnew/v1.3.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.1`](https://togithub.com/pandas-dev/pandas/releases/v1.3.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.0...v1.3.1) This is the first patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.1/whatsnew/v1.3.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.0`](https://togithub.com/pandas-dev/pandas/releases/v1.3.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.5...v1.3.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.0/whatsnew/v1.3.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.5`](https://togithub.com/pandas-dev/pandas/releases/v1.2.5) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.4...v1.2.5) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.5/whatsnew/v1.2.5.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.4`](https://togithub.com/pandas-dev/pandas/releases/v1.2.4) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.3...v1.2.4) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.4/whatsnew/v1.2.4.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). 
### [`v1.2.3`](https://togithub.com/pandas-dev/pandas/releases/v1.2.3) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.2...v1.2.3) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.3/whatsnew/v1.2.3.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.2`](https://togithub.com/pandas-dev/pandas/releases/v1.2.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.1...v1.2.2) This is a patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.2/whatsnew/v1.2.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.1`](https://togithub.com/pandas-dev/pandas/releases/v1.2.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.0...v1.2.1) This is the first patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.1/whatsnew/v1.2.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.1.5...v1.2.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues).
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * chore: group all renovate PRs together (#911) This excludes `renovate.json` from templated updates. If this works well, we can update the core templates (perhaps with a configuration option to `py_library`). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 * chore: release 2.25.1 (#912) :robot: I have created a release \*beep\* \*boop\* --- ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) ### Bug Fixes * populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) * use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). * docs: update docstring for bigquery_create_routine sample (#883) (#917) Fixed language issues. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 Co-authored-by: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com> * chore: migrate default branch to main (#910) * chore: migrate default branch to main * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Add owlbot replacements to persist changes * Manually apply new replacements from owlbot.py * Move temp replacement rules after s.move() Co-authored-by: Owl Bot * chore: invalid docstrings broke docfx (#924) * chore(deps): update all dependencies (#914) * chore(deps): update all dependencies * Python version modifiers for pyproj Co-authored-by: Tim Swast * fix: error inserting DataFrame with REPEATED field (#925) Co-authored-by: Tim Swast * chore(deps): update all dependencies (#926) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.25.0` -> `==2.25.1` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/compatibility-slim/2.25.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.1/confidence-slim/2.25.0)](https://docs.renovatebot.com/merge-confidence/) | | [google-cloud-testutils](https://togithub.com/googleapis/python-test-utils) | `==1.0.0` -> `==1.1.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/compatibility-slim/1.0.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-testutils/1.1.0/confidence-slim/1.0.0)](https://docs.renovatebot.com/merge-confidence/) | | [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.2` -> `==1.1.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/compatibility-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.3/confidence-slim/1.1.2)](https://docs.renovatebot.com/merge-confidence/) | | [importlib-metadata](https://togithub.com/python/importlib_metadata) | `==4.6.4` -> `==4.8.1` | 
[![age](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/compatibility-slim/4.6.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/importlib-metadata/4.8.1/confidence-slim/4.6.4)](https://docs.renovatebot.com/merge-confidence/) | | [pytest](https://docs.pytest.org/en/latest/) ([source](https://togithub.com/pytest-dev/pytest), [changelog](https://docs.pytest.org/en/stable/changelog.html)) | `==6.2.4` -> `==6.2.5` | [![age](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/compatibility-slim/6.2.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pytest/6.2.5/confidence-slim/6.2.4)](https://docs.renovatebot.com/merge-confidence/) | | [typing-extensions](https://togithub.com/python/typing) | `==3.10.0.0` -> `==3.10.0.2` | [![age](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/compatibility-slim/3.10.0.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/typing-extensions/3.10.0.2/confidence-slim/3.10.0.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.25.1`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2251-httpswwwgithubcomgoogleapispython-bigquerycomparev2250v2251-2021-08-25) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1)
googleapis/python-test-utils ### [`v1.1.0`](https://togithub.com/googleapis/python-test-utils/compare/v1.0.0...v1.1.0) [Compare Source](https://togithub.com/googleapis/python-test-utils/compare/v1.0.0...v1.1.0)
googleapis/python-crc32c ### [`v1.1.3`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​113-httpswwwgithubcomgoogleapispython-crc32ccomparev112v113-2021-08-30) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.2...v1.1.3)
python/importlib_metadata ### [`v4.8.1`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v481) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.8.0...v4.8.1) \====== - [#​348](https://togithub.com/python/importlib_metadata/issues/348): Restored support for `EntryPoint` access by item, deprecating support in the process. Users are advised to use direct member access instead of item-based access:: - ep\[0] -> ep.name - ep\[1] -> ep.value - ep\[2] -> ep.group - ep\[:] -> ep.name, ep.value, ep.group ### [`v4.8.0`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v480) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.7.1...v4.8.0) \====== - [#​337](https://togithub.com/python/importlib_metadata/issues/337): Rewrote `EntryPoint` as a simple class, still immutable and still with the attributes, but without any expectation for `namedtuple` functionality such as `_asdict`. ### [`v4.7.1`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v471) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.7.0...v4.7.1) \====== - [#​344](https://togithub.com/python/importlib_metadata/issues/344): Fixed regression in `packages_distributions` when neither top-level.txt nor a files manifest is present. ### [`v4.7.0`](https://togithub.com/python/importlib_metadata/blob/master/CHANGES.rst#v470) [Compare Source](https://togithub.com/python/importlib_metadata/compare/v4.6.4...v4.7.0) \====== - [#​330](https://togithub.com/python/importlib_metadata/issues/330): In `packages_distributions`, now infer top-level names from `.files()` when a `top-level.txt` (Setuptools-specific metadata) is not present.
pytest-dev/pytest ### [`v6.2.5`](https://togithub.com/pytest-dev/pytest/compare/6.2.4...6.2.5) [Compare Source](https://togithub.com/pytest-dev/pytest/compare/6.2.4...6.2.5)
python/typing ### [`v3.10.0.2`](https://togithub.com/python/typing/compare/3.10.0.1...3.10.0.2) [Compare Source](https://togithub.com/python/typing/compare/3.10.0.1...3.10.0.2) ### [`v3.10.0.1`](https://togithub.com/python/typing/compare/3.10.0.0...3.10.0.1) [Compare Source](https://togithub.com/python/typing/compare/3.10.0.0...3.10.0.1)
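The importlib-metadata 4.8.1 note above deprecates item access on `EntryPoint` (`ep[0]`, `ep[1]`, ...) in favor of attribute access. A minimal sketch of the preferred style, assuming importlib-metadata >= 3.6 and using the `console_scripts` group purely as an illustration:

```python
# Attribute access replaces the deprecated item access:
#   ep[0] -> ep.name, ep[1] -> ep.value, ep[2] -> ep.group
from importlib_metadata import entry_points

for ep in entry_points(group="console_scripts"):
    print(ep.name, ep.value, ep.group)
```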
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * fix: underscores weren't allowed in struct field names when passing parameters to the DB API (#930) * chore: release 2.25.2 (#916) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> * chore(deps): update all dependencies (#928) * fix: guard imports against unsupported pyarrow versions (#934) * fix: guard imports against unsupported pyarrow versions * add unit tests * fix pytype * second try at fixing pytype * feat: set the X-Server-Timeout header when timeout is set (#927) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #919 🦕 * chore: release 2.26.0 (#937) :robot: I have created a release \*beep\* \*boop\* --- ## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) ### Features * set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93)) ### Bug Fixes * guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
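The `X-Server-Timeout` feature noted for 2.26.0 above is implemented by a small helper added to `client.py` further down in this patch. A self-contained sketch of that pattern; the plain-dict request kwargs stand in for the real transport call and are illustrative only:

```python
# When a per-request timeout is set, advertise it to the backend via an
# "X-Server-Timeout" header in addition to using it as the transport timeout.
from typing import Dict, Optional

TIMEOUT_HEADER = "X-Server-Timeout"


def add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs: dict) -> dict:
    timeout = kwargs.get("timeout")
    if timeout is not None:
        if headers is None:
            headers = {}
        headers[TIMEOUT_HEADER] = str(timeout)
    if headers:
        kwargs["headers"] = headers
    return kwargs


# A 300-second timeout becomes both a transport timeout and a header value.
print(add_server_timeout_header(None, {"timeout": 300.0}))
# -> {'timeout': 300.0, 'headers': {'X-Server-Timeout': '300.0'}}
```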
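The "guard imports against unsupported pyarrow versions" fix (#934) follows the same lazy version-check idea as the `PyarrowVersions` helper added to `_helpers.py` in the diff below. A rough standalone sketch, assuming a 3.0.0 floor (the minimum pyarrow this patch series pins):

```python
# Parse the installed pyarrow version lazily and compare it with
# packaging.version; "0.0.0" sorts before any real release, so a missing
# __version__ attribute reads as "too old" instead of raising AttributeError.
import packaging.version

_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")


def installed_pyarrow_version() -> packaging.version.Version:
    import pyarrow

    return packaging.version.parse(getattr(pyarrow, "__version__", "0.0.0"))


if installed_pyarrow_version() < _MIN_PYARROW_VERSION:
    raise ImportError(f"pyarrow >= {_MIN_PYARROW_VERSION} is required")
```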
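For the 2.25.2 fix "underscores weren't allowed in struct field names when passing parameters to the DB API" (#930), a hedged usage sketch: the query and the `owner` parameter are invented for illustration, and running it needs Google Cloud credentials.

```python
# Typed DB-API parameters use the "%(name:TYPE)s" placeholder syntax parsed by
# the _parse_type/_extract_types regexes updated in this patch; the STRUCT
# field names below contain underscores, which the fix now accepts.
from google.cloud import bigquery
from google.cloud.bigquery import dbapi

connection = dbapi.connect(bigquery.Client())
cursor = connection.cursor()
cursor.execute(
    "SELECT %(owner:STRUCT<user_id INT64, user_name STRING>)s AS owner",
    {"owner": {"user_id": 123, "user_name": "ada"}},
)
print(cursor.fetchone())
```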
* chore(deps): update dependency google-cloud-bigquery to v2.26.0 (#938)

* chore: update system tests and samples to use an @google.com email address (#942)

* chore: update system tests and samples to use an @google.com email address

* Add group prefix

* fixed access entry some more

* chore(python): rename default branch to main (#935)

Source-Link: https://github.com/googleapis/synthtool/commit/5c0fa62eea9c33ebe61e582424b659eb264e1ba4
Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d

Co-authored-by: Owl Bot
Co-authored-by: Tim Swast
Co-authored-by: Peter Lamut
Co-authored-by: Anthonios Partheniou

* chore: Reduce duplicated code between tests/unit and tests/unit/job (#940)

* chore: Reduce duplicated code between tests/unit and tests/unit/job

* reuse parent make_client

* test: fix routine DDL sample test exits too early (#932)

Co-authored-by: Tres Seaver
Co-authored-by: Tim Swast

* chore(deps): update all dependencies (#939)

[![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com)

This PR contains the following updates:

| Package | Change | Age | Adoption | Passing | Confidence |
|---|---|---|---|---|---|
| [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.3` -> `==2.7.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/compatibility-slim/2.6.3)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.7.0/confidence-slim/2.6.3)](https://docs.renovatebot.com/merge-confidence/) |
| [google-crc32c](https://togithub.com/googleapis/python-crc32c) | `==1.1.3` -> `==1.1.4` | [![age](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/compatibility-slim/1.1.3)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-crc32c/1.1.4/confidence-slim/1.1.3)](https://docs.renovatebot.com/merge-confidence/) |
| [google-resumable-media](https://togithub.com/googleapis/google-resumable-media-python) | `==2.0.1` -> `==2.0.2` | [![age](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/compatibility-slim/2.0.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-resumable-media/2.0.2/confidence-slim/2.0.1)](https://docs.renovatebot.com/merge-confidence/) |

---

### Release Notes
googleapis/python-bigquery-storage ### [`v2.7.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​270-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev263v270-2021-09-02) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.3...v2.7.0) ##### Features - **v1beta2:** Align ReadRows timeout with other versions of the API ([#​293](https://www.togithub.com/googleapis/python-bigquery-storage/issues/293)) ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a)) ##### Documentation - **v1beta2:** Align session length with public documentation ([43e36a1](https://www.github.com/googleapis/python-bigquery-storage/commit/43e36a13ece8d876763d88bad0252a1b2421c52a)) ##### [2.6.3](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3) (2021-08-06) ##### Bug Fixes - resume read stream on `Unknown` transport-layer exception ([#​263](https://www.togithub.com/googleapis/python-bigquery-storage/issues/263)) ([127caa0](https://www.github.com/googleapis/python-bigquery-storage/commit/127caa06144b9cec04b23914b561be6a264bcb36)) ##### [2.6.2](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.1...v2.6.2) (2021-07-28) ##### Bug Fixes - enable self signed jwt for grpc ([#​249](https://www.togithub.com/googleapis/python-bigquery-storage/issues/249)) ([a7e8d91](https://www.github.com/googleapis/python-bigquery-storage/commit/a7e8d913fc3de67a3f38ecbd35af2f9d1a33aa8d)) ##### Documentation - remove duplicate code samples ([#​246](https://www.togithub.com/googleapis/python-bigquery-storage/issues/246)) ([303f273](https://www.github.com/googleapis/python-bigquery-storage/commit/303f2732ced38e491df92e965dd37bac24a61d2f)) - add Samples section to CONTRIBUTING.rst ([#​241](https://www.togithub.com/googleapis/python-bigquery-storage/issues/241)) ([5d02358](https://www.github.com/googleapis/python-bigquery-storage/commit/5d02358fbd397cafcc1169d829859fe2dd568645)) ##### [2.6.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.0...v2.6.1) (2021-07-20) ##### Bug Fixes - **deps:** pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions ([#​240](https://www.togithub.com/googleapis/python-bigquery-storage/issues/240)) ([8f848e1](https://www.github.com/googleapis/python-bigquery-storage/commit/8f848e18379085160492cdd2d12dc8de50a46c8e)) ##### Documentation - pandas DataFrame samples are more standalone ([#​224](https://www.togithub.com/googleapis/python-bigquery-storage/issues/224)) ([4026997](https://www.github.com/googleapis/python-bigquery-storage/commit/4026997d7a286b63ed2b969c0bd49de59635326d))
googleapis/python-crc32c ### [`v1.1.4`](https://togithub.com/googleapis/python-crc32c/blob/master/CHANGELOG.md#​114-httpswwwgithubcomgoogleapispython-crc32ccomparev114v114-2021-09-02) [Compare Source](https://togithub.com/googleapis/python-crc32c/compare/v1.1.3...v1.1.4)
googleapis/google-resumable-media-python ### [`v2.0.2`](https://togithub.com/googleapis/google-resumable-media-python/blob/master/CHANGELOG.md#​202-httpswwwgithubcomgoogleapisgoogle-resumable-media-pythoncomparev201v202-2021-09-02) [Compare Source](https://togithub.com/googleapis/google-resumable-media-python/compare/v2.0.1...v2.0.2)
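The `table.py` and `job/query.py` changes in the diff below add `to_geodataframe()` for GEOGRAPHY query results. A usage sketch; the project, dataset, and column names are placeholders, and running it needs credentials plus `geopandas` installed:

```python
# Fetch GEOGRAPHY data into a geopandas.GeoDataFrame. The geography_column
# argument is only required when the result has several GEOGRAPHY columns;
# it is shown here for clarity.
from google.cloud import bigquery

client = bigquery.Client()
sql = """
    SELECT region_id, ST_GEOGFROMTEXT(boundary_wkt) AS boundary
    FROM `my-project.my_dataset.regions`
"""
gdf = client.query(sql).to_geodataframe(geography_column="boundary")
print(gdf.crs)       # EPSG:4326, the CRS set by the new table.py code
print(gdf.geometry)  # shapely geometries decoded from the GEOGRAPHY column
```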
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://togithub.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). * Remove unneeded file * Remove unneeded legacy pyarrow import in noxfile Co-authored-by: Tim Swast Co-authored-by: gcf-owl-bot[bot] <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: WhiteSource Renovate Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Jim Fulton Co-authored-by: Grimmer Co-authored-by: Tres Seaver Co-authored-by: Dina Graves Portman Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com> Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 2 +- .github/sync-repo-settings.yaml | 8 +- .kokoro/build.sh | 2 +- .kokoro/test-samples-impl.sh | 2 +- CHANGELOG.md | 47 ++++ CONTRIBUTING.rst | 16 +- docs/conf.py | 12 +- docs/usage/pandas.rst | 14 ++ google/cloud/bigquery/_helpers.py | 24 ++ google/cloud/bigquery/_pandas_helpers.py | 79 ++++++- google/cloud/bigquery/client.py | 150 +++++++----- google/cloud/bigquery/dbapi/_helpers.py | 2 +- google/cloud/bigquery/dbapi/cursor.py | 2 +- google/cloud/bigquery/job/copy_.py | 2 +- google/cloud/bigquery/job/extract.py | 2 +- google/cloud/bigquery/job/load.py | 2 +- google/cloud/bigquery/job/query.py | 119 +++++++++- google/cloud/bigquery/magics/magics.py | 12 +- google/cloud/bigquery/retry.py | 8 + google/cloud/bigquery/table.py | 198 +++++++++++++++- google/cloud/bigquery/version.py | 2 +- noxfile.py | 6 +- owlbot.py | 76 +++++- renovate.json | 2 +- samples/create_routine.py | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 51 +++- samples/geography/to_geodataframe.py | 32 +++ samples/geography/to_geodataframe_test.py | 25 ++ samples/snippets/requirements-test.txt | 4 +- samples/snippets/requirements.txt | 8 +- samples/tests/test_routine_samples.py | 1 - samples/update_dataset_access.py | 4 +- setup.py | 1 + testing/constraints-3.6.txt | 2 + tests/system/test_client.py | 3 - tests/system/test_pandas.py | 207 ++++++++++++++++- tests/unit/conftest.py | 19 ++ tests/unit/job/helpers.py | 22 +- tests/unit/job/test_base.py | 13 +- tests/unit/job/test_copy.py | 21 +- tests/unit/job/test_extract.py | 21 +- tests/unit/job/test_load.py | 41 ++-- tests/unit/job/test_query.py | 56 ++--- tests/unit/job/test_query_pandas.py | 168 +++++++++++--- tests/unit/test__helpers.py | 34 +++ tests/unit/test__pandas_helpers.py | 163 +++++++++++-- tests/unit/test_client.py | 269 +++++++++++++--------- tests/unit/test_create_dataset.py | 19 +- tests/unit/test_dbapi__helpers.py | 8 +- tests/unit/test_dbapi_cursor.py | 4 + tests/unit/test_delete_dataset.py | 7 +- tests/unit/test_list_datasets.py | 11 +- tests/unit/test_list_jobs.py | 19 +- tests/unit/test_list_models.py 
| 12 +- tests/unit/test_list_projects.py | 11 +- tests/unit/test_list_routines.py | 12 +- tests/unit/test_list_tables.py | 16 +- tests/unit/test_magics.py | 26 ++- tests/unit/test_retry.py | 12 + tests/unit/test_table.py | 242 +++++++++++++++++++ 61 files changed, 1903 insertions(+), 454 deletions(-) create mode 100644 samples/geography/to_geodataframe.py create mode 100644 samples/geography/to_geodataframe_test.py diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 649877dc4..c07f148f0 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc + digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 8634a3043..6572e5982 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -1,9 +1,9 @@ -# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings -# Rules for master branch protection +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Rules for main branch protection branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. -# Defaults to `master` -- pattern: master +# Defaults to `main` +- pattern: main requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 302cc1e1a..4d6a1d0f6 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -41,7 +41,7 @@ python3 -m pip install --upgrade --quiet nox python3 -m nox --version # If this is a continuous build, send the test log to the FlakyBot. -# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then cleanup() { chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 311a8d54b..8a324c9c7 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -80,7 +80,7 @@ for file in samples/**/requirements.txt; do EXIT=$? # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. + # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. 
if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/CHANGELOG.md b/CHANGELOG.md index 83b409015..5a3cb6bee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,53 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) + + +### Features + +* set the X-Server-Timeout header when timeout is set ([#927](https://www.github.com/googleapis/python-bigquery/issues/927)) ([ba02f24](https://www.github.com/googleapis/python-bigquery/commit/ba02f248ba9c449c34859579a4011f4bfd2f4a93)) + + +### Bug Fixes + +* guard imports against unsupported pyarrow versions ([#934](https://www.github.com/googleapis/python-bigquery/issues/934)) ([b289076](https://www.github.com/googleapis/python-bigquery/commit/b28907693bbe889becc1b9c8963f0a7e1ee6c35a)) + +### [2.25.2](https://www.github.com/googleapis/python-bigquery/compare/v2.25.1...v2.25.2) (2021-08-31) + + +### Bug Fixes + +* error inserting DataFrame with REPEATED field ([#925](https://www.github.com/googleapis/python-bigquery/issues/925)) ([656d2fa](https://www.github.com/googleapis/python-bigquery/commit/656d2fa6f870573a21235c83463752a2d084caba)) +* underscores weren't allowed in struct field names when passing parameters to the DB API ([#930](https://www.github.com/googleapis/python-bigquery/issues/930)) ([fcb0bc6](https://www.github.com/googleapis/python-bigquery/commit/fcb0bc68c972c2c98bb8542f54e9228308177ecb)) + + +### Documentation + +* update docstring for bigquery_create_routine sample ([#883](https://www.github.com/googleapis/python-bigquery/issues/883)) ([#917](https://www.github.com/googleapis/python-bigquery/issues/917)) ([e2d12b7](https://www.github.com/googleapis/python-bigquery/commit/e2d12b795ef2dc51b0ee36f1b3000edb1e64ce05)) + +### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) + + +### Bug Fixes + +* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) +* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) + +## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) + + +### Features + +* Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) + +### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) + + +### Bug Fixes + +* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e)) + ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 2faf5aed3..8aecf9dd2 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -50,9 +50,9 @@ You'll have to create a development environment using a Git 
checkout: # Configure remotes such that you can pull changes from the googleapis/python-bigquery # repository into your local repository. $ git remote add upstream git@github.com:googleapis/python-bigquery.git - # fetch and merge changes from upstream into master + # fetch and merge changes from upstream into main $ git fetch upstream - $ git merge upstream/master + $ git merge upstream/main Now your local repo is set up such that you will push changes to your GitHub repo, from which you can submit a pull request. @@ -110,12 +110,12 @@ Coding Style variables:: export GOOGLE_CLOUD_TESTING_REMOTE="upstream" - export GOOGLE_CLOUD_TESTING_BRANCH="master" + export GOOGLE_CLOUD_TESTING_BRANCH="main" By doing this, you are specifying the location of the most up-to-date - version of ``python-bigquery``. The the suggested remote name ``upstream`` - should point to the official ``googleapis`` checkout and the - the branch should be the main branch on that remote (``master``). + version of ``python-bigquery``. The + remote name ``upstream`` should point to the official ``googleapis`` + checkout and the branch should be the default branch on that remote (``main``). - This repository contains configuration for the `pre-commit `__ tool, which automates checking @@ -209,7 +209,7 @@ The `description on PyPI`_ for the project comes directly from the ``README``. Due to the reStructuredText (``rst``) parser used by PyPI, relative links which will work on GitHub (e.g. ``CONTRIBUTING.rst`` instead of -``https://github.com/googleapis/python-bigquery/blob/master/CONTRIBUTING.rst``) +``https://github.com/googleapis/python-bigquery/blob/main/CONTRIBUTING.rst``) may cause problems creating links or rendering the description. .. _description on PyPI: https://pypi.org/project/google-cloud-bigquery @@ -234,7 +234,7 @@ We support: Supported versions can be found in our ``noxfile.py`` `config`_. -.. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py +.. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py We also explicitly decided to support Python 3 beginning with version 3.6. diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..07e5d8c30 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,8 +76,8 @@ # The encoding of source files. # source_encoding = 'utf-8-sig' -# The master toctree document. -master_doc = "index" +# The root toctree document. +root_doc = "index" # General information about the project. project = "google-cloud-bigquery" @@ -281,7 +281,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ ( - master_doc, + root_doc, "google-cloud-bigquery.tex", "google-cloud-bigquery Documentation", author, @@ -316,7 +316,7 @@ # (source start file, name, description, authors, manual section). 
man_pages = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", [author], @@ -335,7 +335,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-bigquery", "google-cloud-bigquery Documentation", author, @@ -366,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 40732a298..109259711 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -58,6 +58,20 @@ The following data types are used when creating a pandas DataFrame. - Int64 - +Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame +------------------------------------------------------------ + +`GeoPandas `_ adds geospatial analytics +capabilities to Pandas. To retrieve query results containing +GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: + +.. literalinclude:: ../samples/geography/to_geodataframe.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_geodataframe] + :end-before: [END bigquery_query_results_geodataframe] + + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index cb2ce40a3..e7f5bd59b 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -74,7 +74,31 @@ def is_read_session_optional(self) -> bool: return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION +class PyarrowVersions: + """Version comparisons for pyarrow package.""" + + def __init__(self): + self._installed_version = None + + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of pyarrow.""" + if self._installed_version is None: + import pyarrow + + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(pyarrow, "__version__", "0.0.0") + ) + + return self._installed_version + + BQ_STORAGE_VERSIONS = BQStorageVersions() +PYARROW_VERSIONS = PyarrowVersions() def _not_null(value, field): diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 88759bd18..a627f5226 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -25,10 +25,40 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +else: + import numpy import pyarrow import pyarrow.parquet +try: + # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` + from shapely.geometry.base import BaseGeometry as _BaseGeometry +except ImportError: # pragma: NO COVER + # No shapely, use NoneType for _BaseGeometry as a placeholder. + _BaseGeometry = type(None) +else: + if pandas is not None: # pragma: NO COVER + + def _to_wkb(): + # Create a closure that: + # - Adds a not-null check. This allows the returned function to + # be used directly with apply, unlike `shapely.wkb.dumps`. + # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. 
+ # - Caches the WKBWriter (and write method lookup :) ) + # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. + from shapely.geos import WKBWriter, lgeos + + write = WKBWriter(lgeos).write + notnull = pandas.notnull + + def _to_wkb(v): + return write(v) if notnull(v) else v + + return _to_wkb + + _to_wkb = _to_wkb() + try: from google.cloud.bigquery_storage import ArrowSerializationOptions except ImportError: @@ -71,6 +101,7 @@ "uint8": "INTEGER", "uint16": "INTEGER", "uint32": "INTEGER", + "geometry": "GEOGRAPHY", } @@ -191,14 +222,16 @@ def bq_to_arrow_data_type(field): return data_type_constructor() -def bq_to_arrow_field(bq_field): +def bq_to_arrow_field(bq_field, array_type=None): """Return the Arrow field, corresponding to a given BigQuery column. Returns: None: if the Arrow type cannot be determined. """ arrow_type = bq_to_arrow_data_type(bq_field) - if arrow_type: + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) @@ -245,7 +278,24 @@ def bq_schema_to_nullsafe_pandas_dtypes( def bq_to_arrow_array(series, bq_field): - arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.field_type.upper() == "GEOGRAPHY": + arrow_type = None + first = _first_valid(series) + if first is not None: + if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry): + arrow_type = pyarrow.binary() + # Convert shapey geometry to WKB binary format: + series = series.apply(_to_wkb) + elif isinstance(first, bytes): + arrow_type = pyarrow.binary() + elif series.dtype.name == "geometry": + # We have a GeoSeries containing all nulls, convert it to a pandas series + series = pandas.Series(numpy.array(series)) + + if arrow_type is None: + arrow_type = bq_to_arrow_data_type(bq_field) + else: + arrow_type = bq_to_arrow_data_type(bq_field) field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" @@ -299,6 +349,12 @@ def list_columns_and_indexes(dataframe): return columns_and_indexes +def _first_valid(series): + first_valid_index = series.first_valid_index() + if first_valid_index is not None: + return series.at[first_valid_index] + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -339,6 +395,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if bq_type is None: + sample_data = _first_valid(dataframe[column]) + if ( + isinstance(sample_data, _BaseGeometry) + and sample_data is not None # Paranoia + ): + bq_type = "GEOGRAPHY" bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -463,11 +526,11 @@ def dataframe_to_arrow(dataframe, bq_schema): arrow_names = [] arrow_fields = [] for bq_field in bq_schema: - arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append( bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) ) + arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type)) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( @@ -791,7 +854,13 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values. 
- if pandas.isna(value): + is_nan = pandas.isna(value) + + # isna() can also return an array-like of bools, but the latter's boolean + # value is ambiguous, hence an extra check. An array-like value is *not* + # considered a NaN, however. + if isinstance(is_nan, bool) and is_nan: continue output[column] = value + yield output diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e2863e6a5..a738dd0f3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -62,17 +62,24 @@ from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( + CopyJob, + CopyJobConfig, + ExtractJob, + ExtractJobConfig, + LoadJob, LoadJobConfig, QueryJob, QueryJobConfig, - CopyJobConfig, - ExtractJobConfig, ) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_JOB_RETRY, + DEFAULT_RETRY, + DEFAULT_TIMEOUT, +) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -107,6 +114,8 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 +TIMEOUT_HEADER = "X-Server-Timeout" + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -228,7 +237,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -275,7 +284,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -341,7 +350,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -512,7 +521,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -587,7 +596,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. 
@@ -642,7 +651,7 @@ def create_table( table: Union[str, Table, TableReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -698,23 +707,33 @@ def create_table( return self.get_table(table.reference, retry=retry) def _call_api( - self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs + self, + retry, + span_name=None, + span_attributes=None, + job_ref=None, + headers: Optional[Dict[str, str]] = None, + **kwargs, ): + kwargs = _add_server_timeout_header(headers, kwargs) call = functools.partial(self._connection.api_request, **kwargs) + if retry: call = retry(call) + if span_name is not None: with create_span( name=span_name, attributes=span_attributes, client=self, job_ref=job_ref ): return call() + return call() def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -758,7 +777,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -788,7 +807,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -821,7 +840,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -846,7 +865,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -889,7 +908,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. @@ -933,7 +952,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. @@ -975,7 +994,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1045,7 +1064,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1109,7 +1128,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. 
@@ -1183,7 +1202,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1249,7 +1268,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1326,7 +1345,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1403,7 +1422,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1478,7 +1497,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1537,7 +1556,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1587,12 +1606,12 @@ def delete_model( def delete_job_metadata( self, - job_id, - project=None, - location=None, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, + job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob], + project: Optional[str] = None, + location: Optional[str] = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = DEFAULT_TIMEOUT, + not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1600,26 +1619,20 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: Job or job identifier. Keyword Arguments: - project (Optional[str]): + project: ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): + location: Location where the job was run. Ignored if ``job_id`` is a job object. - retry (Optional[google.api_core.retry.Retry]): + retry: How to retry the RPC. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (Optional[bool]): + not_found_ok: Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. """ @@ -1660,7 +1673,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. 
@@ -1714,7 +1727,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1767,7 +1780,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1856,7 +1869,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1951,7 +1964,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. @@ -2025,7 +2038,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. @@ -2102,7 +2115,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2217,7 +2230,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2301,7 +2314,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2404,7 +2417,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2622,7 +2635,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2905,7 +2918,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3008,7 +3021,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. 
@@ -3106,7 +3119,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3388,7 +3401,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3523,7 +3536,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3573,7 +3586,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3685,7 +3698,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See @@ -3988,3 +4001,16 @@ def _get_upload_headers(user_agent): "User-Agent": user_agent, "content-type": "application/json", } + + +def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs): + timeout = kwargs.get("timeout") + if timeout is not None: + if headers is None: + headers = {} + headers[TIMEOUT_HEADER] = str(timeout) + + if headers: + kwargs["headers"] = headers + + return kwargs diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 9c134b47c..72e711bcf 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -173,7 +173,7 @@ def _parse_type( \s* (ARRAY|STRUCT|RECORD) # Type \s* - <([A-Z0-9<> ,()]+)> # Subtype(s) + <([A-Z0-9_<> ,()]+)> # Subtype(s) \s*$ """, re.IGNORECASE | re.VERBOSE, diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 587598d5f..b1239ff57 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -494,7 +494,7 @@ def _extract_types( ([^:)]*) # name (?:: # ':' introduces type ( # start of type group - [a-zA-Z0-9<>, ]+ # First part, no parens + [a-zA-Z0-9_<>, ]+ # First part, no parens (?: # start sets of parens + non-paren text \([0-9 ,]+\) # comma-separated groups of digits in parens diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index c6ee98944..f0dd3d668 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ -240,7 +240,7 @@ def to_api_repr(self): def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py index 3373bcdef..52aa036c9 100644 --- a/google/cloud/bigquery/job/extract.py +++ b/google/cloud/bigquery/job/extract.py @@ -244,7 +244,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation - .. note: + .. 
note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index aee055c1c..b12c3e621 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -800,7 +800,7 @@ def to_api_repr(self): def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation - .. note: + .. note:: This method assumes that the project found in the resource matches the client's project. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index ca3ffb2bf..c07daec99 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -53,6 +53,7 @@ # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. import pandas + import geopandas import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage @@ -1482,6 +1483,7 @@ def to_dataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, max_results: Optional[int] = None, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1533,13 +1535,27 @@ def to_dataframe( .. versionadded:: 2.21.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + pandas.DataFrame: + A :class:`~pandas.DataFrame` populated with row data + and column headers from the query results. The column + headers are derived from the destination table's + schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( @@ -1548,6 +1564,101 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, + geography_as_object=geography_as_object, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + max_results: Optional[int] = None, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Return a GeoPandas GeoDataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. 
+ + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + .. versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + .. versionadded:: 1.26.0 + + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + .. versionadded:: 2.21.0 + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a GeoPandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. 
versionadded:: 2.24.0 + """ + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) + return query_result.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, ) def __iter__(self): diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index dce911232..60670167e 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -671,7 +671,9 @@ def _cell_magic(line, query): _handle_error(ex, args.destination_var) return - result = rows.to_dataframe(bqstorage_client=bqstorage_client) + result = rows.to_dataframe( + bqstorage_client=bqstorage_client, create_bqstorage_client=False, + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return @@ -728,11 +730,15 @@ def _cell_magic(line, query): if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) else: result = query_job.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) if args.destination_var: diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index e9286055c..830582322 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -29,6 +29,7 @@ exceptions.BadGateway, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, + requests.exceptions.Timeout, auth_exceptions.TransportError, ) @@ -59,6 +60,13 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +DEFAULT_TIMEOUT = 5.0 * 60.0 +"""The default API timeout. + +This is the time to wait per request. To adjust the total wait time, set a +deadline on the retry object. +""" + job_retry_reasons = "rateLimitExceeded", "backendError" diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 7387f58c1..c44289324 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -31,6 +31,20 @@ import pyarrow +try: + import geopandas +except ImportError: + geopandas = None +else: + _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" + +try: + import shapely.geos +except ImportError: + shapely = None +else: + _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator @@ -45,17 +59,25 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration if typing.TYPE_CHECKING: # pragma: NO COVER - from google.cloud import bigquery_storage - # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas + import geopandas + from google.cloud import bigquery_storage _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_GEOPANDAS_ERROR = ( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+) +_NO_SHAPELY_ERROR = ( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." +) _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -1726,6 +1748,7 @@ def to_arrow( .. versionadded:: 1.17.0 """ self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1850,6 +1873,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1904,6 +1928,13 @@ def to_dataframe( .. versionadded:: 1.26.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1912,11 +1943,18 @@ def to_dataframe( Raises: ValueError: - If the :mod:`pandas` library cannot be imported. + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if geography_as_object and shapely is None: + raise ValueError(_NO_SHAPELY_ERROR) + if dtypes is None: dtypes = {} @@ -1966,8 +2004,136 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + if geography_as_object: + for field in self.schema: + if field.field_type.upper() == "GEOGRAPHY": + df[field.name] = df[field.name].dropna().apply(_read_wkt) + return df + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_geodataframe() + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Create a GeoPandas GeoDataFrame by loading all pages of a query. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. 
+ ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a geopandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. versionadded:: 2.24.0 + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + + geography_columns = set( + field.name + for field in self.schema + if field.field_type.upper() == "GEOGRAPHY" + ) + if not geography_columns: + raise TypeError( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ) + + if geography_column: + if geography_column not in geography_columns: + raise ValueError( + f"The given geography column, {geography_column}, doesn't name" + f" a GEOGRAPHY column in the result." + ) + elif len(geography_columns) == 1: + [geography_column] = geography_columns + else: + raise ValueError( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ) + + df = self.to_dataframe( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + return geopandas.GeoDataFrame( + df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column + ) + class _EmptyRowIterator(RowIterator): """An empty row iterator. @@ -2018,6 +2184,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, + geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2035,6 +2202,31 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_geodataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + geography_column: Optional[str] = None, + ) -> "pandas.DataFrame": + """Create an empty dataframe. + + Args: + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + date_as_object (bool): Ignored. Added for compatibility with RowIterator. + + Returns: + pandas.DataFrame: An empty :class:`~pandas.DataFrame`. 
+ """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 84f6b4643..1f7d79ab9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.24.0" +__version__ = "2.26.0" diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..dbf6a163c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -94,7 +94,7 @@ def unit(session): default(session) -@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) def unit_noextras(session): """Run the unit test suite.""" default(session, install_extras=False) @@ -160,10 +160,6 @@ def snippets(session): if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - # Sanity check: Only run snippets tests if the environment variable is set. - if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): - session.skip("Credentials must be set via environment variable.") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) diff --git a/owlbot.py b/owlbot.py index 09845480a..09aa8ca6f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -63,7 +63,7 @@ s.replace( library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", r"type_ ", - "type " + "type ", ) s.move( @@ -78,8 +78,8 @@ "noxfile.py", "setup.py", f"scripts/fixup_bigquery_{library.name}_keywords.py", - f"google/cloud/bigquery/__init__.py", - f"google/cloud/bigquery/py.typed", + "google/cloud/bigquery/__init__.py", + "google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, # thus there's no point in generating it and its tests. f"google/cloud/bigquery_{library.name}/services/**", @@ -97,6 +97,10 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies={ + "pandas": "http://pandas.pydata.org/pandas-docs/dev", + "geopandas": "https://geopandas.org/", + }, ) # BigQuery has a custom multiprocessing note @@ -109,7 +113,11 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ] + # Group all renovate PRs together. If this works well, remove this and + # update the shared templates (possibly with configuration option to + # py_library.) + "renovate.json", + ], ) # ---------------------------------------------------------------------------- @@ -121,14 +129,14 @@ s.replace( "docs/conf.py", r'\{"members": True\}', - '{"members": True, "inherited-members": True}' + '{"members": True, "inherited-members": True}', ) # Tell Sphinx to ingore autogenerated docs files. 
s.replace( "docs/conf.py", r'"samples/snippets/README\.rst",', - '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) # ---------------------------------------------------------------------------- @@ -136,13 +144,14 @@ # ---------------------------------------------------------------------------- # Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") +s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") # Add pytype config to setup.cfg s.replace( "setup.cfg", r"universal = 1", - textwrap.dedent(""" \g<0> + textwrap.dedent( + """ \\g<0> [pytype] python_version = 3.8 @@ -156,7 +165,56 @@ # There's some issue with finding some pyi files, thus disabling. # The issue https://github.com/google/pytype/issues/150 is closed, but the # error still occurs for some reason. - pyi-error""") + pyi-error""" + ), +) + + +# Remove the replacements below once +# https://github.com/googleapis/synthtool/pull/1188 is merged + +# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files +s.replace( + ".kokoro/*.sh", + "repo-automation-bots/tree/master", + "repo-automation-bots/tree/main", +) + +# Customize CONTRIBUTING.rst to replace master with main +s.replace( + "CONTRIBUTING.rst", + "fetch and merge changes from upstream into master", + "fetch and merge changes from upstream into main", +) + +s.replace( + "CONTRIBUTING.rst", "git merge upstream/master", "git merge upstream/main", +) + +s.replace( + "CONTRIBUTING.rst", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""", +) + +s.replace( + "CONTRIBUTING.rst", r"remote \(``master``\)", "remote (``main``)", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/CONTRIBUTING.rst", "blob/main/CONTRIBUTING.rst", +) + +s.replace( + "CONTRIBUTING.rst", "blob/master/noxfile.py", "blob/main/noxfile.py", +) + +s.replace( + "docs/conf.py", "master_doc", "root_doc", +) + +s.replace( + "docs/conf.py", "# The master toctree document.", "# The root toctree document.", ) s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/renovate.json b/renovate.json index c04895563..713c60bb4 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,6 @@ { "extends": [ - "config:base", ":preserveSemverRanges" + "config:base", "group:all", ":preserveSemverRanges" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { diff --git a/samples/create_routine.py b/samples/create_routine.py index 012c7927a..1cb4a80b4 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -22,7 +22,7 @@ def create_routine(routine_id): # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Choose a fully-qualified ID for the routine. + # TODO(developer): Choose a fully qualified ID for the routine. 
# routine_id = "my-project.my_dataset.my_routine" routine = bigquery.Routine( diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index b0cf76724..5d836a5c5 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.4 +pytest==6.2.5 mock==4.0.3 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index dfee339d4..8fb578018 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,51 @@ +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +dataclasses==0.6; python_version < '3.7' +Fiona==1.8.20 geojson==2.5.0 -google-cloud-bigquery==2.24.0 -google-cloud-bigquery-storage==2.6.3 +geopandas==0.9.0 +google-api-core==2.0.1 +google-auth==2.0.2 +google-cloud-bigquery==2.26.0 +google-cloud-bigquery-storage==2.7.0 +google-cloud-core==2.0.0 +google-crc32c==1.1.2 +google-resumable-media==2.0.2 +googleapis-common-protos==1.53.0 +grpcio==1.39.0 +idna==3.2 +importlib-metadata==4.8.1 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5; python_version < "3.7" +numpy==1.21.2; python_version > "3.6" +packaging==21.0 +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1; python_version < "3.7" +pyproj==3.1.0; python_version > "3.6" +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 Shapely==1.7.1 +six==1.16.0 +typing-extensions==3.10.0.2 +typing-inspect==0.7.1 +urllib3==1.26.6 +zipp==3.5.0 diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py new file mode 100644 index 000000000..fa8073fef --- /dev/null +++ b/samples/geography/to_geodataframe.py @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +client = bigquery.Client() + + +def get_austin_service_requests_as_geography(): + # [START bigquery_query_results_geodataframe] + + sql = """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + return df diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py new file mode 100644 index 000000000..7a2ba6937 --- /dev/null +++ b/samples/geography/to_geodataframe_test.py @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .to_geodataframe import get_austin_service_requests_as_geography + + +def test_get_austin_service_requests_as_geography(): + geopandas = pytest.importorskip("geopandas") + df = get_austin_service_requests_as_geography() + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index b8dee50d0..caa48813a 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.0.0 -pytest==6.2.4 +google-cloud-testutils==1.1.0 +pytest==6.2.5 mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 264899dff..e096af157 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,12 +1,12 @@ -google-cloud-bigquery==2.24.0 -google-cloud-bigquery-storage==2.6.3 -google-auth-oauthlib==0.4.5 +google-cloud-bigquery==2.26.0 +google-cloud-bigquery-storage==2.7.0 +google-auth-oauthlib==0.4.6 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.2.0; python_version >= '3.7' +pandas==1.3.2; python_version >= '3.7' pyarrow==5.0.0 pytz==2021.1 diff --git a/samples/tests/test_routine_samples.py b/samples/tests/test_routine_samples.py index 59ec1fae9..c1b0bb5a7 100644 --- a/samples/tests/test_routine_samples.py +++ b/samples/tests/test_routine_samples.py @@ -32,7 +32,6 @@ def test_create_routine_ddl(capsys, random_routine_id, client): out, err = capsys.readouterr() assert "Created routine {}".format(random_routine_id) in out - return routine assert routine.type_ == "SCALAR_FUNCTION" assert routine.language == "SQL" expected_arguments = [ diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py index 6e844cc90..a5c2670e7 100644 --- a/samples/update_dataset_access.py +++ b/samples/update_dataset_access.py @@ -28,8 +28,8 @@ def update_dataset_access(dataset_id): entry = bigquery.AccessEntry( role="READER", - entity_type="userByEmail", - entity_id="sample.bigquery.dev@gmail.com", + entity_type="groupByEmail", + entity_id="cloud-developer-relations@google.com", ) entries = list(dataset.access_entries) diff --git a/setup.py b/setup.py index 6fa619d37..f1464e77a 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], "pandas": ["pandas>=1.0.0"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index bf1f89f58..6e27172b2 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have 
foo==1.14.0 +geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 @@ -18,5 +19,6 @@ proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 +Shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c6896da14..4b9868f10 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2351,9 +2351,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - def _fetch_dataframe(self, query): - return Config.CLIENT.query(query).result().to_dataframe() - def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 2bd496e83..075d3b680 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -278,8 +278,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. # See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 @@ -287,7 +285,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("dt_col", "DATETIME"), bigquery.SchemaField("float_col", "FLOAT"), bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), @@ -312,14 +310,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), ("float_col", [float("-inf"), float("nan"), float("inf")]), ( "geo_col", @@ -899,3 +897,190 @@ def test_list_rows_nullable_scalars_extreme_dtypes( # pandas uses Python string and bytes objects. 
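Because the explicit-schema test above re-enables DATETIME uploads, here is a minimal sketch of that code path with load_table_from_dataframe, which the round-trip test below also exercises; the destination table name is a placeholder.

import datetime

import pandas
from google.cloud import bigquery

client = bigquery.Client()
df = pandas.DataFrame({"dt_col": [datetime.datetime(2020, 1, 8, 8, 0, 0), None]})

# Pin the column type explicitly so the values load as DATETIME rather than
# the default TIMESTAMP inference for pandas datetime64[ns] columns.
job_config = bigquery.LoadJobConfig(
    schema=[bigquery.SchemaField("dt_col", "DATETIME")]
)
client.load_table_from_dataframe(
    df, "my_dataset.my_datetime_table", job_config=job_config
).result()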
assert df.dtypes["bytes_col"].name == "object" assert df.dtypes["string_col"].name == "object" + + +def test_upload_time_and_datetime_56(bigquery_client, dataset_id): + df = pandas.DataFrame( + dict( + dt=[ + datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime( + 2020, + 1, + 8, + 8, + 0, + 0, + tzinfo=datetime.timezone(datetime.timedelta(hours=-7)), + ), + ], + t=[datetime.time(0, 0, 10, 100001), None], + ) + ) + table = f"{dataset_id}.test_upload_time_and_datetime" + bigquery_client.load_table_from_dataframe(df, table).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [ + datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc), + datetime.time(0, 0, 10, 100001), + ], + [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None], + ] + + from google.cloud.bigquery import job, schema + + table = f"{dataset_id}.test_upload_time_and_datetime_dt" + config = job.LoadJobConfig( + schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")] + ) + + bigquery_client.load_table_from_dataframe(df, table, job_config=config).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)], + [datetime.datetime(2020, 1, 8, 15, 0), None], + ] + + +def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('point(0 1)')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_dataframe(geography_as_object=True) + assert list(df["name"]) == ["bar", "baz", "foo"] + assert df["geog"][0] == wkt.loads("point(0 1)") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + + +def test_to_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + from shapely import wkt + + bigquery_client.query( + f"create table {dataset_id}.geolake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.geolake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.geolake order by name" + ).to_geodataframe() + assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + assert isinstance(df, geopandas.GeoDataFrame) + assert isinstance(df["geog"], geopandas.GeoSeries) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" + assert df.crs.name == "WGS 84" + assert df.geog.crs.srs == "EPSG:4326" + assert df.geog.crs.name == "WGS 84" + + +def test_load_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + import pandas + from shapely import wkt + from google.cloud.bigquery.schema import SchemaField + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + + table_id = 
f"{dataset_id}.lake_from_gp" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo1", "GEOGRAPHY", "NULLABLE"), + SchemaField("geo2", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", None, "POINT(1 1)"], + ["foo", None, None], + ] + + +def test_load_dataframe_w_shapely(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")]) + ) + + table_id = f"{dataset_id}.lake_from_shapes" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] + + bigquery_client.load_table_from_dataframe(df, table_id).result() + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["bar", "POINT(1 1)"], + ["foo", None], + ["foo", None], + ] + + +def test_load_dataframe_w_wkb(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from shapely import wkb + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))]) + ) + + table_id = f"{dataset_id}.lake_from_wkb" + # We create the table first, to inform the interpretation of the wkb data + bigquery_client.query( + f"create table {table_id} (name string, geo GEOGRAPHY)" + ).result() + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 7a67ea6b5..feba65aa5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import mock import pytest from .helpers import make_client @@ -35,3 +36,21 @@ def DS_ID(): @pytest.fixture def LOCATION(): yield "us-central" + + +def noop_add_server_timeout_header(headers, kwargs): + if headers: + kwargs["headers"] = headers + return kwargs + + +@pytest.fixture(autouse=True) +def disable_add_server_timeout_header(request): + if "enable_add_server_timeout_header" in request.keywords: + yield + else: + with mock.patch( + "google.cloud.bigquery.client._add_server_timeout_header", + noop_add_server_timeout_header, + ): + yield diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index c792214e7..3642c7229 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -14,36 +14,20 @@ import unittest -import mock from google.api_core import exceptions - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) +from ..helpers import make_connection, make_client as __make_client def _make_client(project="test-project", connection=None): - from google.cloud.bigquery.client import Client - + client = __make_client(project) if connection is None: - connection = _make_connection() + connection = make_connection() - client = Client(project=project, credentials=_make_credentials(), _http=object()) client._connection = connection return client -def _make_connection(*responses): - import google.cloud.bigquery._http - from google.cloud.exceptions import NotFound - - mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - def _make_retriable_exception(): return exceptions.TooManyRequests( "retriable exception", errors=[{"reason": "rateLimitExceeded"}] diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index c3f7854e3..aa8e9c045 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -21,8 +21,9 @@ import mock import pytest +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_retriable_exception from .helpers import _make_job_resource @@ -740,7 +741,7 @@ def test_cancel_defaults(self): response = {"job": resource} job = self._set_properties_job() job._properties["jobReference"]["location"] = self.LOCATION - connection = job._client._connection = _make_connection(response) + connection = job._client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -769,7 +770,7 @@ def test_cancel_explicit(self): response = {"job": resource} job = self._set_properties_job() client = _make_client(project=other_project) - connection = client._connection = _make_connection(response) + connection = client._connection = make_connection(response) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -930,7 +931,7 @@ def test_result_default_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( _make_retriable_exception(), begun_job_resource, _make_retriable_exception(), @@ -968,7 +969,7 @@ def test_result_w_retry_wo_state(self): started=True, ended=True, ) - conn = _make_connection( + conn = make_connection( exceptions.NotFound("not normally retriable"), begun_job_resource, exceptions.NotFound("not normally retriable"), @@ -1008,7 +1009,7 @@ def 
test_result_w_retry_wo_state(self): ) def test_result_explicit_w_state(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) # Use _set_properties() instead of directly modifying _properties so diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index 992efcf6b..d94e5bc88 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestCopyJobConfig(_Base): @@ -333,7 +334,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -396,9 +397,9 @@ def test_begin_w_alternate_client(self): "writeDisposition": WriteDisposition.WRITE_TRUNCATE, } RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -427,7 +428,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) @@ -446,9 +447,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -468,7 +469,7 @@ def test_exists_hit_w_alternate_client(self): def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) @@ -488,9 +489,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py index 4c9411d0d..8bada51af 100644 --- a/tests/unit/job/test_extract.py +++ 
b/tests/unit/job/test_extract.py @@ -14,9 +14,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestExtractJobConfig(_Base): @@ -265,7 +266,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -318,9 +319,9 @@ def test_begin_w_alternate_client(self): "printHeader": False, } RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -353,7 +354,7 @@ def test_begin_w_alternate_client(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client @@ -371,9 +372,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one( self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 @@ -395,7 +396,7 @@ def test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) @@ -416,9 +417,9 @@ def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) source_dataset = DatasetReference(self.PROJECT, self.DS_ID) source = source_dataset.table(self.SOURCE_TABLE) diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 70e7860a7..cf2096b8b 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -16,9 +16,10 @@ import mock +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestLoadJob(_Base): @@ -238,7 +239,7 @@ def test_result_invokes_begin(self): begun_resource = self._make_resource() done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, done_resource) + connection = 
make_connection(begun_resource, done_resource) client = _make_client(self.PROJECT) client._connection = connection @@ -421,7 +422,7 @@ def test_from_api_repr_w_properties(self): self._verifyResourceProperties(job, RESOURCE) def test_begin_w_already_running(self): - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) job._properties["status"] = {"state": "RUNNING"} @@ -436,7 +437,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) path = "/projects/{}/jobs".format(self.PROJECT) @@ -478,7 +479,7 @@ def test_begin_w_autodetect(self): del resource["etag"] del resource["selfLink"] del resource["user_email"] - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) config = LoadJobConfig() config.autodetect = True @@ -559,9 +560,9 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") @@ -611,7 +612,7 @@ def test_begin_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -634,7 +635,7 @@ def test_begin_w_job_reference(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -654,9 +655,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -679,7 +680,7 @@ def test_exists_miss_w_job_reference(self): from google.cloud.bigquery import job job_ref = job._JobReference("my-job-id", "other-project", "US") - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -701,7 +702,7 @@ def test_exists_miss_w_job_reference(self): def 
test_reload_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -719,9 +720,9 @@ def test_reload_w_bound_client(self): def test_reload_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource() - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -744,7 +745,7 @@ def test_reload_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) + conn = make_connection(resource) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -769,7 +770,7 @@ def test_cancel_w_bound_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn = _make_connection(RESPONSE) + conn = make_connection(RESPONSE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( @@ -788,9 +789,9 @@ def test_cancel_w_alternate_client(self): PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) RESOURCE = self._make_resource(ended=True) RESPONSE = {"job": RESOURCE} - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESPONSE) + conn2 = make_connection(RESPONSE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) with mock.patch( @@ -813,7 +814,7 @@ def test_cancel_w_job_reference(self): resource["jobReference"]["projectId"] = "alternative-project" resource["jobReference"]["location"] = "US" job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection({"job": resource}) + conn = make_connection({"job": resource}) client = _make_client(project=self.PROJECT, connection=conn) load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) with mock.patch( diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index d41370520..4c598d797 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -26,9 +26,11 @@ from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query + +from ..helpers import make_connection + from .helpers import _Base from .helpers import _make_client -from .helpers import _make_connection class TestQueryJob(_Base): @@ -943,7 +945,7 @@ def test_result(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection( + conn = make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource ) client = _make_client(self.PROJECT, 
connection=conn) @@ -1005,7 +1007,7 @@ def test_result_with_done_job_calls_get_query_results(self): "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection(query_resource_done, results_page_resource) + conn = make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1052,7 +1054,7 @@ def test_result_with_max_results(self): {"f": [{"v": "ghi"}]}, ], } - connection = _make_connection(query_resource, query_page_resource) + connection = make_connection(query_resource, query_page_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1096,7 +1098,7 @@ def test_result_w_retry(self): "tableId": "dest_table", } - connection = _make_connection( + connection = make_connection( exceptions.NotFound("not normally retriable"), query_resource, exceptions.NotFound("not normally retriable"), @@ -1144,7 +1146,7 @@ def test_result_w_empty_schema(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": []}, } - connection = _make_connection(query_resource, query_resource) + connection = make_connection(query_resource, query_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1165,7 +1167,7 @@ def test_result_invokes_begins(self): query_resource["jobComplete"] = True done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, incomplete_resource, query_resource, @@ -1196,7 +1198,7 @@ def test_result_w_timeout(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) @@ -1245,7 +1247,7 @@ def test_result_w_page_size(self): ], } query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = _make_connection( + conn = make_connection( query_results_resource, query_page_resource, query_page_resource_2 ) client = _make_client(self.PROJECT, connection=conn) @@ -1303,7 +1305,7 @@ def test_result_with_start_index(self): {"f": [{"v": "jkl"}]}, ], } - connection = _make_connection(query_resource, tabledata_resource) + connection = make_connection(query_resource, tabledata_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -1428,7 +1430,7 @@ def test__begin_w_timeout(self): PATH = "/projects/%s/jobs" % (self.PROJECT,) RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1462,7 +1464,7 @@ def test_begin_w_bound_client(self): del RESOURCE["etag"] del RESOURCE["selfLink"] del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = 
QueryJobConfig() @@ -1530,9 +1532,9 @@ def test_begin_w_alternate_client(self): } RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION RESOURCE["configuration"]["dryRun"] = True - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(TABLE) @@ -1588,7 +1590,7 @@ def test_begin_w_udf(self): {"resourceUri": RESOURCE_URI}, {"inlineCode": INLINE_UDF_CODE}, ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) udf_resources = [ UDFResource("resourceUri", RESOURCE_URI), @@ -1647,7 +1649,7 @@ def test_begin_w_named_query_parameter(self): "parameterValue": {"value": "123"}, } ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1695,7 +1697,7 @@ def test_begin_w_positional_query_parameter(self): config["queryParameters"] = [ {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} ] - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) jconfig = QueryJobConfig() jconfig.query_parameters = query_parameters @@ -1774,7 +1776,7 @@ def test_begin_w_table_defs(self): csv_table: CSV_CONFIG_RESOURCE, } want_resource = copy.deepcopy(RESOURCE) - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.table_definitions = {bt_table: bt_config, csv_table: csv_config} @@ -1818,7 +1820,7 @@ def test_dry_run_query(self): del RESOURCE["selfLink"] del RESOURCE["user_email"] RESOURCE["configuration"]["dryRun"] = True - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) config = QueryJobConfig() config.dry_run = True @@ -1846,7 +1848,7 @@ def test_dry_run_query(self): def test_exists_miss_w_bound_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() + conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, self.QUERY, client) with mock.patch( @@ -1862,9 +1864,9 @@ def test_exists_miss_w_bound_client(self): def test_exists_hit_w_alternate_client(self): PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() + conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) + conn2 = make_connection({}) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1887,7 +1889,7 @@ def test_reload_w_bound_client(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1919,9 +1921,9 @@ def test_reload_w_alternate_client(self): "datasetId": DS_ID, "tableId": DEST_TABLE, } - conn1 = _make_connection() + conn1 = 
make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) + conn2 = make_connection(RESOURCE) client2 = _make_client(project=self.PROJECT, connection=conn2) job = self._make_one(self.JOB_ID, self.QUERY, client1) with mock.patch( @@ -1945,7 +1947,7 @@ def test_reload_w_timeout(self): DS_ID = "DATASET" DEST_TABLE = "dest_table" RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) + conn = make_connection(RESOURCE) client = _make_client(project=self.PROJECT, connection=conn) dataset_ref = DatasetReference(self.PROJECT, DS_ID) table_ref = dataset_ref.table(DEST_TABLE) @@ -1975,7 +1977,7 @@ def test_iter(self): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = make_connection(begun_resource, query_resource, done_resource) client = _make_client(project=self.PROJECT, connection=connection) job = self._make_one(self.JOB_ID, self.QUERY, client) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c3a9d2d1a..8e4fba770 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -22,13 +22,26 @@ from google.cloud import bigquery_storage +try: + import pandas +except (ImportError, AttributeError): # pragma: NO COVER + pandas = None +try: + import shapely +except (ImportError, AttributeError): # pragma: NO COVER + shapely = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +from ..helpers import make_connection + from .helpers import _make_client -from .helpers import _make_connection from .helpers import _make_job_resource pandas = pytest.importorskip("pandas") @@ -106,7 +119,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): }, "totalRows": "4", } - connection = _make_connection(get_query_results_resource, job_resource) + connection = make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -187,7 +200,7 @@ def test_to_arrow(): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) @@ -231,7 +244,7 @@ def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery.job import QueryJob as target_class from google.cloud.bigquery.schema import SchemaField - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) begun_resource = _make_job_resource(job_type="query") job = target_class.from_api_repr(begun_resource, client) @@ -277,7 +290,7 @@ def test_to_arrow_w_tqdm_w_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -333,7 +346,7 @@ def test_to_arrow_w_tqdm_w_pending_status(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", 
"INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -384,7 +397,7 @@ def test_to_arrow_w_tqdm_wo_query_plan(): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -409,37 +422,41 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -def test_to_dataframe(): +def _make_job(schema=(), rows=()): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") query_resource = { "jobComplete": True, "jobReference": begun_resource["jobReference"], - "totalRows": "4", + "totalRows": str(len(rows)), "schema": { "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + dict(name=field[0], type=field[1], mode=field[2]) for field in schema ] }, } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } + tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) - job = target_class.from_api_repr(begun_resource, client) + return target_class.from_api_repr(begun_resource, client) + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), + ( + ("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + ), + ) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -457,7 +474,7 @@ def test_to_dataframe_ddl_query(): "jobReference": resource["jobReference"], "schema": {"fields": []}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) @@ -481,7 +498,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ] }, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -525,7 +542,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): "totalRows": "4", "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, } - connection = _make_connection(query_resource) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) @@ -594,7 +611,7 @@ def test_to_dataframe_column_dtypes(): query_resource["rows"] = rows done_resource = copy.deepcopy(begun_resource) 
done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource ) client = _make_client(connection=connection) @@ -633,7 +650,7 @@ def test_to_dataframe_column_date_dtypes(): query_resource["rows"] = rows done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource ) client = _make_client(connection=connection) @@ -661,7 +678,7 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( + connection = make_connection( begun_resource, query_resource, done_resource, query_resource, query_resource, ) client = _make_client(connection=connection) @@ -693,7 +710,7 @@ def test_to_dataframe_w_tqdm_pending(): {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -748,7 +765,7 @@ def test_to_dataframe_w_tqdm(): {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, ] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -802,7 +819,7 @@ def test_to_dataframe_w_tqdm_max_results(): ] rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}] - connection = _make_connection({}) + connection = make_connection({}) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ -835,3 +852,94 @@ def test_to_dataframe_w_tqdm_max_results(): result_patch_tqdm.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(shapely is None, reason="Requires `shapely`") +def test_to_dataframe_geography_as_object(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "float", + ] # float because nan + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_to_geodataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_geodataframe(create_bqstorage_client=False) + + assert isinstance(df, geopandas.GeoDataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "NoneType", + ] # float because nan + assert isinstance(df.geog, geopandas.GeoSeries) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def 
test_query_job_to_geodataframe_delegation(wait_for_query): + """ + QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for RowIterator. + """ + import numpy + + job = _make_job() + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + max_results = 42 + geography_column = "g" + + df = job.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + max_results=max_results, + geography_column=geography_column, + ) + + wait_for_query.assert_called_once_with( + job, progress_bar_type, max_results=max_results + ) + row_iterator = wait_for_query.return_value + row_iterator.to_geodataframe.assert_called_once_with( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + assert df is row_iterator.to_geodataframe.return_value diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index cf60cf749..035f04456 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -21,6 +21,12 @@ class TestBQStorageVersions(unittest.TestCase): + def tearDown(self): + from google.cloud.bigquery import _helpers + + # Reset any cached versions since it may not match reality. + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + def _object_under_test(self): from google.cloud.bigquery import _helpers @@ -52,6 +58,34 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional +class TestPyarrowVersions(unittest.TestCase): + def tearDown(self): + from google.cloud.bigquery import _helpers + + # Reset any cached versions since it may not match reality. 
+ _helpers.PYARROW_VERSIONS._installed_version = None + + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.PyarrowVersions() + + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("pyarrow.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): from google.cloud.bigquery._helpers import _not_null diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 5a792527a..0140beb77 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -32,14 +32,19 @@ import pyarrow import pyarrow.types + +try: + import geopandas +except ImportError: # pragma: NO COVER + geopandas = None + import pytest from google import api_core +from google.cloud import bigquery_storage from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud import bigquery_storage - PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -566,6 +571,57 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_bq_to_arrow_array_w_geography_dtype(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = geopandas.GeoSeries([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = pandas.Series([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == list(series) + + def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( 
schema.SchemaField("field1", "STRING"), @@ -743,6 +799,41 @@ def test_dataframe_to_json_generator(module_under_test): assert list(rows) == expected +def test_dataframe_to_json_generator_repeated_field(module_under_test): + pytest.importorskip( + "pandas", + minversion=str(PANDAS_MINIUM_VERSION), + reason=( + f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` " + "which introduces pandas.NA" + ), + ) + + df_data = [ + collections.OrderedDict( + [("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")] + ), + collections.OrderedDict( + [ + ("repeated_col", ["a", "b", mock.sentinel.foo, "d"]), + ("not_repeated_col", "second"), + ] + ), + ] + dataframe = pandas.DataFrame(df_data) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + + expected = [ + {"repeated_col": [pandas.NA, 2, None, 4], "not_repeated_col": "first"}, + { + "repeated_col": ["a", "b", mock.sentinel.foo, "d"], + "not_repeated_col": "second", + }, + ] + assert list(rows) == expected + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( @@ -804,7 +895,7 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): def test_dataframe_to_bq_schema_dict_sequence(module_under_test): df_data = collections.OrderedDict( [ - ("str_column", [u"hello", u"world"]), + ("str_column", ["hello", "world"]), ("int_column", [42, 8]), ("bool_column", [True, False]), ] @@ -988,7 +1079,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) arrow_table = module_under_test.dataframe_to_arrow(dataframe, dict_schema) @@ -1002,7 +1093,7 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_extra_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( pandas.DataFrame(), (schema.SchemaField("not_in_df", "STRING"),), None @@ -1013,7 +1104,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test, monkeypatch): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_parquet_w_missing_fields(module_under_test, monkeypatch): +def test_dataframe_to_parquet_w_missing_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( pandas.DataFrame({"not_in_bq": [1, 2, 3]}), (), None @@ -1046,8 +1137,8 @@ def test_dataframe_to_parquet_compression_method(module_under_test): def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ - {"id": 10, "status": u"FOO", "created_at": datetime.date(2019, 5, 10)}, - {"id": 20, "status": u"BAR", "created_at": datetime.date(2018, 9, 12)}, + {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, ] ) @@ -1075,8 +1166,8 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ - {"struct_field": {"one": 2}, "status": u"FOO"}, - {"struct_field": {"two": u"222"}, "status": u"BAR"}, + 
{"struct_field": {"one": 2}, "status": "FOO"}, + {"struct_field": {"two": "222"}, "status": "BAR"}, ] ) @@ -1095,6 +1186,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): assert "struct_field" in str(expected_warnings[0]) +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_dataframe_to_bq_schema_geography(module_under_test): + from shapely import wkt + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( + schema.SchemaField("name", "STRING"), + schema.SchemaField("geo1", "GEOGRAPHY"), + schema.SchemaField("geo2", "GEOGRAPHY"), + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( @@ -1107,7 +1220,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "timestamp_field": datetime.datetime(2005, 5, 31, 14, 25, 55), "date_field": datetime.date(2005, 5, 31), "bytes_field": b"some bytes", - "string_field": u"some characters", + "string_field": "some characters", "numeric_field": decimal.Decimal("123.456"), "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } @@ -1166,13 +1279,13 @@ def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( data=[ { - "status": u"FOO", + "status": "FOO", "struct_field": {"one": 1}, - "struct_field_2": {"foo": u"123"}, + "struct_field_2": {"foo": "123"}, }, { - "status": u"BAR", - "struct_field": {"two": u"111"}, + "status": "BAR", + "struct_field": {"two": "111"}, "struct_field_2": {"bar": 27}, }, ] @@ -1206,7 +1319,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): ] dataframe = pandas.DataFrame( - {"field01": [u"hello", u"world"], "field02": [True, False]} + {"field01": ["hello", "world"], "field02": [True, False]} ) write_table_patch = mock.patch.object( @@ -1479,3 +1592,21 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) + + +def test_bq_to_arrow_field_type_override(module_under_test): + # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. 
+ + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + ).type + == pyarrow.binary() + ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 458798afa..4c6ec5b4f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -52,6 +52,7 @@ from google.cloud import bigquery_storage from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection @@ -358,7 +359,7 @@ def test__get_query_results_miss_w_client_location(self): method="GET", path="/projects/PROJECT/queries/nothere", query_params={"maxResults": 0, "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test__get_query_results_hit(self): @@ -419,7 +420,9 @@ def test_get_service_account_email_w_alternate_project(self): service_account_email = client.get_service_account_email(project=project) final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=DEFAULT_TIMEOUT + ) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_custom_retry(self): @@ -687,7 +690,7 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -723,7 +726,7 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -759,11 +762,13 @@ def test_create_routine_w_conflict_exists_ok(self): self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + ), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -839,7 +844,7 @@ def test_create_table_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) @@ -880,7 +885,7 @@ def test_create_table_w_encryption_configuration(self): "labels": {}, "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -916,7 +921,7 @@ def test_create_table_w_day_partition_and_expire(self): "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) @@ -997,7 +1002,7 @@ def test_create_table_w_schema_and_query(self): "view": {"query": query, "useLegacySql": False}, "labels": {}, }, - 
timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1052,7 +1057,7 @@ def test_create_table_w_external(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1091,7 +1096,7 @@ def test_create_table_w_reference(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1125,7 +1130,7 @@ def test_create_table_w_fully_qualified_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1157,7 +1162,7 @@ def test_create_table_w_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1192,7 +1197,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_table_alreadyexists_w_exists_ok_true(self): @@ -1235,9 +1240,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) @@ -1310,7 +1315,7 @@ def test_get_model_w_string(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None + method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT ) self.assertEqual(got.model_id, self.MODEL_ID) @@ -1419,7 +1424,7 @@ def test_get_table_sets_user_agent(self): "User-Agent": expected_user_agent, }, data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIn("my-application/1.2.3", expected_user_agent) @@ -1717,7 +1722,6 @@ def test_update_dataset(self): "access": ACCESS, }, path="/" + PATH, - headers=None, timeout=7.5, ) self.assertEqual(ds2.description, ds.description) @@ -1761,8 +1765,7 @@ def test_update_dataset_w_custom_property(self): method="PATCH", data={"newAlphaProperty": "unreleased property"}, path=path, - headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -1820,7 +1823,7 @@ def test_update_model(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_model.model_id, model.model_id) self.assertEqual(updated_model.description, model.description) @@ -1893,7 +1896,6 @@ def test_update_routine(self): method="PUT", data=sent, path="/projects/routines-project/datasets/test_routines/routines/updated_routine", - headers=None, timeout=7.5, ) self.assertEqual(actual_routine.arguments, routine.arguments) @@ -2001,7 +2003,7 @@ def test_update_table(self): "labels": {"x": "y"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5 ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -2051,8 +2053,7 @@ def test_update_table_w_custom_property(self): method="PATCH", path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, - headers=None, - timeout=None, + 
timeout=DEFAULT_TIMEOUT, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2086,8 +2087,7 @@ def test_update_table_only_use_legacy_sql(self): method="PATCH", path="/%s" % path, data={"view": {"useLegacySql": True}}, - headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2184,8 +2184,7 @@ def test_update_table_w_query(self): "expirationTime": str(_millis(exp_time)), "schema": schema_resource, }, - headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_update_table_w_schema_None(self): @@ -2314,7 +2313,7 @@ def test_delete_job_metadata_not_found(self): method="DELETE", path="/projects/client-proj/jobs/my-job/delete", query_params={"location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_id(self): @@ -2328,7 +2327,7 @@ def test_delete_job_metadata_with_id(self): method="DELETE", path="/projects/param-proj/jobs/my-job/delete", query_params={"location": "param-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_resource(self): @@ -2353,7 +2352,7 @@ def test_delete_job_metadata_with_resource(self): method="DELETE", path="/projects/job-based-proj/jobs/query_job/delete", query_params={"location": "us-east1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_model(self): @@ -2408,7 +2407,9 @@ def test_delete_model_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_model_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2429,7 +2430,9 @@ def test_delete_model_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_routine(self): from google.cloud.bigquery.routine import Routine @@ -2483,7 +2486,7 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2505,7 +2508,7 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -2569,7 +2572,9 @@ def test_delete_table_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_table_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -2591,7 +2596,9 @@ def test_delete_table_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - 
conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def _create_job_helper(self, job_config): creds = _make_credentials() @@ -2609,7 +2616,7 @@ def _create_job_helper(self, job_config): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_job_load_config(self): @@ -2758,7 +2765,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): method="POST", path="/projects/PROJECT/jobs", data=data_without_destination, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ) @@ -2798,7 +2805,7 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_miss_w_client_location(self): @@ -2816,7 +2823,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_hit_w_timeout(self): @@ -2885,7 +2892,7 @@ def test_cancel_job_miss_w_explict_project(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_miss_w_client_location(self): @@ -2904,7 +2911,7 @@ def test_cancel_job_miss_w_client_location(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_hit(self): @@ -2940,7 +2947,7 @@ def test_cancel_job_hit(self): method="POST", path="/projects/job-based-proj/jobs/query_job/cancel", query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_w_timeout(self): @@ -3066,7 +3073,7 @@ def test_load_table_from_uri_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_client_location(self): @@ -3110,7 +3117,7 @@ def test_load_table_from_uri_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_invalid_job_config(self): @@ -3398,7 +3405,7 @@ def test_copy_table_w_multiple_sources(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=expected_resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -3460,7 +3467,7 @@ def test_copy_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_client_location(self): @@ -3510,7 +3517,7 @@ def test_copy_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_source_strings(self): @@ -3603,7 +3610,7 @@ def test_copy_table_w_valid_job_config(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job._configuration, CopyJobConfig) @@ -3709,7 +3716,7 @@ def 
test_extract_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_w_client_location(self): @@ -3753,7 +3760,7 @@ def test_extract_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_generated_job_id(self): @@ -3796,7 +3803,7 @@ def test_extract_table_generated_job_id(self): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3841,7 +3848,7 @@ def test_extract_table_w_destination_uris(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -4011,7 +4018,7 @@ def test_query_defaults(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4064,7 +4071,7 @@ def test_query_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_explicit_job_config(self): @@ -4120,7 +4127,10 @@ def test_query_w_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4164,7 +4174,10 @@ def test_query_preserving_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4216,7 +4229,10 @@ def test_query_preserving_explicit_default_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original default config object should not have been modified @@ -4301,7 +4317,10 @@ def test_query_w_explicit_job_config_override(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_client_default_config_no_incoming(self): @@ -4342,7 +4361,10 @@ def test_query_w_client_default_config_no_incoming(self): # Check that query actually starts the job. 
conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_invalid_default_job_config(self): @@ -4387,7 +4409,7 @@ def test_query_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_detect_location(self): @@ -4458,7 +4480,7 @@ def test_query_w_udf_resources(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4514,7 +4536,7 @@ def test_query_w_query_parameters(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertEqual(sent["jobReference"]["jobId"], JOB) sent_config = sent["configuration"]["query"] @@ -4706,7 +4728,7 @@ def _row_data(row): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/%s" % PATH) self.assertEqual(req["data"], SENT) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -4774,7 +4796,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_list_of_Rows(self): @@ -4819,7 +4841,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): @@ -4896,7 +4918,7 @@ def _row_data(row): errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] ) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_repeated_fields(self): @@ -4997,7 +5019,7 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None, + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5063,7 +5085,7 @@ def test_insert_rows_w_record_schema(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_explicit_none_insert_ids(self): @@ -5097,7 +5119,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=None, + method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5181,7 +5203,7 @@ def test_insert_rows_w_numeric(self): project, 
ds_id, table_id ), data=sent, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5373,7 +5395,10 @@ def test_insert_rows_from_dataframe_many_columns(self): ] } expected_call = mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) actual_calls = conn.api_request.call_args_list @@ -5426,7 +5451,10 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 assert actual_calls[0] == mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_default_behavior(self): @@ -5506,7 +5534,7 @@ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): @@ -5536,7 +5564,7 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_iterator_row_ids(self): @@ -5563,7 +5591,7 @@ def test_insert_rows_json_with_iterator_row_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_non_iterable_row_ids(self): @@ -5616,7 +5644,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_none_insert_ids_sequence(self): @@ -5655,7 +5683,7 @@ def test_insert_rows_json_w_none_insert_ids_sequence(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_wrong_arg(self): @@ -5850,7 +5878,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -5860,7 +5888,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -6011,7 +6039,7 @@ def test_list_rows_repeated_fields(self): "selectedFields": "color,struct", "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_w_record_schema(self): @@ -6081,7 +6109,7 @@ def test_list_rows_w_record_schema(self): method="GET", path="/%s" % PATH, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_with_missing_schema(self): @@ -6136,7 +6164,7 @@ def test_list_rows_with_missing_schema(self): row_iter = client.list_rows(table) conn.api_request.assert_called_once_with( - method="GET", path=table_path, timeout=None + method="GET", path=table_path, timeout=DEFAULT_TIMEOUT ) conn.api_request.reset_mock() self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) @@ -6146,7 +6174,7 @@ def test_list_rows_with_missing_schema(self): 
method="GET", path=tabledata_path, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) @@ -6319,7 +6347,7 @@ def test_load_table_from_file_resumable(self): file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6352,7 +6380,7 @@ def test_load_table_from_file_w_explicit_project(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6386,7 +6414,7 @@ def test_load_table_from_file_w_client_location(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6448,7 +6476,7 @@ def test_load_table_from_file_resumable_metadata(self): file_obj, expected_config, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6477,7 +6505,7 @@ def test_load_table_from_file_multipart(self): self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.PROJECT, ) @@ -6502,7 +6530,7 @@ def test_load_table_from_file_with_retries(self): file_obj, self.EXPECTED_CONFIGURATION, num_retries, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6539,7 +6567,7 @@ def test_load_table_from_file_with_readable_gzip(self): gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6661,7 +6689,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6718,7 +6746,7 @@ def test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6771,7 +6799,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6826,7 +6854,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6918,7 +6946,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6983,7 +7011,7 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7037,7 +7065,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7083,7 +7111,7 @@ def 
test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf( @@ -7124,7 +7152,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7171,7 +7199,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7232,7 +7260,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7306,7 +7334,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7427,7 +7455,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7493,7 +7521,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7531,7 +7559,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7584,7 +7612,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7644,7 +7672,7 @@ def test_load_table_from_json_unicode_emoji_data_case(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_data_file = load_table_from_file.mock_calls[0][1][1] @@ -7974,3 +8002,20 @@ def transmit_next_chunk(transport): chunk_size = RU.call_args_list[0][0][1] assert chunk_size == 100 * (1 << 20) + + +@pytest.mark.enable_add_server_timeout_header +@pytest.mark.parametrize("headers", [None, {}]) +def test__call_api_add_server_timeout_w_timeout(client, headers): + client._connection = make_connection({}) + client._call_api(None, method="GET", path="/", headers=headers, timeout=42) + client._connection.api_request.assert_called_with( + method="GET", path="/", timeout=42, headers={"X-Server-Timeout": "42"} + ) + + +@pytest.mark.enable_add_server_timeout_header +def test__call_api_no_add_server_timeout_wo_timeout(client): + client._connection = make_connection({}) + client._call_api(None, method="GET", path="/") + client._connection.api_request.assert_called_with(method="GET", path="/") diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index d07aaed4f..67b21225d 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -15,6 +15,7 @@ from google.cloud.bigquery.dataset import Dataset, DatasetReference from 
.helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import mock import pytest @@ -111,7 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "labels": LABELS, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -143,7 +144,7 @@ def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -176,7 +177,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LO "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -211,7 +212,7 @@ def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOC "labels": {}, "location": OTHER_LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -241,7 +242,7 @@ def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -271,7 +272,7 @@ def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -306,7 +307,7 @@ def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -356,8 +357,8 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 4afc47b6c..84c74eeec 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -606,8 +606,8 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameters,parameter_types,expect", [ ( - [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], - ["ARRAY", "struct"], + [[], dict(name="ch1", b_date=datetime.date(2021, 1, 1))], + ["ARRAY", "struct"], [ { "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, @@ -617,13 +617,13 @@ def test_complex_query_parameter_type_errors(type_, value, expect): "parameterType": { "structTypes": [ {"name": "name", "type": {"type": "STRING"}}, - {"name": "bdate", "type": {"type": "DATE"}}, + {"name": "b_date", "type": {"type": "DATE"}}, ], "type": "STRUCT", }, "parameterValue": { "structValues": { - "bdate": {"value": "2021-01-01"}, + "b_date": {"value": "2021-01-01"}, "name": {"value": "ch1"}, } }, diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index f075bb6f7..07bce986f 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -785,6 +785,10 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), ( "values(%%%%%(foo:struct)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py index 3a65e031c..b48beb147 100644 --- a/tests/unit/test_delete_dataset.py +++ b/tests/unit/test_delete_dataset.py @@ -14,6 
+14,7 @@ from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import pytest @@ -40,7 +41,7 @@ def test_delete_dataset_delete_contents( method="DELETE", path="/%s" % PATH, query_params={"deleteContents": "true"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -61,7 +62,7 @@ def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): client.delete_dataset(DS_ID) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -74,5 +75,5 @@ def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): ) client.delete_dataset(DS_ID, not_found_ok=True) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py index 7793a7ba6..6f0b55c5e 100644 --- a/tests/unit/test_list_datasets.py +++ b/tests/unit/test_list_datasets.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -65,7 +66,7 @@ def test_list_datasets_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -120,5 +121,5 @@ def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): "maxResults": 3, "pageToken": TOKEN, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py index f348be724..1fb40d446 100644 --- a/tests/unit/test_list_jobs.py +++ b/tests/unit/test_list_jobs.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -136,7 +137,7 @@ def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): method="GET", path="/%s" % PATH, query_params=dict({"projection": "full"}, **query), - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -185,7 +186,7 @@ def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -220,7 +221,7 @@ def test_list_jobs_explicit_missing(client, PROJECT): "allUsers": True, "stateFilter": "done", }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -233,7 +234,7 @@ def test_list_jobs_w_project(client, PROJECT): method="GET", path="/projects/other-project/jobs", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -269,7 +270,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): "minCreationTime": "1", "maxCreationTime": str(end_time_millis), }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -286,6 +287,6 @@ def test_list_jobs_w_parent_job_filter(client, PROJECT): method="GET", path="/projects/%s/jobs" % PROJECT, query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 4ede9a7dd..b14852338 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) @@ -82,7 +84,7 @@ def test_list_models_defaults( assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py index a88540dd5..190612b44 100644 --- a/tests/unit/test_list_projects.py +++ b/tests/unit/test_list_projects.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -66,7 +67,7 @@ def test_list_projects_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params=query, timeout=None + method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -115,5 +116,5 @@ def test_list_projects_explicit_response_missing_projects_key(client): method="GET", path="/projects", query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 069966542..80e62d6bd 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_routines_empty_w_timeout(client): conn = client._connection = make_connection({}) @@ -85,7 +87,7 @@ def test_list_routines_defaults( assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params=query, timeout=None + method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 45d15bed3..8360f6605 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -1,21 +1,23 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic -import google.cloud.bigquery.dataset import pytest +import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + @dataset_polymorphic def test_list_tables_empty_w_timeout( @@ -89,7 +91,7 @@ def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -150,7 +152,7 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index bb3a8d1fd..01c213e98 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -32,6 +32,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.magics import magics +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -172,7 +173,7 @@ def test_context_with_default_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -236,7 +237,7 @@ def test_context_with_custom_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -593,7 +594,9 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -635,7 +638,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type="tqdm" + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -689,7 +694,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) - query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.return_value.to_dataframe.assert_called_once_with( + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=mock.ANY, + ) @pytest.mark.usefixtures("ipython_interactive") @@ -858,7 +868,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, ) @@ -1175,7 +1185,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, 
progress_bar_type=magics.context.progress_bar_type + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=magics.context.progress_bar_type, ) assert isinstance(return_value, pandas.DataFrame) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index c7c25e036..e0a992f78 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -55,6 +55,18 @@ def test_w_unstructured_requests_chunked_encoding_error(self): exc = requests.exceptions.ChunkedEncodingError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_connecttimeout(self): + exc = requests.exceptions.ConnectTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_readtimeout(self): + exc = requests.exceptions.ReadTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_timeout(self): + exc = requests.exceptions.Timeout() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 0ff2c9258..ed9ed5d0f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -14,6 +14,7 @@ import datetime import logging +import re import time import types import unittest @@ -36,6 +37,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None + try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -1826,6 +1832,27 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. self.assertEqual(len(df.columns), 0) + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_if_geopandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one() + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + class TestRowIterator(unittest.TestCase): def _class_under_test(self): @@ -1863,6 +1890,16 @@ def _make_one( client, api_request, path, schema, table=table, **kwargs ) + def _make_one_from_data(self, schema=(), rows=()): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField(*a) for a in schema] + rows = [{"f": [{"v": v} for v in row]} for row in rows] + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + return self._make_one(_mock_client(), api_request, path, schema) + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start @@ -3076,6 +3113,18 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.shapely", new=None) + def test_to_dataframe_error_if_shapely_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." + ), + ): + self._make_one_from_data().to_dataframe(geography_as_object=True) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3790,6 +3839,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_dataframe_geography_as_object(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_dataframe( + create_bqstorage_client=False, geography_as_object=True, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "object") + self.assertIsInstance(df.geog, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"] + ) + + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_error_if_geopandas_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + self._make_one_from_data().to_geodataframe() + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + self.assertEqual(df.geog.crs.srs, "EPSG:4326") + self.assertEqual(df.geog.crs.name, "WGS 84") + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_ambiguous_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_bad_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "The given geography column, xxx, doesn't name" + " a GEOGRAPHY column in the result." 
+ ), + ): + row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="xxx" + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_no_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "STRING")), () + ) + with self.assertRaisesRegex( + TypeError, + re.escape( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_w_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)", "Point(1 1)"), + ("bar", None, "Point(2 2)"), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), + ), + ) + df = row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="geog" + ) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog", "geog2"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertEqual(df.geog2.dtype.name, "object") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] + ) + + # Geog2 isn't a GeoSeries, but it contains geomentries: + self.assertIsInstance(df.geog2, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] + ) + # and can easily be converted to a GeoSeries + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") + def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): + """ + RowIterator.to_geodataframe just delegates to RowIterator.to_dataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for to_dataframe. 
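# A short sketch of the surface exercised by the geography tests above, assuming
# the optional geopandas/shapely extras are installed; the dataset, table, and
# column names below are made-up placeholders.
from google.cloud import bigquery

client = bigquery.Client()  # assumes application default credentials

# GEOGRAPHY values become shapely objects instead of plain WKT strings:
rows = client.query("SELECT name, geog FROM my_dataset.places").result()
df = rows.to_dataframe(create_bqstorage_client=False, geography_as_object=True)

# Or build a geopandas.GeoDataFrame directly, naming the geometry column:
rows = client.query("SELECT name, geog FROM my_dataset.places").result()
gdf = rows.to_geodataframe(create_bqstorage_client=False, geography_column="geog")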
+ """ + import numpy + from shapely import wkt + + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("g", "GEOGRAPHY")) + ) + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + geography_column = "g" + + to_dataframe.return_value = pandas.DataFrame( + dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + ) + + df = row_iterator.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + + to_dataframe.assert_called_once_with( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(list(df), ["name", "g"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.g.dtype.name, "geometry") + self.assertIsInstance(df.g, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From a7842b6f66e016e489a39b1417fda35e9e98cb97 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 27 Sep 2021 23:50:25 +0200 Subject: [PATCH 10/35] chore!: remove google.cloud.bigquery_v2 code (#855) Closes #814. The first preview. Still need to address Model types, disable code generation, BigQuery ML classes... **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- .../types.rst => bigquery/standard_sql.rst} | 2 +- docs/conf.py | 3 - docs/reference.rst | 4 +- google/cloud/bigquery/__init__.py | 15 +- google/cloud/bigquery/dbapi/_helpers.py | 6 +- google/cloud/bigquery/enums.py | 99 +- google/cloud/bigquery/model.py | 263 ++- google/cloud/bigquery/query.py | 24 +- google/cloud/bigquery/routine/routine.py | 40 +- google/cloud/bigquery/schema.py | 84 +- google/cloud/bigquery/standard_sql.py | 363 ++++ google/cloud/bigquery_v2/__init__.py | 46 - google/cloud/bigquery_v2/gapic_metadata.json | 63 - google/cloud/bigquery_v2/py.typed | 2 - google/cloud/bigquery_v2/types/__init__.py | 48 - .../bigquery_v2/types/encryption_config.py | 42 - google/cloud/bigquery_v2/types/model.py | 1507 ----------------- .../bigquery_v2/types/model_reference.py | 44 - .../cloud/bigquery_v2/types/standard_sql.py | 117 -- .../bigquery_v2/types/table_reference.py | 58 - owlbot.py | 128 +- samples/create_routine.py | 5 +- samples/tests/conftest.py | 5 +- samples/tests/test_routine_samples.py | 25 +- setup.cfg | 1 - setup.py | 2 - testing/constraints-3.6.txt | 2 - tests/system/test_client.py | 26 +- tests/unit/enums/__init__.py | 13 - .../enums/test_standard_sql_data_types.py | 76 - tests/unit/gapic/__init__.py | 15 - tests/unit/model/test_model.py | 62 +- tests/unit/routine/test_routine.py | 41 +- tests/unit/routine/test_routine_argument.py | 14 +- tests/unit/test_client.py | 8 +- tests/unit/test_dbapi__helpers.py | 6 +- tests/unit/test_query.py | 4 +- tests/unit/test_schema.py | 104 +- tests/unit/test_standard_sql_types.py | 588 +++++++ 39 files changed, 1327 insertions(+), 2628 deletions(-) rename docs/{bigquery_v2/types.rst => bigquery/standard_sql.rst} (72%) create mode 100644 google/cloud/bigquery/standard_sql.py delete mode 100644 google/cloud/bigquery_v2/__init__.py delete mode 100644 google/cloud/bigquery_v2/gapic_metadata.json delete mode 100644 google/cloud/bigquery_v2/py.typed delete mode 100644 google/cloud/bigquery_v2/types/__init__.py delete mode 100644 google/cloud/bigquery_v2/types/encryption_config.py delete mode 100644 google/cloud/bigquery_v2/types/model.py delete mode 100644 google/cloud/bigquery_v2/types/model_reference.py delete mode 100644 google/cloud/bigquery_v2/types/standard_sql.py delete mode 100644 google/cloud/bigquery_v2/types/table_reference.py delete mode 100644 tests/unit/enums/__init__.py delete mode 100644 tests/unit/enums/test_standard_sql_data_types.py delete mode 100644 tests/unit/gapic/__init__.py create mode 100644 tests/unit/test_standard_sql_types.py diff --git a/docs/bigquery_v2/types.rst b/docs/bigquery/standard_sql.rst similarity index 72% rename from docs/bigquery_v2/types.rst rename to docs/bigquery/standard_sql.rst index c36a83e0b..bd52bb78f 100644 --- a/docs/bigquery_v2/types.rst +++ b/docs/bigquery/standard_sql.rst @@ -1,7 +1,7 @@ Types for Google Cloud Bigquery v2 API ====================================== -.. automodule:: google.cloud.bigquery_v2.types +.. 
automodule:: google.cloud.bigquery.standard_sql :members: :undoc-members: :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 07e5d8c30..b8ddbd8c8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -114,7 +114,6 @@ "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", - "bigquery_v2/services.rst", # generated by the code generator ] # The reST default role (used for this markup: `text`) to use for all @@ -364,8 +363,6 @@ "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), - "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), - "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/reference.rst b/docs/reference.rst index d8738e67b..128dee718 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -197,9 +197,9 @@ Encryption Configuration Additional Types ================ -Protocol buffer classes for working with the Models API. +Helper SQL type classes. .. toctree:: :maxdepth: 2 - bigquery_v2/types + bigquery/standard_sql diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 5529f9b2e..660a660b4 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -41,7 +41,7 @@ from google.cloud.bigquery.enums import DecimalTargetType from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames -from google.cloud.bigquery.enums import StandardSqlDataTypes +from google.cloud.bigquery.enums import StandardSqlTypeNames from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -77,6 +77,7 @@ from google.cloud.bigquery.query import ArrayQueryParameterType from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import ScalarQueryParameterType +from google.cloud.bigquery.query import SqlParameterScalarTypes from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import StructQueryParameterType from google.cloud.bigquery.query import UDFResource @@ -87,6 +88,10 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.standard_sql import StandardSqlDataType +from google.cloud.bigquery.standard_sql import StandardSqlField +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery.standard_sql import StandardSqlTableType from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row @@ -108,6 +113,7 @@ "StructQueryParameter", "ArrayQueryParameterType", "ScalarQueryParameterType", + "SqlParameterScalarTypes", "StructQueryParameterType", # Datasets "Dataset", @@ -151,6 +157,11 @@ "ScriptOptions", "TransactionInfo", "DEFAULT_RETRY", + # Standard SQL types + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", # Enum Constants "enums", "AutoRowIDs", @@ -168,7 +179,7 @@ "SchemaUpdateOption", 
"SourceFormat", "SqlTypeNames", - "StandardSqlDataTypes", + "StandardSqlTypeNames", "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 72e711bcf..c2daf4076 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -22,7 +22,7 @@ import typing from google.cloud import bigquery -from google.cloud.bigquery import table, enums, query +from google.cloud.bigquery import table, query from google.cloud.bigquery.dbapi import exceptions @@ -48,7 +48,7 @@ def _parameter_type(name, value, query_parameter_type=None, value_doc=""): query_parameter_type = type_parameters_re.sub("", query_parameter_type) try: parameter_type = getattr( - enums.SqlParameterScalarTypes, query_parameter_type.upper() + query.SqlParameterScalarTypes, query_parameter_type.upper() )._type except AttributeError: raise exceptions.ProgrammingError( @@ -185,7 +185,7 @@ def _parse_type( # Strip type parameters type_ = type_parameters_re.sub("", type_).strip() try: - type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) + type_ = getattr(query.SqlParameterScalarTypes, type_.upper()) except AttributeError: raise exceptions.ProgrammingError( f"The given parameter type, {type_}," diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index d67cebd4c..cecdaa503 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -12,13 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re - import enum -import itertools - -from google.cloud.bigquery_v2 import types as gapic_types -from google.cloud.bigquery.query import ScalarQueryParameterType class AutoRowIDs(enum.Enum): @@ -180,56 +174,27 @@ class KeyResultStatementKind: FIRST_SELECT = "FIRST_SELECT" -_SQL_SCALAR_TYPES = frozenset( - ( - "INT64", - "BOOL", - "FLOAT64", - "STRING", - "BYTES", - "TIMESTAMP", - "DATE", - "TIME", - "DATETIME", - "INTERVAL", - "GEOGRAPHY", - "NUMERIC", - "BIGNUMERIC", - "JSON", - ) -) - -_SQL_NONSCALAR_TYPES = frozenset(("TYPE_KIND_UNSPECIFIED", "ARRAY", "STRUCT")) - - -def _make_sql_scalars_enum(): - """Create an enum based on a gapic enum containing only SQL scalar types.""" - - new_enum = enum.Enum( - "StandardSqlDataTypes", - ( - (member.name, member.value) - for member in gapic_types.StandardSqlDataType.TypeKind - if member.name in _SQL_SCALAR_TYPES - ), - ) - - # make sure the docstring for the new enum is also correct - orig_doc = gapic_types.StandardSqlDataType.TypeKind.__doc__ - skip_pattern = re.compile( - "|".join(_SQL_NONSCALAR_TYPES) - + "|because a JSON object" # the second description line of STRUCT member - ) - - new_doc = "\n".join( - itertools.filterfalse(skip_pattern.search, orig_doc.splitlines()) - ) - new_enum.__doc__ = "An Enum of scalar SQL types.\n" + new_doc - - return new_enum - - -StandardSqlDataTypes = _make_sql_scalars_enum() +class StandardSqlTypeNames(str, enum.Enum): + def _generate_next_value_(name, start, count, last_values): + return name + + TYPE_KIND_UNSPECIFIED = enum.auto() + INT64 = enum.auto() + BOOL = enum.auto() + FLOAT64 = enum.auto() + STRING = enum.auto() + BYTES = enum.auto() + TIMESTAMP = enum.auto() + DATE = enum.auto() + TIME = enum.auto() + DATETIME = enum.auto() + INTERVAL = enum.auto() + GEOGRAPHY = enum.auto() + NUMERIC = enum.auto() + BIGNUMERIC = enum.auto() + JSON = enum.auto() + ARRAY = enum.auto() + STRUCT = enum.auto() # 
See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types @@ -256,28 +221,6 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" -class SqlParameterScalarTypes: - """Supported scalar SQL query parameter types as type objects.""" - - BOOL = ScalarQueryParameterType("BOOL") - BOOLEAN = ScalarQueryParameterType("BOOL") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - BYTES = ScalarQueryParameterType("BYTES") - DATE = ScalarQueryParameterType("DATE") - DATETIME = ScalarQueryParameterType("DATETIME") - DECIMAL = ScalarQueryParameterType("NUMERIC") - FLOAT = ScalarQueryParameterType("FLOAT64") - FLOAT64 = ScalarQueryParameterType("FLOAT64") - GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - INT64 = ScalarQueryParameterType("INT64") - INTEGER = ScalarQueryParameterType("INT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - STRING = ScalarQueryParameterType("STRING") - TIME = ScalarQueryParameterType("TIME") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - - class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 2d3f6660f..18b7b13ec 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -17,24 +17,23 @@ """Define resources for the BigQuery ML Models API.""" import copy - -from google.protobuf import json_format +import datetime +from typing import Any, Dict, Optional, Sequence, Union import google.cloud._helpers -from google.api_core import datetime_helpers from google.cloud.bigquery import _helpers -from google.cloud.bigquery_v2 import types +from google.cloud.bigquery import standard_sql from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -class Model(object): +class Model: """Model represents a machine learning model resource. See https://cloud.google.com/bigquery/docs/reference/rest/v2/models Args: - model_ref (Union[google.cloud.bigquery.model.ModelReference, str]): + model_ref: A pointer to a model. If ``model_ref`` is a string, it must included a project ID, dataset ID, and model ID, each separated by ``.``. @@ -51,11 +50,7 @@ class Model(object): "encryption_configuration": "encryptionConfiguration", } - def __init__(self, model_ref): - # Use _proto on read-only properties to use it's built-in type - # conversion. - self._proto = types.Model()._pb - + def __init__(self, model_ref: Union["ModelReference", str, None]): # Use _properties on read-write properties to match the REST API # semantics. The BigQuery API makes a distinction between an unset # value, a null value, and a default value (0 or ""), but the protocol @@ -66,140 +61,125 @@ def __init__(self, model_ref): model_ref = ModelReference.from_string(model_ref) if model_ref: - self._proto.model_reference.CopyFrom(model_ref._proto) + self._properties["modelReference"] = model_ref.to_api_repr() @property - def reference(self): - """A :class:`~google.cloud.bigquery.model.ModelReference` pointing to - this model. + def reference(self) -> Optional["ModelReference"]: + """A model reference pointing to this model. Read-only. - - Returns: - google.cloud.bigquery.model.ModelReference: pointer to this model. 
""" - ref = ModelReference() - ref._proto = self._proto.model_reference - return ref + resource = self._properties.get("modelReference") + if resource is not None: + return ModelReference.from_api_repr(resource) @property - def project(self): - """str: Project bound to the model""" + def project(self) -> str: + """Project bound to the model.""" return self.reference.project @property - def dataset_id(self): - """str: ID of dataset containing the model.""" + def dataset_id(self) -> str: + """ID of dataset containing the model.""" return self.reference.dataset_id @property - def model_id(self): - """str: The model ID.""" + def model_id(self) -> str: + """The model ID.""" return self.reference.model_id @property - def path(self): - """str: URL path for the model's APIs.""" + def path(self) -> str: + """URL path for the model's APIs.""" return self.reference.path @property - def location(self): - """str: The geographic location where the model resides. This value - is inherited from the dataset. + def location(self) -> str: + """The geographic location where the model resides. + + This value is inherited from the dataset. Read-only. """ - return self._proto.location + return self._properties.get("location") @property - def etag(self): - """str: ETag for the model resource (:data:`None` until - set from the server). + def etag(self) -> str: + """ETag for the model resource (:data:`None` until set from the server). Read-only. """ - return self._proto.etag + return self._properties.get("etag") @property - def created(self): - """Union[datetime.datetime, None]: Datetime at which the model was - created (:data:`None` until set from the server). + def created(self) -> Optional[datetime.datetime]: + """Datetime at which the model was created (:data:`None` until set from the server). Read-only. """ - value = self._proto.creation_time - if value is not None and value != 0: + value = self._properties.get("creationTime") + if value is not None: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) ) @property - def modified(self): - """Union[datetime.datetime, None]: Datetime at which the model was last - modified (:data:`None` until set from the server). + def modified(self) -> Optional[datetime.datetime]: + """Datetime at which the model was last modified (:data:`None` until set from the server). Read-only. """ - value = self._proto.last_modified_time - if value is not None and value != 0: + value = value = self._properties.get("lastModifiedTime") + if value is not None: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) ) @property - def model_type(self): - """google.cloud.bigquery_v2.types.Model.ModelType: Type of the - model resource. + def model_type(self) -> str: + """Type of the model resource. Read-only. - - The value is one of elements of the - :class:`~google.cloud.bigquery_v2.types.Model.ModelType` - enumeration. """ - return self._proto.model_type + return self._properties.get("modelType", "MODEL_TYPE_UNSPECIFIED") @property - def training_runs(self): - """Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]: Information - for all training runs in increasing order of start time. + def training_runs(self) -> Sequence[Dict[str, Any]]: + """Information for all training runs in increasing order of start time. - Read-only. + Dictionaries are in REST API format. 
See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun - An iterable of :class:`~google.cloud.bigquery_v2.types.Model.TrainingRun`. + Read-only. """ - return self._proto.training_runs + return self._properties.get("trainingRuns", []) @property - def feature_columns(self): - """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Input - feature columns that were used to train this model. + def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]: + """Input feature columns that were used to train this model. Read-only. - - An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. """ - return self._proto.feature_columns + return self._properties.get("featureColumns", []) @property - def label_columns(self): - """Sequence[google.cloud.bigquery_v2.types.StandardSqlField]: Label - columns that were used to train this model. The output of the model - will have a ``predicted_`` prefix to these columns. + def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: + """Label columns that were used to train this model. - Read-only. + The output of the model will have a ``predicted_`` prefix to these columns. - An iterable of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. + Read-only. """ - return self._proto.label_columns + return self._properties.get("labelColumns", []) @property - def expires(self): - """Union[datetime.datetime, None]: The datetime when this model - expires. If not present, the model will persist indefinitely. Expired - models will be deleted and their storage reclaimed. + def expires(self) -> Optional[datetime.datetime]: + """The datetime when this model expires. + + If not present, the model will persist indefinitely. Expired models will be + deleted and their storage reclaimed. """ value = self._properties.get("expirationTime") if value is not None: @@ -209,55 +189,48 @@ def expires(self): ) @expires.setter - def expires(self, value): + def expires(self, value: Optional[datetime.datetime]): if value is not None: value = str(google.cloud._helpers._millis_from_datetime(value)) self._properties["expirationTime"] = value @property - def description(self): - """Optional[str]: Description of the model (defaults to - :data:`None`). - """ + def description(self) -> Optional[str]: + """Description of the model (defaults to :data:`None`).""" return self._properties.get("description") @description.setter - def description(self, value): + def description(self, value: Optional[str]): self._properties["description"] = value @property - def friendly_name(self): - """Optional[str]: Title of the table (defaults to :data:`None`). - - Raises: - ValueError: For invalid value types. - """ + def friendly_name(self) -> Optional[str]: + """Title of the table (defaults to :data:`None`).""" return self._properties.get("friendlyName") @friendly_name.setter - def friendly_name(self, value): + def friendly_name(self, value: Optional[str]): self._properties["friendlyName"] = value @property - def labels(self): - """Optional[Dict[str, str]]: Labels for the table. + def labels(self) -> Dict[str, str]: + """Labels for the table. - This method always returns a dict. To change a model's labels, - modify the dict, then call ``Client.update_model``. To delete a - label, set its value to :data:`None` before updating. + This method always returns a dict. To change a model's labels, modify the dict, + then call ``Client.update_model``. To delete a label, set its value to + :data:`None` before updating. 
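# A brief local sketch of the dict-backed Model/ModelReference shown above
# (no API calls involved); the project, dataset, and model IDs are placeholders.
from google.cloud.bigquery.model import Model, ModelReference

ref = ModelReference.from_string("my-project.my_dataset.my_model")  # placeholder IDs
model = Model(ref)
model.description = "demo model"
assert model.reference.model_id == "my_model"
assert model.to_api_repr()["modelReference"]["datasetId"] == "my_dataset"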
""" return self._properties.setdefault("labels", {}) @labels.setter - def labels(self, value): + def labels(self, value: Optional[Dict[str, str]]): if value is None: value = {} self._properties["labels"] = value @property - def encryption_configuration(self): - """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom - encryption configuration for the model. + def encryption_configuration(self) -> Optional[EncryptionConfiguration]: + """Custom encryption configuration for the model. Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` if using default encryption. @@ -272,47 +245,26 @@ def encryption_configuration(self): return prop @encryption_configuration.setter - def encryption_configuration(self, value): + def encryption_configuration(self, value: Optional[EncryptionConfiguration]): api_repr = value if value: api_repr = value.to_api_repr() self._properties["encryptionConfiguration"] = api_repr @classmethod - def from_api_repr(cls, resource: dict) -> "Model": + def from_api_repr(cls, resource: Dict[str, Any]) -> "Model": """Factory: construct a model resource given its API representation Args: - resource (Dict[str, object]): + resource: Model resource representation from the API Returns: - google.cloud.bigquery.model.Model: Model parsed from ``resource``. + Model parsed from ``resource``. """ this = cls(None) - # Keep a reference to the resource as a workaround to find unknown - # field values. - this._properties = resource - - # Convert from millis-from-epoch to timestamp well-known type. - # TODO: Remove this hack once CL 238585470 hits prod. resource = copy.deepcopy(resource) - for training_run in resource.get("trainingRuns", ()): - start_time = training_run.get("startTime") - if not start_time or "-" in start_time: # Already right format? - continue - start_time = datetime_helpers.from_microseconds(1e3 * float(start_time)) - training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) - - try: - this._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) - except json_format.ParseError: - resource["modelType"] = "MODEL_TYPE_UNSPECIFIED" - this._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) + this._properties = resource return this def _build_resource(self, filter_fields): @@ -320,18 +272,18 @@ def _build_resource(self, filter_fields): return _helpers._build_resource_from_properties(self, filter_fields) def __repr__(self): - return "Model(reference={})".format(repr(self.reference)) + return f"Model(reference={self.reference!r})" - def to_api_repr(self) -> dict: + def to_api_repr(self) -> Dict[str, Any]: """Construct the API resource representation of this model. Returns: - Dict[str, object]: Model reference represented as an API resource + Model reference represented as an API resource """ - return json_format.MessageToDict(self._proto) + return copy.deepcopy(self._properties) -class ModelReference(object): +class ModelReference: """ModelReferences are pointers to models. 
See @@ -339,73 +291,60 @@ class ModelReference(object): """ def __init__(self): - self._proto = types.ModelReference()._pb self._properties = {} @property def project(self): """str: Project bound to the model""" - return self._proto.project_id + return self._properties.get("projectId") @property def dataset_id(self): """str: ID of dataset containing the model.""" - return self._proto.dataset_id + return self._properties.get("datasetId") @property def model_id(self): """str: The model ID.""" - return self._proto.model_id + return self._properties.get("modelId") @property - def path(self): - """str: URL path for the model's APIs.""" - return "/projects/%s/datasets/%s/models/%s" % ( - self._proto.project_id, - self._proto.dataset_id, - self._proto.model_id, - ) + def path(self) -> str: + """URL path for the model's APIs.""" + return f"/projects/{self.project}/datasets/{self.dataset_id}/models/{self.model_id}" @classmethod - def from_api_repr(cls, resource): - """Factory: construct a model reference given its API representation + def from_api_repr(cls, resource: Dict[str, Any]) -> "ModelReference": + """Factory: construct a model reference given its API representation. Args: - resource (Dict[str, object]): + resource: Model reference representation returned from the API Returns: - google.cloud.bigquery.model.ModelReference: - Model reference parsed from ``resource``. + Model reference parsed from ``resource``. """ ref = cls() - # Keep a reference to the resource as a workaround to find unknown - # field values. ref._properties = resource - ref._proto = json_format.ParseDict( - resource, types.ModelReference()._pb, ignore_unknown_fields=True - ) - return ref @classmethod def from_string( - cls, model_id: str, default_project: str = None + cls, model_id: str, default_project: Optional[str] = None ) -> "ModelReference": """Construct a model reference from model ID string. Args: - model_id (str): + model_id: A model ID in standard SQL format. If ``default_project`` is not specified, this must included a project ID, dataset ID, and model ID, each separated by ``.``. - default_project (Optional[str]): + default_project: The project ID to use when ``model_id`` does not include a project ID. Returns: - google.cloud.bigquery.model.ModelReference: - Model reference parsed from ``model_id``. + Model reference parsed from ``model_id``. Raises: ValueError: @@ -419,13 +358,13 @@ def from_string( {"projectId": proj, "datasetId": dset, "modelId": model} ) - def to_api_repr(self) -> dict: + def to_api_repr(self) -> Dict[str, Any]: """Construct the API resource representation of this model reference. Returns: - Dict[str, object]: Model reference represented as an API resource + Model reference represented as an API resource. """ - return json_format.MessageToDict(self._proto) + return copy.deepcopy(self._properties) def _key(self): """Unique key for this model. @@ -437,7 +376,7 @@ def _key(self): def __eq__(self, other): if not isinstance(other, ModelReference): return NotImplemented - return self._proto == other._proto + return self._properties == other._properties def __ne__(self, other): return not self == other diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 1f449f189..d58d46fd9 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -339,7 +339,7 @@ class ScalarQueryParameter(_AbstractQueryParameter): type_: Name of parameter type. 
See :class:`google.cloud.bigquery.enums.SqlTypeNames` and - :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + :class:`google.cloud.bigquery.query.SqlParameterScalarTypes` for supported types. value: @@ -750,6 +750,28 @@ def __repr__(self): return "StructQueryParameter{}".format(self._key()) +class SqlParameterScalarTypes: + """Supported scalar SQL query parameter types as type objects.""" + + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + BYTES = ScalarQueryParameterType("BYTES") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") + FLOAT = ScalarQueryParameterType("FLOAT64") + FLOAT64 = ScalarQueryParameterType("FLOAT64") + GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") + TIME = ScalarQueryParameterType("TIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") + + class _QueryResults(object): """Results of a query. diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index a776212c3..677fb1178 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -16,12 +16,12 @@ """Define resources for the BigQuery Routines API.""" -from google.protobuf import json_format +from typing import Optional import google.cloud._helpers from google.cloud.bigquery import _helpers -import google.cloud.bigquery_v2.types -from google.cloud.bigquery_v2.types import StandardSqlTableType +from google.cloud.bigquery.standard_sql import StandardSqlDataType +from google.cloud.bigquery.standard_sql import StandardSqlTableType class RoutineType: @@ -190,7 +190,7 @@ def arguments(self, value): @property def return_type(self): - """google.cloud.bigquery_v2.types.StandardSqlDataType: Return type of + """google.cloud.bigquery.StandardSqlDataType: Return type of the routine. 
If absent, the return type is inferred from @@ -206,16 +206,12 @@ def return_type(self): if not resource: return resource - output = google.cloud.bigquery_v2.types.StandardSqlDataType() - raw_protobuf = json_format.ParseDict( - resource, output._pb, ignore_unknown_fields=True - ) - return type(output).wrap(raw_protobuf) + return StandardSqlDataType.from_api_repr(resource) @return_type.setter - def return_type(self, value): + def return_type(self, value: StandardSqlDataType): if value: - resource = json_format.MessageToDict(value._pb) + resource = value.to_api_repr() else: resource = None self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @@ -232,20 +228,14 @@ def return_table_type(self) -> StandardSqlTableType: if not resource: return resource - output = google.cloud.bigquery_v2.types.StandardSqlTableType() - raw_protobuf = json_format.ParseDict( - resource, output._pb, ignore_unknown_fields=True - ) - return type(output).wrap(raw_protobuf) + return StandardSqlTableType.from_api_repr(resource) @return_table_type.setter - def return_table_type(self, value): + def return_table_type(self, value: Optional[StandardSqlTableType]): if not value: resource = None else: - resource = { - "columns": [json_format.MessageToDict(col._pb) for col in value.columns] - } + resource = value.to_api_repr() self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource @@ -407,7 +397,7 @@ def mode(self, value): @property def data_type(self): - """Optional[google.cloud.bigquery_v2.types.StandardSqlDataType]: Type + """Optional[google.cloud.bigquery.StandardSqlDataType]: Type of a variable, e.g., a function argument. See: @@ -417,16 +407,12 @@ def data_type(self): if not resource: return resource - output = google.cloud.bigquery_v2.types.StandardSqlDataType() - raw_protobuf = json_format.ParseDict( - resource, output._pb, ignore_unknown_fields=True - ) - return type(output).wrap(raw_protobuf) + return StandardSqlDataType.from_api_repr(resource) @data_type.setter def data_type(self, value): if value: - resource = json_format.MessageToDict(value._pb) + resource = value.to_api_repr() else: resource = None self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 157db7ce6..b52e288f4 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -17,7 +17,8 @@ import collections from typing import Optional -from google.cloud.bigquery_v2 import types +from google.cloud.bigquery import standard_sql +from google.cloud.bigquery.enums import StandardSqlTypeNames _DEFAULT_VALUE = object() @@ -27,26 +28,26 @@ # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { - "STRING": types.StandardSqlDataType.TypeKind.STRING, - "BYTES": types.StandardSqlDataType.TypeKind.BYTES, - "INTEGER": types.StandardSqlDataType.TypeKind.INT64, - "INT64": types.StandardSqlDataType.TypeKind.INT64, - "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, - "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, - "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, - "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC, - "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, - "BOOL": types.StandardSqlDataType.TypeKind.BOOL, - "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, - "RECORD": types.StandardSqlDataType.TypeKind.STRUCT, - "STRUCT": 
types.StandardSqlDataType.TypeKind.STRUCT, - "TIMESTAMP": types.StandardSqlDataType.TypeKind.TIMESTAMP, - "DATE": types.StandardSqlDataType.TypeKind.DATE, - "TIME": types.StandardSqlDataType.TypeKind.TIME, - "DATETIME": types.StandardSqlDataType.TypeKind.DATETIME, + "STRING": StandardSqlTypeNames.STRING, + "BYTES": StandardSqlTypeNames.BYTES, + "INTEGER": StandardSqlTypeNames.INT64, + "INT64": StandardSqlTypeNames.INT64, + "FLOAT": StandardSqlTypeNames.FLOAT64, + "FLOAT64": StandardSqlTypeNames.FLOAT64, + "NUMERIC": StandardSqlTypeNames.NUMERIC, + "BIGNUMERIC": StandardSqlTypeNames.BIGNUMERIC, + "BOOLEAN": StandardSqlTypeNames.BOOL, + "BOOL": StandardSqlTypeNames.BOOL, + "GEOGRAPHY": StandardSqlTypeNames.GEOGRAPHY, + "RECORD": StandardSqlTypeNames.STRUCT, + "STRUCT": StandardSqlTypeNames.STRUCT, + "TIMESTAMP": StandardSqlTypeNames.TIMESTAMP, + "DATE": StandardSqlTypeNames.DATE, + "TIME": StandardSqlTypeNames.TIME, + "DATETIME": StandardSqlTypeNames.DATETIME, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } -"""String names of the legacy SQL types to integer codes of Standard SQL types.""" +"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" class SchemaField(object): @@ -285,48 +286,37 @@ def _key(self): policy_tags, ) - def to_standard_sql(self) -> types.StandardSqlField: - """Return the field as the standard SQL field representation object. - - Returns: - An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`. - """ - sql_type = types.StandardSqlDataType() + def to_standard_sql(self) -> standard_sql.StandardSqlField: + """Return the field as the standard SQL field representation object.""" + sql_type = standard_sql.StandardSqlDataType() if self.mode == "REPEATED": - sql_type.type_kind = types.StandardSqlDataType.TypeKind.ARRAY + sql_type.type_kind = StandardSqlTypeNames.ARRAY else: sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, - types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, + self.field_type, StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) - if sql_type.type_kind == types.StandardSqlDataType.TypeKind.ARRAY: # noqa: E721 + if sql_type.type_kind == StandardSqlTypeNames.ARRAY: # noqa: E721 array_element_type = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, - types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, + self.field_type, StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + ) + sql_type.array_element_type = standard_sql.StandardSqlDataType( + type_kind=array_element_type ) - sql_type.array_element_type.type_kind = array_element_type # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type - if ( - array_element_type - == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 - ): - sql_type.array_element_type.struct_type.fields.extend( - field.to_standard_sql() for field in self.fields + if array_element_type == StandardSqlTypeNames.STRUCT: # noqa: E721 + sql_type.array_element_type.struct_type = standard_sql.StandardSqlStructType( + fields=(field.to_standard_sql() for field in self.fields) ) - - elif ( - sql_type.type_kind - == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 - ): - sql_type.struct_type.fields.extend( - field.to_standard_sql() for field in self.fields + elif sql_type.type_kind == StandardSqlTypeNames.STRUCT: # noqa: E721 + sql_type.struct_type = standard_sql.StandardSqlStructType( + fields=(field.to_standard_sql() for field in 
self.fields) ) - return types.StandardSqlField(name=self.name, type=sql_type) + return standard_sql.StandardSqlField(name=self.name, type=sql_type) def __eq__(self, other): if not isinstance(other, SchemaField): diff --git a/google/cloud/bigquery/standard_sql.py b/google/cloud/bigquery/standard_sql.py new file mode 100644 index 000000000..479929c74 --- /dev/null +++ b/google/cloud/bigquery/standard_sql.py @@ -0,0 +1,363 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +from typing import Any, Dict, Iterable, List, Optional + +from google.cloud.bigquery.enums import StandardSqlTypeNames + + +class StandardSqlDataType: + """The type of a variable, e.g., a function argument. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType + + Examples: + + .. code-block:: text + + INT64: {type_kind="INT64"} + ARRAY: {type_kind="ARRAY", array_element_type="STRING"} + STRUCT: { + type_kind="STRUCT", + struct_type={ + fields=[ + {name="x", type={type_kind="STRING"}}, + { + name="y", + type={type_kind="ARRAY", array_element_type="DATE"} + } + ] + } + } + + Args: + type_kind: + The top level type of this field. Can be any standard SQL data type, + e.g. INT64, DATE, ARRAY. + array_element_type: + The type of the array's elements, if type_kind is ARRAY. + struct_type: + The fields of this struct, in order, if type_kind is STRUCT. + """ + + def __init__( + self, + type_kind: Optional[ + StandardSqlTypeNames + ] = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + array_element_type: Optional["StandardSqlDataType"] = None, + struct_type: Optional["StandardSqlStructType"] = None, + ): + self._properties = {} + + self.type_kind = type_kind + self.array_element_type = array_element_type + self.struct_type = struct_type + + @property + def type_kind(self) -> StandardSqlTypeNames: + """The top level type of this field. + + Can be any standard SQL data type, e.g. INT64, DATE, ARRAY. + """ + kind = self._properties["typeKind"] + return StandardSqlTypeNames[kind] # pytype: disable=missing-parameter + + @type_kind.setter + def type_kind(self, value: Optional[StandardSqlTypeNames]): + if not value: + kind = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED.value + else: + kind = value.value + self._properties["typeKind"] = kind + + @property + def array_element_type(self) -> Optional["StandardSqlDataType"]: + """The type of the array's elements, if type_kind is ARRAY.""" + element_type = self._properties.get("arrayElementType") + + if element_type is None: + return None + + result = StandardSqlDataType() + result._properties = element_type # We do not use a copy on purpose. 
+ return result + + @array_element_type.setter + def array_element_type(self, value: Optional["StandardSqlDataType"]): + element_type = None if value is None else value.to_api_repr() + + if element_type is None: + self._properties.pop("arrayElementType", None) + else: + self._properties["arrayElementType"] = element_type + + @property + def struct_type(self) -> Optional["StandardSqlStructType"]: + """The fields of this struct, in order, if type_kind is STRUCT.""" + struct_info = self._properties.get("structType") + + if struct_info is None: + return None + + result = StandardSqlStructType() + result._properties = struct_info # We do not use a copy on purpose. + return result + + @struct_type.setter + def struct_type(self, value: Optional["StandardSqlStructType"]): + struct_type = None if value is None else value.to_api_repr() + + if struct_type is None: + self._properties.pop("structType", None) + else: + self._properties["structType"] = struct_type + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL data type.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]): + """Construct an SQL data type instance given its API representation.""" + type_kind = resource.get("typeKind") + if type_kind not in StandardSqlTypeNames.__members__: + type_kind = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED + else: + # Convert string to an enum member. + type_kind = StandardSqlTypeNames[ # pytype: disable=missing-parameter + type_kind + ] + + array_element_type = None + if type_kind == StandardSqlTypeNames.ARRAY: + element_type = resource.get("arrayElementType") + if element_type: + array_element_type = cls.from_api_repr(element_type) + + struct_type = None + if type_kind == StandardSqlTypeNames.STRUCT: + struct_info = resource.get("structType") + if struct_info: + struct_type = StandardSqlStructType.from_api_repr(struct_info) + + return cls(type_kind, array_element_type, struct_type) + + def __eq__(self, other): + if not isinstance(other, StandardSqlDataType): + return NotImplemented + else: + return ( + self.type_kind == other.type_kind + and self.array_element_type == other.array_element_type + and self.struct_type == other.struct_type + ) + + __hash__ = None + + def __str__(self): + result = f"{self.__class__.__name__}(type_kind={self.type_kind!r}, ...)" + return result + + +class StandardSqlField: + """A field or a column. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlField + + Args: + name: + The name of this field. Can be absent for struct fields. + type: + The type of this parameter. Absent if not explicitly specified. + + For example, CREATE FUNCTION statement can omit the return type; in this + case the output parameter does not have this "type" field). + """ + + def __init__( + self, name: Optional[str] = None, type: Optional[StandardSqlDataType] = None + ): + if type is not None: + type = type.to_api_repr() + + self._properties = {"name": name, "type": type} + + @property + def name(self) -> Optional[str]: + """The name of this field. Can be absent for struct fields.""" + return self._properties["name"] + + @name.setter + def name(self, value: Optional[str]): + self._properties["name"] = value + + @property + def type(self) -> Optional[StandardSqlDataType]: + """The type of this parameter. Absent if not explicitly specified. 
+ + For example, CREATE FUNCTION statement can omit the return type; in this + case the output parameter does not have this "type" field). + """ + type_info = self._properties["type"] + + if type_info is None: + return None + + result = StandardSqlDataType() + result._properties = type_info # We do not use a copy on purpose. + return result + + @type.setter + def type(self, value: Optional[StandardSqlDataType]): + if value is not None: + value = value.to_api_repr() + self._properties["type"] = value + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL field.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]): + """Construct an SQL field instance given its API representation.""" + result = cls( + name=resource.get("name"), + type=StandardSqlDataType.from_api_repr(resource.get("type", {})), + ) + return result + + def __eq__(self, other): + if not isinstance(other, StandardSqlField): + return NotImplemented + else: + return self.name == other.name and self.type == other.type + + __hash__ = None + + +class StandardSqlStructType: + """Type of a struct field. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType#StandardSqlStructType + + Args: + fields: The fields in this struct. + """ + + def __init__(self, fields: Optional[Iterable[StandardSqlField]] = None): + if fields is None: + fields = [] + self._properties = {"fields": [field.to_api_repr() for field in fields]} + + @property + def fields(self) -> List[StandardSqlField]: + """The fields in this struct.""" + result = [] + + for field_resource in self._properties.get("fields", []): + field = StandardSqlField() + field._properties = field_resource # We do not use a copy on purpose. + result.append(field) + + return result + + @fields.setter + def fields(self, value: Iterable[StandardSqlField]): + self._properties["fields"] = [field.to_api_repr() for field in value] + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL struct type.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "StandardSqlStructType": + """Construct an SQL struct type instance given its API representation.""" + fields = ( + StandardSqlField.from_api_repr(field_resource) + for field_resource in resource.get("fields", []) + ) + return cls(fields=fields) + + def __eq__(self, other): + if not isinstance(other, StandardSqlStructType): + return NotImplemented + else: + return self.fields == other.fields + + __hash__ = None + + +class StandardSqlTableType: + """A table type. + + See: + https://cloud.google.com/workflows/docs/reference/googleapis/bigquery/v2/Overview#StandardSqlTableType + + Args: + columns: The columns in this table type. + """ + + def __init__(self, columns: Iterable[StandardSqlField]): + self._properties = {"columns": [col.to_api_repr() for col in columns]} + + @property + def columns(self) -> List[StandardSqlField]: + """The columns in this table type.""" + result = [] + + for column_resource in self._properties.get("columns", []): + column = StandardSqlField() + column._properties = column_resource # We do not use a copy on purpose. 
+ result.append(column) + + return result + + @columns.setter + def columns(self, value: Iterable[StandardSqlField]): + self._properties["columns"] = [col.to_api_repr() for col in value] + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this SQL table type.""" + return copy.deepcopy(self._properties) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "StandardSqlTableType": + """Construct an SQL table type instance given its API representation.""" + columns = [] + + for column_resource in resource.get("columns", []): + type_ = column_resource.get("type") + if type_ is None: + type_ = {} + + column = StandardSqlField( + name=column_resource.get("name"), + type=StandardSqlDataType.from_api_repr(type_), + ) + columns.append(column) + + return cls(columns=columns) + + def __eq__(self, other): + if not isinstance(other, StandardSqlTableType): + return NotImplemented + else: + return self.columns == other.columns + + __hash__ = None diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py deleted file mode 100644 index f9957efa9..000000000 --- a/google/cloud/bigquery_v2/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
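As a rough illustration of how the new plain-Python classes in google/cloud/bigquery/standard_sql.py above fit together, the sketch below (not itself part of the diff; the field names "x", "y", and "tags" are invented for the example) builds a STRUCT type, round-trips it through its REST API representation, and shows that SchemaField.to_standard_sql() now returns these classes as well:

    from google.cloud.bigquery.enums import StandardSqlTypeNames
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.standard_sql import (
        StandardSqlDataType,
        StandardSqlField,
        StandardSqlStructType,
    )

    # STRUCT<x INT64, y DATE>, built directly from the new classes.
    point_type = StandardSqlDataType(
        type_kind=StandardSqlTypeNames.STRUCT,
        struct_type=StandardSqlStructType(
            fields=[
                StandardSqlField("x", StandardSqlDataType(StandardSqlTypeNames.INT64)),
                StandardSqlField("y", StandardSqlDataType(StandardSqlTypeNames.DATE)),
            ]
        ),
    )

    # Round trip through the REST resource representation, e.g.
    # {"typeKind": "STRUCT", "structType": {"fields": [...]}}.
    resource = point_type.to_api_repr()
    assert StandardSqlDataType.from_api_repr(resource) == point_type

    # A REPEATED legacy field now converts to an ARRAY of the element type.
    tags = SchemaField("tags", "STRING", mode="REPEATED").to_standard_sql()
    assert tags.type.type_kind == StandardSqlTypeNames.ARRAY
    assert tags.type.array_element_type.type_kind == StandardSqlTypeNames.STRING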
-# - - -from .types.encryption_config import EncryptionConfiguration -from .types.model import DeleteModelRequest -from .types.model import GetModelRequest -from .types.model import ListModelsRequest -from .types.model import ListModelsResponse -from .types.model import Model -from .types.model import PatchModelRequest -from .types.model_reference import ModelReference -from .types.standard_sql import StandardSqlDataType -from .types.standard_sql import StandardSqlField -from .types.standard_sql import StandardSqlStructType -from .types.standard_sql import StandardSqlTableType -from .types.table_reference import TableReference - -__all__ = ( - "DeleteModelRequest", - "EncryptionConfiguration", - "GetModelRequest", - "ListModelsRequest", - "ListModelsResponse", - "Model", - "ModelReference", - "PatchModelRequest", - "StandardSqlDataType", - "StandardSqlField", - "StandardSqlStructType", - "StandardSqlTableType", - "TableReference", -) diff --git a/google/cloud/bigquery_v2/gapic_metadata.json b/google/cloud/bigquery_v2/gapic_metadata.json deleted file mode 100644 index 3251a2630..000000000 --- a/google/cloud/bigquery_v2/gapic_metadata.json +++ /dev/null @@ -1,63 +0,0 @@ - { - "comment": "This file maps proto services/RPCs to the corresponding library clients/methods", - "language": "python", - "libraryPackage": "google.cloud.bigquery_v2", - "protoPackage": "google.cloud.bigquery.v2", - "schema": "1.0", - "services": { - "ModelService": { - "clients": { - "grpc": { - "libraryClient": "ModelServiceClient", - "rpcs": { - "DeleteModel": { - "methods": [ - "delete_model" - ] - }, - "GetModel": { - "methods": [ - "get_model" - ] - }, - "ListModels": { - "methods": [ - "list_models" - ] - }, - "PatchModel": { - "methods": [ - "patch_model" - ] - } - } - }, - "grpc-async": { - "libraryClient": "ModelServiceAsyncClient", - "rpcs": { - "DeleteModel": { - "methods": [ - "delete_model" - ] - }, - "GetModel": { - "methods": [ - "get_model" - ] - }, - "ListModels": { - "methods": [ - "list_models" - ] - }, - "PatchModel": { - "methods": [ - "patch_model" - ] - } - } - } - } - } - } -} diff --git a/google/cloud/bigquery_v2/py.typed b/google/cloud/bigquery_v2/py.typed deleted file mode 100644 index e73777993..000000000 --- a/google/cloud/bigquery_v2/py.typed +++ /dev/null @@ -1,2 +0,0 @@ -# Marker file for PEP 561. -# The google-cloud-bigquery package uses inline types. diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py deleted file mode 100644 index 83bbb3a54..000000000 --- a/google/cloud/bigquery_v2/types/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from .encryption_config import EncryptionConfiguration -from .model import ( - DeleteModelRequest, - GetModelRequest, - ListModelsRequest, - ListModelsResponse, - Model, - PatchModelRequest, -) -from .model_reference import ModelReference -from .standard_sql import ( - StandardSqlDataType, - StandardSqlField, - StandardSqlStructType, - StandardSqlTableType, -) -from .table_reference import TableReference - -__all__ = ( - "EncryptionConfiguration", - "DeleteModelRequest", - "GetModelRequest", - "ListModelsRequest", - "ListModelsResponse", - "Model", - "PatchModelRequest", - "ModelReference", - "StandardSqlDataType", - "StandardSqlField", - "StandardSqlStructType", - "StandardSqlTableType", - "TableReference", -) diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py deleted file mode 100644 index 4b9139733..000000000 --- a/google/cloud/bigquery_v2/types/encryption_config.py +++ /dev/null @@ -1,42 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import proto # type: ignore - -from google.protobuf import wrappers_pb2 # type: ignore - - -__protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"EncryptionConfiguration",}, -) - - -class EncryptionConfiguration(proto.Message): - r""" - Attributes: - kms_key_name (google.protobuf.wrappers_pb2.StringValue): - Optional. Describes the Cloud KMS encryption - key that will be used to protect destination - BigQuery table. The BigQuery Service Account - associated with your project requires access to - this encryption key. - """ - - kms_key_name = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.StringValue, - ) - - -__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py deleted file mode 100644 index 706418401..000000000 --- a/google/cloud/bigquery_v2/types/model.py +++ /dev/null @@ -1,1507 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
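The remaining hunks below delete the generated google.cloud.bigquery_v2 modules whose standard_sql types the new module replaces. A minimal migration sketch, assuming only the import paths visible in this diff (whether these names are also re-exported from the top-level google.cloud.bigquery package is not shown here):

    # Before this change (package deleted below):
    #   from google.cloud.bigquery_v2.types import StandardSqlDataType, StandardSqlField
    # After this change, the hand-written equivalents live in:
    from google.cloud.bigquery.standard_sql import (
        StandardSqlDataType,
        StandardSqlField,
        StandardSqlStructType,
        StandardSqlTableType,
    )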
-# -import proto # type: ignore - -from google.cloud.bigquery_v2.types import encryption_config -from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference -from google.cloud.bigquery_v2.types import standard_sql -from google.cloud.bigquery_v2.types import table_reference -from google.protobuf import timestamp_pb2 # type: ignore -from google.protobuf import wrappers_pb2 # type: ignore - - -__protobuf__ = proto.module( - package="google.cloud.bigquery.v2", - manifest={ - "Model", - "GetModelRequest", - "PatchModelRequest", - "DeleteModelRequest", - "ListModelsRequest", - "ListModelsResponse", - }, -) - - -class Model(proto.Message): - r""" - Attributes: - etag (str): - Output only. A hash of this resource. - model_reference (google.cloud.bigquery_v2.types.ModelReference): - Required. Unique identifier for this model. - creation_time (int): - Output only. The time when this model was - created, in millisecs since the epoch. - last_modified_time (int): - Output only. The time when this model was - last modified, in millisecs since the epoch. - description (str): - Optional. A user-friendly description of this - model. - friendly_name (str): - Optional. A descriptive name for this model. - labels (Sequence[google.cloud.bigquery_v2.types.Model.LabelsEntry]): - The labels associated with this model. You - can use these to organize and group your models. - Label keys and values can be no longer than 63 - characters, can only contain lowercase letters, - numeric characters, underscores and dashes. - International characters are allowed. Label - values are optional. Label keys must start with - a letter and each label in the list must have a - different key. - expiration_time (int): - Optional. The time when this model expires, - in milliseconds since the epoch. If not present, - the model will persist indefinitely. Expired - models will be deleted and their storage - reclaimed. The defaultTableExpirationMs - property of the encapsulating dataset can be - used to set a default expirationTime on newly - created models. - location (str): - Output only. The geographic location where - the model resides. This value is inherited from - the dataset. - encryption_configuration (google.cloud.bigquery_v2.types.EncryptionConfiguration): - Custom encryption configuration (e.g., Cloud - KMS keys). This shows the encryption - configuration of the model data while stored in - BigQuery storage. This field can be used with - PatchModel to update encryption key for an - already encrypted model. - model_type (google.cloud.bigquery_v2.types.Model.ModelType): - Output only. Type of the model resource. - training_runs (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]): - Output only. Information for all training runs in increasing - order of start_time. - feature_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): - Output only. Input feature columns that were - used to train this model. - label_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): - Output only. Label columns that were used to train this - model. The output of the model will have a `predicted_` - prefix to these columns. - best_trial_id (int): - The best trial_id across all training runs. 
- """ - - class ModelType(proto.Enum): - r"""Indicates the type of the Model.""" - MODEL_TYPE_UNSPECIFIED = 0 - LINEAR_REGRESSION = 1 - LOGISTIC_REGRESSION = 2 - KMEANS = 3 - MATRIX_FACTORIZATION = 4 - DNN_CLASSIFIER = 5 - TENSORFLOW = 6 - DNN_REGRESSOR = 7 - BOOSTED_TREE_REGRESSOR = 9 - BOOSTED_TREE_CLASSIFIER = 10 - ARIMA = 11 - AUTOML_REGRESSOR = 12 - AUTOML_CLASSIFIER = 13 - ARIMA_PLUS = 19 - - class LossType(proto.Enum): - r"""Loss metric to evaluate model training performance.""" - LOSS_TYPE_UNSPECIFIED = 0 - MEAN_SQUARED_LOSS = 1 - MEAN_LOG_LOSS = 2 - - class DistanceType(proto.Enum): - r"""Distance metric used to compute the distance between two - points. - """ - DISTANCE_TYPE_UNSPECIFIED = 0 - EUCLIDEAN = 1 - COSINE = 2 - - class DataSplitMethod(proto.Enum): - r"""Indicates the method to split input data into multiple - tables. - """ - DATA_SPLIT_METHOD_UNSPECIFIED = 0 - RANDOM = 1 - CUSTOM = 2 - SEQUENTIAL = 3 - NO_SPLIT = 4 - AUTO_SPLIT = 5 - - class DataFrequency(proto.Enum): - r"""Type of supported data frequency for time series forecasting - models. - """ - DATA_FREQUENCY_UNSPECIFIED = 0 - AUTO_FREQUENCY = 1 - YEARLY = 2 - QUARTERLY = 3 - MONTHLY = 4 - WEEKLY = 5 - DAILY = 6 - HOURLY = 7 - PER_MINUTE = 8 - - class HolidayRegion(proto.Enum): - r"""Type of supported holiday regions for time series forecasting - models. - """ - HOLIDAY_REGION_UNSPECIFIED = 0 - GLOBAL = 1 - NA = 2 - JAPAC = 3 - EMEA = 4 - LAC = 5 - AE = 6 - AR = 7 - AT = 8 - AU = 9 - BE = 10 - BR = 11 - CA = 12 - CH = 13 - CL = 14 - CN = 15 - CO = 16 - CS = 17 - CZ = 18 - DE = 19 - DK = 20 - DZ = 21 - EC = 22 - EE = 23 - EG = 24 - ES = 25 - FI = 26 - FR = 27 - GB = 28 - GR = 29 - HK = 30 - HU = 31 - ID = 32 - IE = 33 - IL = 34 - IN = 35 - IR = 36 - IT = 37 - JP = 38 - KR = 39 - LV = 40 - MA = 41 - MX = 42 - MY = 43 - NG = 44 - NL = 45 - NO = 46 - NZ = 47 - PE = 48 - PH = 49 - PK = 50 - PL = 51 - PT = 52 - RO = 53 - RS = 54 - RU = 55 - SA = 56 - SE = 57 - SG = 58 - SI = 59 - SK = 60 - TH = 61 - TR = 62 - TW = 63 - UA = 64 - US = 65 - VE = 66 - VN = 67 - ZA = 68 - - class LearnRateStrategy(proto.Enum): - r"""Indicates the learning rate optimization strategy to use.""" - LEARN_RATE_STRATEGY_UNSPECIFIED = 0 - LINE_SEARCH = 1 - CONSTANT = 2 - - class OptimizationStrategy(proto.Enum): - r"""Indicates the optimization strategy used for training.""" - OPTIMIZATION_STRATEGY_UNSPECIFIED = 0 - BATCH_GRADIENT_DESCENT = 1 - NORMAL_EQUATION = 2 - - class FeedbackType(proto.Enum): - r"""Indicates the training algorithm to use for matrix - factorization models. - """ - FEEDBACK_TYPE_UNSPECIFIED = 0 - IMPLICIT = 1 - EXPLICIT = 2 - - class SeasonalPeriod(proto.Message): - r""" """ - - class SeasonalPeriodType(proto.Enum): - r"""""" - SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0 - NO_SEASONALITY = 1 - DAILY = 2 - WEEKLY = 3 - MONTHLY = 4 - QUARTERLY = 5 - YEARLY = 6 - - class KmeansEnums(proto.Message): - r""" """ - - class KmeansInitializationMethod(proto.Enum): - r"""Indicates the method used to initialize the centroids for - KMeans clustering algorithm. - """ - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 - RANDOM = 1 - CUSTOM = 2 - KMEANS_PLUS_PLUS = 3 - - class RegressionMetrics(proto.Message): - r"""Evaluation metrics for regression and explicit feedback type - matrix factorization models. - - Attributes: - mean_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): - Mean absolute error. - mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): - Mean squared error. 
- mean_squared_log_error (google.protobuf.wrappers_pb2.DoubleValue): - Mean squared log error. - median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): - Median absolute error. - r_squared (google.protobuf.wrappers_pb2.DoubleValue): - R^2 score. This corresponds to r2_score in ML.EVALUATE. - """ - - mean_absolute_error = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, - ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, - ) - mean_squared_log_error = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, - ) - median_absolute_error = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, - ) - r_squared = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, - ) - - class AggregateClassificationMetrics(proto.Message): - r"""Aggregate metrics for classification/classifier models. For - multi-class models, the metrics are either macro-averaged or - micro-averaged. When macro-averaged, the metrics are calculated - for each label and then an unweighted average is taken of those - values. When micro-averaged, the metric is calculated globally - by counting the total number of correctly predicted rows. - - Attributes: - precision (google.protobuf.wrappers_pb2.DoubleValue): - Precision is the fraction of actual positive - predictions that had positive actual labels. For - multiclass this is a macro-averaged metric - treating each class as a binary classifier. - recall (google.protobuf.wrappers_pb2.DoubleValue): - Recall is the fraction of actual positive - labels that were given a positive prediction. - For multiclass this is a macro-averaged metric. - accuracy (google.protobuf.wrappers_pb2.DoubleValue): - Accuracy is the fraction of predictions given - the correct label. For multiclass this is a - micro-averaged metric. - threshold (google.protobuf.wrappers_pb2.DoubleValue): - Threshold at which the metrics are computed. - For binary classification models this is the - positive class threshold. For multi-class - classfication models this is the confidence - threshold. - f1_score (google.protobuf.wrappers_pb2.DoubleValue): - The F1 score is an average of recall and - precision. For multiclass this is a macro- - averaged metric. - log_loss (google.protobuf.wrappers_pb2.DoubleValue): - Logarithmic Loss. For multiclass this is a - macro-averaged metric. - roc_auc (google.protobuf.wrappers_pb2.DoubleValue): - Area Under a ROC Curve. For multiclass this - is a macro-averaged metric. - """ - - precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, - ) - recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,) - accuracy = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, - ) - threshold = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, - ) - f1_score = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, - ) - log_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, - ) - roc_auc = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, - ) - - class BinaryClassificationMetrics(proto.Message): - r"""Evaluation metrics for binary classification/classifier - models. - - Attributes: - aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): - Aggregate classification metrics. 
- binary_confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): - Binary confusion matrix at multiple - thresholds. - positive_label (str): - Label representing the positive class. - negative_label (str): - Label representing the negative class. - """ - - class BinaryConfusionMatrix(proto.Message): - r"""Confusion matrix for binary classification models. - Attributes: - positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): - Threshold value used when computing each of - the following metric. - true_positives (google.protobuf.wrappers_pb2.Int64Value): - Number of true samples predicted as true. - false_positives (google.protobuf.wrappers_pb2.Int64Value): - Number of false samples predicted as true. - true_negatives (google.protobuf.wrappers_pb2.Int64Value): - Number of true samples predicted as false. - false_negatives (google.protobuf.wrappers_pb2.Int64Value): - Number of false samples predicted as false. - precision (google.protobuf.wrappers_pb2.DoubleValue): - The fraction of actual positive predictions - that had positive actual labels. - recall (google.protobuf.wrappers_pb2.DoubleValue): - The fraction of actual positive labels that - were given a positive prediction. - f1_score (google.protobuf.wrappers_pb2.DoubleValue): - The equally weighted average of recall and - precision. - accuracy (google.protobuf.wrappers_pb2.DoubleValue): - The fraction of predictions given the correct - label. - """ - - positive_class_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, - ) - true_positives = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, - ) - false_positives = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, - ) - true_negatives = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, - ) - false_negatives = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value, - ) - precision = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, - ) - recall = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, - ) - f1_score = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue, - ) - accuracy = proto.Field( - proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue, - ) - - aggregate_classification_metrics = proto.Field( - proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", - ) - binary_confusion_matrix_list = proto.RepeatedField( - proto.MESSAGE, - number=2, - message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", - ) - positive_label = proto.Field(proto.STRING, number=3,) - negative_label = proto.Field(proto.STRING, number=4,) - - class MultiClassClassificationMetrics(proto.Message): - r"""Evaluation metrics for multi-class classification/classifier - models. - - Attributes: - aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): - Aggregate classification metrics. - confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix]): - Confusion matrix at different thresholds. - """ - - class ConfusionMatrix(proto.Message): - r"""Confusion matrix for multi-class classification models. - Attributes: - confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): - Confidence threshold used when computing the - entries of the confusion matrix. 
- rows (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): - One row per actual label. - """ - - class Entry(proto.Message): - r"""A single entry in the confusion matrix. - Attributes: - predicted_label (str): - The predicted label. For confidence_threshold > 0, we will - also add an entry indicating the number of items under the - confidence threshold. - item_count (google.protobuf.wrappers_pb2.Int64Value): - Number of items being predicted as this - label. - """ - - predicted_label = proto.Field(proto.STRING, number=1,) - item_count = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, - ) - - class Row(proto.Message): - r"""A single row in the confusion matrix. - Attributes: - actual_label (str): - The original label of this row. - entries (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): - Info describing predicted label distribution. - """ - - actual_label = proto.Field(proto.STRING, number=1,) - entries = proto.RepeatedField( - proto.MESSAGE, - number=2, - message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", - ) - - confidence_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, - ) - rows = proto.RepeatedField( - proto.MESSAGE, - number=2, - message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", - ) - - aggregate_classification_metrics = proto.Field( - proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", - ) - confusion_matrix_list = proto.RepeatedField( - proto.MESSAGE, - number=2, - message="Model.MultiClassClassificationMetrics.ConfusionMatrix", - ) - - class ClusteringMetrics(proto.Message): - r"""Evaluation metrics for clustering models. - Attributes: - davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): - Davies-Bouldin index. - mean_squared_distance (google.protobuf.wrappers_pb2.DoubleValue): - Mean of squared distances between each sample - to its cluster centroid. - clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): - Information for all clusters. - """ - - class Cluster(proto.Message): - r"""Message containing the information about one cluster. - Attributes: - centroid_id (int): - Centroid id. - feature_values (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue]): - Values of highly variant features for this - cluster. - count (google.protobuf.wrappers_pb2.Int64Value): - Count of training data rows that were - assigned to this cluster. - """ - - class FeatureValue(proto.Message): - r"""Representative value of a single feature within the cluster. - Attributes: - feature_column (str): - The feature column name. - numerical_value (google.protobuf.wrappers_pb2.DoubleValue): - The numerical feature value. This is the - centroid value for this feature. - categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): - The categorical feature value. - """ - - class CategoricalValue(proto.Message): - r"""Representative value of a categorical feature. - Attributes: - category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): - Counts of all categories for the categorical feature. If - there are more than ten categories, we return top ten (by - count) and return one more CategoryCount with category - "*OTHER*" and count as aggregate counts of remaining - categories. 
- """ - - class CategoryCount(proto.Message): - r"""Represents the count of a single category within the cluster. - Attributes: - category (str): - The name of category. - count (google.protobuf.wrappers_pb2.Int64Value): - The count of training samples matching the - category within the cluster. - """ - - category = proto.Field(proto.STRING, number=1,) - count = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, - ) - - category_counts = proto.RepeatedField( - proto.MESSAGE, - number=1, - message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", - ) - - feature_column = proto.Field(proto.STRING, number=1,) - numerical_value = proto.Field( - proto.MESSAGE, - number=2, - oneof="value", - message=wrappers_pb2.DoubleValue, - ) - categorical_value = proto.Field( - proto.MESSAGE, - number=3, - oneof="value", - message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", - ) - - centroid_id = proto.Field(proto.INT64, number=1,) - feature_values = proto.RepeatedField( - proto.MESSAGE, - number=2, - message="Model.ClusteringMetrics.Cluster.FeatureValue", - ) - count = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, - ) - - davies_bouldin_index = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, - ) - mean_squared_distance = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, - ) - clusters = proto.RepeatedField( - proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", - ) - - class RankingMetrics(proto.Message): - r"""Evaluation metrics used by weighted-ALS models specified by - feedback_type=implicit. - - Attributes: - mean_average_precision (google.protobuf.wrappers_pb2.DoubleValue): - Calculates a precision per user for all the - items by ranking them and then averages all the - precisions across all the users. - mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): - Similar to the mean squared error computed in - regression and explicit recommendation models - except instead of computing the rating directly, - the output from evaluate is computed against a - preference which is 1 or 0 depending on if the - rating exists or not. - normalized_discounted_cumulative_gain (google.protobuf.wrappers_pb2.DoubleValue): - A metric to determine the goodness of a - ranking calculated from the predicted confidence - by comparing it to an ideal rank measured by the - original ratings. - average_rank (google.protobuf.wrappers_pb2.DoubleValue): - Determines the goodness of a ranking by - computing the percentile rank from the predicted - confidence and dividing it by the original rank. - """ - - mean_average_precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, - ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, - ) - normalized_discounted_cumulative_gain = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, - ) - average_rank = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, - ) - - class ArimaForecastingMetrics(proto.Message): - r"""Model evaluation metrics for ARIMA forecasting models. - Attributes: - non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): - Non-seasonal order. - arima_fitting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics]): - Arima model fitting metrics. 
- seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): - Seasonal periods. Repeated because multiple - periods are supported for one time series. - has_drift (Sequence[bool]): - Whether Arima model fitted with drift or not. - It is always false when d is not 1. - time_series_id (Sequence[str]): - Id to differentiate different time series for - the large-scale case. - arima_single_model_forecasting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): - Repeated as there can be many metric sets - (one for each model) in auto-arima and the - large-scale case. - """ - - class ArimaSingleModelForecastingMetrics(proto.Message): - r"""Model evaluation metrics for a single ARIMA forecasting - model. - - Attributes: - non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): - Non-seasonal order. - arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): - Arima fitting metrics. - has_drift (bool): - Is arima model fitted with drift or not. It - is always false when d is not 1. - time_series_id (str): - The time_series_id value for this time series. It will be - one of the unique values from the time_series_id_column - specified during ARIMA model training. Only present when - time_series_id_column training option was used. - time_series_ids (Sequence[str]): - The tuple of time_series_ids identifying this time series. - It will be one of the unique tuples of values present in the - time_series_id_columns specified during ARIMA model - training. Only present when time_series_id_columns training - option was used and the order of values here are same as the - order of time_series_id_columns. - seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): - Seasonal periods. Repeated because multiple - periods are supported for one time series. - has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): - If true, holiday_effect is a part of time series - decomposition result. - has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): - If true, spikes_and_dips is a part of time series - decomposition result. - has_step_changes (google.protobuf.wrappers_pb2.BoolValue): - If true, step_changes is a part of time series decomposition - result. 
- """ - - non_seasonal_order = proto.Field( - proto.MESSAGE, number=1, message="Model.ArimaOrder", - ) - arima_fitting_metrics = proto.Field( - proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", - ) - has_drift = proto.Field(proto.BOOL, number=3,) - time_series_id = proto.Field(proto.STRING, number=4,) - time_series_ids = proto.RepeatedField(proto.STRING, number=9,) - seasonal_periods = proto.RepeatedField( - proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", - ) - has_holiday_effect = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, - ) - has_spikes_and_dips = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, - ) - has_step_changes = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, - ) - - non_seasonal_order = proto.RepeatedField( - proto.MESSAGE, number=1, message="Model.ArimaOrder", - ) - arima_fitting_metrics = proto.RepeatedField( - proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", - ) - seasonal_periods = proto.RepeatedField( - proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", - ) - has_drift = proto.RepeatedField(proto.BOOL, number=4,) - time_series_id = proto.RepeatedField(proto.STRING, number=5,) - arima_single_model_forecasting_metrics = proto.RepeatedField( - proto.MESSAGE, - number=6, - message="Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics", - ) - - class EvaluationMetrics(proto.Message): - r"""Evaluation metrics of a model. These are either computed on - all training data or just the eval data based on whether eval - data was used during training. These are not present for - imported models. - - Attributes: - regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics): - Populated for regression models and explicit - feedback type matrix factorization models. - binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics): - Populated for binary - classification/classifier models. - multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics): - Populated for multi-class - classification/classifier models. - clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics): - Populated for clustering models. - ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics): - Populated for implicit feedback type matrix - factorization models. - arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics): - Populated for ARIMA models. - """ - - regression_metrics = proto.Field( - proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", - ) - binary_classification_metrics = proto.Field( - proto.MESSAGE, - number=2, - oneof="metrics", - message="Model.BinaryClassificationMetrics", - ) - multi_class_classification_metrics = proto.Field( - proto.MESSAGE, - number=3, - oneof="metrics", - message="Model.MultiClassClassificationMetrics", - ) - clustering_metrics = proto.Field( - proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", - ) - ranking_metrics = proto.Field( - proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", - ) - arima_forecasting_metrics = proto.Field( - proto.MESSAGE, - number=6, - oneof="metrics", - message="Model.ArimaForecastingMetrics", - ) - - class DataSplitResult(proto.Message): - r"""Data split result. This contains references to the training - and evaluation data tables that were used to train the model. 
- - Attributes: - training_table (google.cloud.bigquery_v2.types.TableReference): - Table reference of the training data after - split. - evaluation_table (google.cloud.bigquery_v2.types.TableReference): - Table reference of the evaluation data after - split. - """ - - training_table = proto.Field( - proto.MESSAGE, number=1, message=table_reference.TableReference, - ) - evaluation_table = proto.Field( - proto.MESSAGE, number=2, message=table_reference.TableReference, - ) - - class ArimaOrder(proto.Message): - r"""Arima order, can be used for both non-seasonal and seasonal - parts. - - Attributes: - p (int): - Order of the autoregressive part. - d (int): - Order of the differencing part. - q (int): - Order of the moving-average part. - """ - - p = proto.Field(proto.INT64, number=1,) - d = proto.Field(proto.INT64, number=2,) - q = proto.Field(proto.INT64, number=3,) - - class ArimaFittingMetrics(proto.Message): - r"""ARIMA model fitting metrics. - Attributes: - log_likelihood (float): - Log-likelihood. - aic (float): - AIC. - variance (float): - Variance. - """ - - log_likelihood = proto.Field(proto.DOUBLE, number=1,) - aic = proto.Field(proto.DOUBLE, number=2,) - variance = proto.Field(proto.DOUBLE, number=3,) - - class GlobalExplanation(proto.Message): - r"""Global explanations containing the top most important - features after training. - - Attributes: - explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation.Explanation]): - A list of the top global explanations. Sorted - by absolute value of attribution in descending - order. - class_label (str): - Class label for this set of global - explanations. Will be empty/null for binary - logistic and linear regression models. Sorted - alphabetically in descending order. - """ - - class Explanation(proto.Message): - r"""Explanation for a single feature. - Attributes: - feature_name (str): - Full name of the feature. For non-numerical features, will - be formatted like .. - Overall size of feature name will always be truncated to - first 120 characters. - attribution (google.protobuf.wrappers_pb2.DoubleValue): - Attribution of feature. - """ - - feature_name = proto.Field(proto.STRING, number=1,) - attribution = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, - ) - - explanations = proto.RepeatedField( - proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", - ) - class_label = proto.Field(proto.STRING, number=2,) - - class TrainingRun(proto.Message): - r"""Information about a single training query run for the model. - Attributes: - training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): - Options that were used for this training run, - includes user specified and default options that - were used. - start_time (google.protobuf.timestamp_pb2.Timestamp): - The start time of this training run. - results (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult]): - Output of each iteration run, results.size() <= - max_iterations. - evaluation_metrics (google.cloud.bigquery_v2.types.Model.EvaluationMetrics): - The evaluation metrics over training/eval - data that were computed at the end of training. - data_split_result (google.cloud.bigquery_v2.types.Model.DataSplitResult): - Data split result of the training run. Only - set when the input data is actually split. - global_explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation]): - Global explanations for important features of - the model. 
For multi-class models, there is one - entry for each label class. For other models, - there is only one entry in the list. - """ - - class TrainingOptions(proto.Message): - r"""Options used in model training. - Attributes: - max_iterations (int): - The maximum number of iterations in training. - Used only for iterative training algorithms. - loss_type (google.cloud.bigquery_v2.types.Model.LossType): - Type of loss function used during training - run. - learn_rate (float): - Learning rate in training. Used only for - iterative training algorithms. - l1_regularization (google.protobuf.wrappers_pb2.DoubleValue): - L1 regularization coefficient. - l2_regularization (google.protobuf.wrappers_pb2.DoubleValue): - L2 regularization coefficient. - min_relative_progress (google.protobuf.wrappers_pb2.DoubleValue): - When early_stop is true, stops training when accuracy - improvement is less than 'min_relative_progress'. Used only - for iterative training algorithms. - warm_start (google.protobuf.wrappers_pb2.BoolValue): - Whether to train a model from the last - checkpoint. - early_stop (google.protobuf.wrappers_pb2.BoolValue): - Whether to stop early when the loss doesn't improve - significantly any more (compared to min_relative_progress). - Used only for iterative training algorithms. - input_label_columns (Sequence[str]): - Name of input label columns in training data. - data_split_method (google.cloud.bigquery_v2.types.Model.DataSplitMethod): - The data split type for training and - evaluation, e.g. RANDOM. - data_split_eval_fraction (float): - The fraction of evaluation data over the - whole input data. The rest of data will be used - as training data. The format should be double. - Accurate to two decimal places. - Default value is 0.2. - data_split_column (str): - The column to split data with. This column won't be used as - a feature. - - 1. When data_split_method is CUSTOM, the corresponding - column should be boolean. The rows with true value tag - are eval data, and the false are training data. - 2. When data_split_method is SEQ, the first - DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) - in the corresponding column are used as training data, - and the rest are eval data. It respects the order in - Orderable data types: - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties - learn_rate_strategy (google.cloud.bigquery_v2.types.Model.LearnRateStrategy): - The strategy to determine learn rate for the - current iteration. - initial_learn_rate (float): - Specifies the initial learning rate for the - line search learn rate strategy. - label_class_weights (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): - Weights associated with each label class, for - rebalancing the training data. Only applicable - for classification models. - user_column (str): - User column specified for matrix - factorization models. - item_column (str): - Item column specified for matrix - factorization models. - distance_type (google.cloud.bigquery_v2.types.Model.DistanceType): - Distance type for clustering models. - num_clusters (int): - Number of clusters for clustering models. - model_uri (str): - Google Cloud Storage URI from which the model - was imported. Only applicable for imported - models. - optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): - Optimization strategy for training linear - regression models. - hidden_units (Sequence[int]): - Hidden units for dnn models. 
- batch_size (int): - Batch size for dnn models. - dropout (google.protobuf.wrappers_pb2.DoubleValue): - Dropout probability for dnn models. - max_tree_depth (int): - Maximum depth of a tree for boosted tree - models. - subsample (float): - Subsample fraction of the training data to - grow tree to prevent overfitting for boosted - tree models. - min_split_loss (google.protobuf.wrappers_pb2.DoubleValue): - Minimum split loss for boosted tree models. - num_factors (int): - Num factors specified for matrix - factorization models. - feedback_type (google.cloud.bigquery_v2.types.Model.FeedbackType): - Feedback type that specifies which algorithm - to run for matrix factorization. - wals_alpha (google.protobuf.wrappers_pb2.DoubleValue): - Hyperparameter for matrix factoration when - implicit feedback type is specified. - kmeans_initialization_method (google.cloud.bigquery_v2.types.Model.KmeansEnums.KmeansInitializationMethod): - The method used to initialize the centroids - for kmeans algorithm. - kmeans_initialization_column (str): - The column used to provide the initial centroids for kmeans - algorithm when kmeans_initialization_method is CUSTOM. - time_series_timestamp_column (str): - Column to be designated as time series - timestamp for ARIMA model. - time_series_data_column (str): - Column to be designated as time series data - for ARIMA model. - auto_arima (bool): - Whether to enable auto ARIMA or not. - non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): - A specification of the non-seasonal part of - the ARIMA model: the three components (p, d, q) - are the AR order, the degree of differencing, - and the MA order. - data_frequency (google.cloud.bigquery_v2.types.Model.DataFrequency): - The data frequency of a time series. - include_drift (bool): - Include drift when fitting an ARIMA model. - holiday_region (google.cloud.bigquery_v2.types.Model.HolidayRegion): - The geographical region based on which the - holidays are considered in time series modeling. - If a valid value is specified, then holiday - effects modeling is enabled. - time_series_id_column (str): - The time series id column that was used - during ARIMA model training. - time_series_id_columns (Sequence[str]): - The time series id columns that were used - during ARIMA model training. - horizon (int): - The number of periods ahead that need to be - forecasted. - preserve_input_structs (bool): - Whether to preserve the input structs in output feature - names. Suppose there is a struct A with field b. When false - (default), the output feature name is A_b. When true, the - output feature name is A.b. - auto_arima_max_order (int): - The max value of non-seasonal p and q. - decompose_time_series (google.protobuf.wrappers_pb2.BoolValue): - If true, perform decompose time series and - save the results. - clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): - If true, clean spikes and dips in the input - time series. - adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue): - If true, detect step changes and make data - adjustment in the input time series. 
- """ - - max_iterations = proto.Field(proto.INT64, number=1,) - loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) - learn_rate = proto.Field(proto.DOUBLE, number=3,) - l1_regularization = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, - ) - l2_regularization = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, - ) - min_relative_progress = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, - ) - warm_start = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, - ) - early_stop = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, - ) - input_label_columns = proto.RepeatedField(proto.STRING, number=9,) - data_split_method = proto.Field( - proto.ENUM, number=10, enum="Model.DataSplitMethod", - ) - data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,) - data_split_column = proto.Field(proto.STRING, number=12,) - learn_rate_strategy = proto.Field( - proto.ENUM, number=13, enum="Model.LearnRateStrategy", - ) - initial_learn_rate = proto.Field(proto.DOUBLE, number=16,) - label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17,) - user_column = proto.Field(proto.STRING, number=18,) - item_column = proto.Field(proto.STRING, number=19,) - distance_type = proto.Field( - proto.ENUM, number=20, enum="Model.DistanceType", - ) - num_clusters = proto.Field(proto.INT64, number=21,) - model_uri = proto.Field(proto.STRING, number=22,) - optimization_strategy = proto.Field( - proto.ENUM, number=23, enum="Model.OptimizationStrategy", - ) - hidden_units = proto.RepeatedField(proto.INT64, number=24,) - batch_size = proto.Field(proto.INT64, number=25,) - dropout = proto.Field( - proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue, - ) - max_tree_depth = proto.Field(proto.INT64, number=27,) - subsample = proto.Field(proto.DOUBLE, number=28,) - min_split_loss = proto.Field( - proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue, - ) - num_factors = proto.Field(proto.INT64, number=30,) - feedback_type = proto.Field( - proto.ENUM, number=31, enum="Model.FeedbackType", - ) - wals_alpha = proto.Field( - proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue, - ) - kmeans_initialization_method = proto.Field( - proto.ENUM, - number=33, - enum="Model.KmeansEnums.KmeansInitializationMethod", - ) - kmeans_initialization_column = proto.Field(proto.STRING, number=34,) - time_series_timestamp_column = proto.Field(proto.STRING, number=35,) - time_series_data_column = proto.Field(proto.STRING, number=36,) - auto_arima = proto.Field(proto.BOOL, number=37,) - non_seasonal_order = proto.Field( - proto.MESSAGE, number=38, message="Model.ArimaOrder", - ) - data_frequency = proto.Field( - proto.ENUM, number=39, enum="Model.DataFrequency", - ) - include_drift = proto.Field(proto.BOOL, number=41,) - holiday_region = proto.Field( - proto.ENUM, number=42, enum="Model.HolidayRegion", - ) - time_series_id_column = proto.Field(proto.STRING, number=43,) - time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) - horizon = proto.Field(proto.INT64, number=44,) - preserve_input_structs = proto.Field(proto.BOOL, number=45,) - auto_arima_max_order = proto.Field(proto.INT64, number=46,) - decompose_time_series = proto.Field( - proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, - ) - clean_spikes_and_dips = proto.Field( - proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, - ) - adjust_step_changes = proto.Field( - proto.MESSAGE, number=53, 
message=wrappers_pb2.BoolValue, - ) - - class IterationResult(proto.Message): - r"""Information about a single iteration of the training run. - Attributes: - index (google.protobuf.wrappers_pb2.Int32Value): - Index of the iteration, 0 based. - duration_ms (google.protobuf.wrappers_pb2.Int64Value): - Time taken to run the iteration in - milliseconds. - training_loss (google.protobuf.wrappers_pb2.DoubleValue): - Loss computed on the training data at the end - of iteration. - eval_loss (google.protobuf.wrappers_pb2.DoubleValue): - Loss computed on the eval data at the end of - iteration. - learn_rate (float): - Learn rate used for this iteration. - cluster_infos (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ClusterInfo]): - Information about top clusters for clustering - models. - arima_result (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult): - - """ - - class ClusterInfo(proto.Message): - r"""Information about a single cluster for clustering model. - Attributes: - centroid_id (int): - Centroid id. - cluster_radius (google.protobuf.wrappers_pb2.DoubleValue): - Cluster radius, the average distance from - centroid to each point assigned to the cluster. - cluster_size (google.protobuf.wrappers_pb2.Int64Value): - Cluster size, the total number of points - assigned to the cluster. - """ - - centroid_id = proto.Field(proto.INT64, number=1,) - cluster_radius = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, - ) - cluster_size = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, - ) - - class ArimaResult(proto.Message): - r"""(Auto-)arima fitting result. Wrap everything in ArimaResult - for easier refactoring if we want to use model-specific - iteration results. - - Attributes: - arima_model_info (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): - This message is repeated because there are - multiple arima models fitted in auto-arima. For - non-auto-arima model, its size is one. - seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): - Seasonal periods. Repeated because multiple - periods are supported for one time series. - """ - - class ArimaCoefficients(proto.Message): - r"""Arima coefficients. - Attributes: - auto_regressive_coefficients (Sequence[float]): - Auto-regressive coefficients, an array of - double. - moving_average_coefficients (Sequence[float]): - Moving-average coefficients, an array of - double. - intercept_coefficient (float): - Intercept coefficient, just a double not an - array. - """ - - auto_regressive_coefficients = proto.RepeatedField( - proto.DOUBLE, number=1, - ) - moving_average_coefficients = proto.RepeatedField( - proto.DOUBLE, number=2, - ) - intercept_coefficient = proto.Field(proto.DOUBLE, number=3,) - - class ArimaModelInfo(proto.Message): - r"""Arima model information. - Attributes: - non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): - Non-seasonal order. - arima_coefficients (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): - Arima coefficients. - arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): - Arima fitting metrics. - has_drift (bool): - Whether Arima model fitted with drift or not. - It is always false when d is not 1. - time_series_id (str): - The time_series_id value for this time series. 
It will be - one of the unique values from the time_series_id_column - specified during ARIMA model training. Only present when - time_series_id_column training option was used. - time_series_ids (Sequence[str]): - The tuple of time_series_ids identifying this time series. - It will be one of the unique tuples of values present in the - time_series_id_columns specified during ARIMA model - training. Only present when time_series_id_columns training - option was used and the order of values here are same as the - order of time_series_id_columns. - seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): - Seasonal periods. Repeated because multiple - periods are supported for one time series. - has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): - If true, holiday_effect is a part of time series - decomposition result. - has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): - If true, spikes_and_dips is a part of time series - decomposition result. - has_step_changes (google.protobuf.wrappers_pb2.BoolValue): - If true, step_changes is a part of time series decomposition - result. - """ - - non_seasonal_order = proto.Field( - proto.MESSAGE, number=1, message="Model.ArimaOrder", - ) - arima_coefficients = proto.Field( - proto.MESSAGE, - number=2, - message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", - ) - arima_fitting_metrics = proto.Field( - proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", - ) - has_drift = proto.Field(proto.BOOL, number=4,) - time_series_id = proto.Field(proto.STRING, number=5,) - time_series_ids = proto.RepeatedField(proto.STRING, number=10,) - seasonal_periods = proto.RepeatedField( - proto.ENUM, - number=6, - enum="Model.SeasonalPeriod.SeasonalPeriodType", - ) - has_holiday_effect = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, - ) - has_spikes_and_dips = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, - ) - has_step_changes = proto.Field( - proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, - ) - - arima_model_info = proto.RepeatedField( - proto.MESSAGE, - number=1, - message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo", - ) - seasonal_periods = proto.RepeatedField( - proto.ENUM, - number=2, - enum="Model.SeasonalPeriod.SeasonalPeriodType", - ) - - index = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value, - ) - duration_ms = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, - ) - training_loss = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, - ) - eval_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, - ) - learn_rate = proto.Field(proto.DOUBLE, number=7,) - cluster_infos = proto.RepeatedField( - proto.MESSAGE, - number=8, - message="Model.TrainingRun.IterationResult.ClusterInfo", - ) - arima_result = proto.Field( - proto.MESSAGE, - number=9, - message="Model.TrainingRun.IterationResult.ArimaResult", - ) - - training_options = proto.Field( - proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", - ) - start_time = proto.Field( - proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp, - ) - results = proto.RepeatedField( - proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", - ) - evaluation_metrics = proto.Field( - proto.MESSAGE, number=7, message="Model.EvaluationMetrics", - ) - data_split_result = proto.Field( - proto.MESSAGE, number=9, 
message="Model.DataSplitResult", - ) - global_explanations = proto.RepeatedField( - proto.MESSAGE, number=10, message="Model.GlobalExplanation", - ) - - etag = proto.Field(proto.STRING, number=1,) - model_reference = proto.Field( - proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, - ) - creation_time = proto.Field(proto.INT64, number=5,) - last_modified_time = proto.Field(proto.INT64, number=6,) - description = proto.Field(proto.STRING, number=12,) - friendly_name = proto.Field(proto.STRING, number=14,) - labels = proto.MapField(proto.STRING, proto.STRING, number=15,) - expiration_time = proto.Field(proto.INT64, number=16,) - location = proto.Field(proto.STRING, number=13,) - encryption_configuration = proto.Field( - proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, - ) - model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) - training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) - feature_columns = proto.RepeatedField( - proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, - ) - label_columns = proto.RepeatedField( - proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, - ) - best_trial_id = proto.Field(proto.INT64, number=19,) - - -class GetModelRequest(proto.Message): - r""" - Attributes: - project_id (str): - Required. Project ID of the requested model. - dataset_id (str): - Required. Dataset ID of the requested model. - model_id (str): - Required. Model ID of the requested model. - """ - - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) - - -class PatchModelRequest(proto.Message): - r""" - Attributes: - project_id (str): - Required. Project ID of the model to patch. - dataset_id (str): - Required. Dataset ID of the model to patch. - model_id (str): - Required. Model ID of the model to patch. - model (google.cloud.bigquery_v2.types.Model): - Required. Patched model. - Follows RFC5789 patch semantics. Missing fields - are not updated. To clear a field, explicitly - set to default value. - """ - - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) - model = proto.Field(proto.MESSAGE, number=4, message="Model",) - - -class DeleteModelRequest(proto.Message): - r""" - Attributes: - project_id (str): - Required. Project ID of the model to delete. - dataset_id (str): - Required. Dataset ID of the model to delete. - model_id (str): - Required. Model ID of the model to delete. - """ - - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) - - -class ListModelsRequest(proto.Message): - r""" - Attributes: - project_id (str): - Required. Project ID of the models to list. - dataset_id (str): - Required. Dataset ID of the models to list. - max_results (google.protobuf.wrappers_pb2.UInt32Value): - The maximum number of results to return in a - single response page. Leverage the page tokens - to iterate through the entire collection. 
- page_token (str): - Page token, returned by a previous call to - request the next page of results - """ - - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - max_results = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value, - ) - page_token = proto.Field(proto.STRING, number=4,) - - -class ListModelsResponse(proto.Message): - r""" - Attributes: - models (Sequence[google.cloud.bigquery_v2.types.Model]): - Models in the requested dataset. Only the following fields - are populated: model_reference, model_type, creation_time, - last_modified_time and labels. - next_page_token (str): - A token to request the next page of results. - """ - - @property - def raw_page(self): - return self - - models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) - next_page_token = proto.Field(proto.STRING, number=2,) - - -__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py deleted file mode 100644 index a9ebad613..000000000 --- a/google/cloud/bigquery_v2/types/model_reference.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import proto # type: ignore - - -__protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"ModelReference",}, -) - - -class ModelReference(proto.Message): - r"""Id path of a model. - Attributes: - project_id (str): - Required. The ID of the project containing - this model. - dataset_id (str): - Required. The ID of the dataset containing - this model. - model_id (str): - Required. The ID of the model. The ID must contain only - letters (a-z, A-Z), numbers (0-9), or underscores (_). The - maximum length is 1,024 characters. - """ - - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) - - -__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py deleted file mode 100644 index 7a845fc48..000000000 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ /dev/null @@ -1,117 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import proto # type: ignore - - -__protobuf__ = proto.module( - package="google.cloud.bigquery.v2", - manifest={ - "StandardSqlDataType", - "StandardSqlField", - "StandardSqlStructType", - "StandardSqlTableType", - }, -) - - -class StandardSqlDataType(proto.Message): - r"""The type of a variable, e.g., a function argument. Examples: INT64: - {type_kind="INT64"} ARRAY: {type_kind="ARRAY", - array_element_type="STRING"} STRUCT: - {type_kind="STRUCT", struct_type={fields=[ {name="x", - type={type_kind="STRING"}}, {name="y", type={type_kind="ARRAY", - array_element_type="DATE"}} ]}} - - Attributes: - type_kind (google.cloud.bigquery_v2.types.StandardSqlDataType.TypeKind): - Required. The top level type of this field. - Can be any standard SQL data type (e.g., - "INT64", "DATE", "ARRAY"). - array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType): - The type of the array's elements, if type_kind = "ARRAY". - struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType): - The fields of this struct, in order, if type_kind = - "STRUCT". - """ - - class TypeKind(proto.Enum): - r"""""" - TYPE_KIND_UNSPECIFIED = 0 - INT64 = 2 - BOOL = 5 - FLOAT64 = 7 - STRING = 8 - BYTES = 9 - TIMESTAMP = 19 - DATE = 10 - TIME = 20 - DATETIME = 21 - INTERVAL = 26 - GEOGRAPHY = 22 - NUMERIC = 23 - BIGNUMERIC = 24 - JSON = 25 - ARRAY = 16 - STRUCT = 17 - - type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) - array_element_type = proto.Field( - proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", - ) - struct_type = proto.Field( - proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", - ) - - -class StandardSqlField(proto.Message): - r"""A field or a column. - Attributes: - name (str): - Optional. The name of this field. Can be - absent for struct fields. - type (google.cloud.bigquery_v2.types.StandardSqlDataType): - Optional. The type of this parameter. Absent - if not explicitly specified (e.g., CREATE - FUNCTION statement can omit the return type; in - this case the output parameter does not have - this "type" field). - """ - - name = proto.Field(proto.STRING, number=1,) - type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) - - -class StandardSqlStructType(proto.Message): - r""" - Attributes: - fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): - - """ - - fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) - - -class StandardSqlTableType(proto.Message): - r"""A table type - Attributes: - columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): - The columns in this table type - """ - - columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) - - -__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py deleted file mode 100644 index d56e5b09f..000000000 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import proto # type: ignore - - -__protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"TableReference",}, -) - - -class TableReference(proto.Message): - r""" - Attributes: - project_id (str): - Required. The ID of the project containing - this table. - dataset_id (str): - Required. The ID of the dataset containing - this table. - table_id (str): - Required. The ID of the table. The ID must contain only - letters (a-z, A-Z), numbers (0-9), or underscores (_). The - maximum length is 1,024 characters. Certain operations allow - suffixing of the table ID with a partition decorator, such - as ``sample_table$20190123``. - project_id_alternative (Sequence[str]): - The alternative field that will be used when ESF is not able - to translate the received data to the project_id field. - dataset_id_alternative (Sequence[str]): - The alternative field that will be used when ESF is not able - to translate the received data to the project_id field. - table_id_alternative (Sequence[str]): - The alternative field that will be used when ESF is not able - to translate the received data to the project_id field. - """ - - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - table_id = proto.Field(proto.STRING, number=3,) - project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) - dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) - table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) - - -__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/owlbot.py b/owlbot.py index 09aa8ca6f..be493957e 100644 --- a/owlbot.py +++ b/owlbot.py @@ -21,74 +21,6 @@ common = gcp.CommonTemplates() -default_version = "v2" - -for library in s.get_staging_dirs(default_version): - # Do not expose ModelServiceClient and ModelServiceAsyncClient, as there - # is no public API endpoint for the models service. - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"from \.services\.model_service import ModelServiceClient", - "", - ) - - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"from \.services\.model_service import ModelServiceAsyncClient", - "", - ) - - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"""["']ModelServiceClient["'],""", - "", - ) - - s.replace( - library / f"google/cloud/bigquery_{library.name}/__init__.py", - r"""["']ModelServiceAsyncClient["'],""", - "", - ) - - # Adjust Model docstring so that Sphinx does not think that "predicted_" is - # a reference to something, issuing a false warning. - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/model.py", - r'will have a "predicted_"', - "will have a `predicted_`", - ) - - # Avoid breaking change due to change in field renames. 
- # https://github.com/googleapis/python-bigquery/issues/319 - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", - r"type_ ", - "type ", - ) - - s.move( - library, - excludes=[ - "*.tar.gz", - ".coveragerc", - "docs/index.rst", - f"docs/bigquery_{library.name}/*_service.rst", - f"docs/bigquery_{library.name}/services.rst", - "README.rst", - "noxfile.py", - "setup.py", - f"scripts/fixup_bigquery_{library.name}_keywords.py", - "google/cloud/bigquery/__init__.py", - "google/cloud/bigquery/py.typed", - # There are no public API endpoints for the generated ModelServiceClient, - # thus there's no point in generating it and its tests. - f"google/cloud/bigquery_{library.name}/services/**", - f"tests/unit/gapic/bigquery_{library.name}/**", - ], - ) - -s.remove_staging_dirs() - # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- @@ -120,6 +52,10 @@ ], ) +# Remove unneeded intersphinx links, the library does not use any proto-generated code. +s.replace("docs/conf.py", r'\s+"(proto-plus|protobuf)":.*$', "") + + # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- @@ -132,13 +68,6 @@ '{"members": True, "inherited-members": True}', ) -# Tell Sphinx to ingore autogenerated docs files. -s.replace( - "docs/conf.py", - r'"samples/snippets/README\.rst",', - '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', -) - # ---------------------------------------------------------------------------- # pytype-related changes # ---------------------------------------------------------------------------- @@ -159,7 +88,6 @@ google/cloud/ exclude = tests/ - google/cloud/bigquery_v2/ output = .pytype/ disable = # There's some issue with finding some pyi files, thus disabling. 
@@ -169,52 +97,4 @@ ), ) - -# Remove the replacements below once -# https://github.com/googleapis/synthtool/pull/1188 is merged - -# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files -s.replace( - ".kokoro/*.sh", - "repo-automation-bots/tree/master", - "repo-automation-bots/tree/main", -) - -# Customize CONTRIBUTING.rst to replace master with main -s.replace( - "CONTRIBUTING.rst", - "fetch and merge changes from upstream into master", - "fetch and merge changes from upstream into main", -) - -s.replace( - "CONTRIBUTING.rst", "git merge upstream/master", "git merge upstream/main", -) - -s.replace( - "CONTRIBUTING.rst", - """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""", - """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""", -) - -s.replace( - "CONTRIBUTING.rst", r"remote \(``master``\)", "remote (``main``)", -) - -s.replace( - "CONTRIBUTING.rst", "blob/master/CONTRIBUTING.rst", "blob/main/CONTRIBUTING.rst", -) - -s.replace( - "CONTRIBUTING.rst", "blob/master/noxfile.py", "blob/main/noxfile.py", -) - -s.replace( - "docs/conf.py", "master_doc", "root_doc", -) - -s.replace( - "docs/conf.py", "# The master toctree document.", "# The root toctree document.", -) - s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/samples/create_routine.py b/samples/create_routine.py index 1cb4a80b4..b8746905d 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -17,7 +17,6 @@ def create_routine(routine_id): # [START bigquery_create_routine] from google.cloud import bigquery - from google.cloud import bigquery_v2 # Construct a BigQuery client object. client = bigquery.Client() @@ -33,8 +32,8 @@ def create_routine(routine_id): arguments=[ bigquery.RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ], diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index 0fdacaaec..4764a571f 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -20,7 +20,6 @@ import pytest from google.cloud import bigquery -from google.cloud import bigquery_v2 @pytest.fixture(scope="session", autouse=True) @@ -125,8 +124,8 @@ def routine_id(client, dataset_id): routine.arguments = [ bigquery.RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] diff --git a/samples/tests/test_routine_samples.py b/samples/tests/test_routine_samples.py index c1b0bb5a7..b457c464a 100644 --- a/samples/tests/test_routine_samples.py +++ b/samples/tests/test_routine_samples.py @@ -13,7 +13,6 @@ # limitations under the License. 
from google.cloud import bigquery -from google.cloud import bigquery_v2 def test_create_routine(capsys, random_routine_id): @@ -37,22 +36,22 @@ def test_create_routine_ddl(capsys, random_routine_id, client): expected_arguments = [ bigquery.RoutineArgument( name="arr", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, - array_element_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRUCT, - struct_type=bigquery_v2.types.StandardSqlStructType( + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.ARRAY, + array_element_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=bigquery.StandardSqlStructType( fields=[ - bigquery_v2.types.StandardSqlField( + bigquery.StandardSqlField( name="name", - type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRING + type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRING ), ), - bigquery_v2.types.StandardSqlField( + bigquery.StandardSqlField( name="val", - type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ), ] @@ -82,7 +81,7 @@ def test_get_routine(capsys, routine_id): assert "Type: 'SCALAR_FUNCTION'" in out assert "Language: 'SQL'" in out assert "Name: 'x'" in out - assert "Type: 'type_kind: INT64\n'" in out + assert "type_kind=" in out def test_delete_routine(capsys, routine_id): diff --git a/setup.cfg b/setup.cfg index 8eefc4435..28b7b0f26 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,6 @@ inputs = google/cloud/ exclude = tests/ - google/cloud/bigquery_v2/ output = .pytype/ disable = # There's some issue with finding some pyi files, thus disabling. 
diff --git a/setup.py b/setup.py index f1464e77a..130d8f49c 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,6 @@ # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.29.0, <3.0.0dev", - "proto-plus >= 1.10.0", "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed @@ -42,7 +41,6 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", - "protobuf >= 3.12.0", "pyarrow >= 3.0.0, < 6.0dev", "requests >= 2.18.0, < 3.0.0dev", ] diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 6e27172b2..620f23ca2 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -15,8 +15,6 @@ opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 pandas==1.0.0 -proto-plus==1.10.0 -protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 Shapely==1.6.0 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 4b9868f10..120e10f45 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -51,7 +51,6 @@ from google.api_core.exceptions import TooManyRequests from google.api_core.iam import Policy from google.cloud import bigquery -from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -2181,8 +2180,8 @@ def test_insert_rows_nested_nested_dictionary(self): def test_create_routine(self): routine_name = "test_routine" dataset = self.temp_dataset(_make_dataset_id("create_routine")) - float64_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.FLOAT64 + float64_type = bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.FLOAT64 ) routine = bigquery.Routine( dataset.routine(routine_name), @@ -2196,8 +2195,8 @@ def test_create_routine(self): routine.arguments = [ bigquery.RoutineArgument( name="arr", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, + data_type=bigquery.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.ARRAY, array_element_type=float64_type, ), ) @@ -2216,14 +2215,19 @@ def test_create_routine(self): assert rows[0].max_value == 100.0 def test_create_tvf_routine(self): - from google.cloud.bigquery import Routine, RoutineArgument, RoutineType + from google.cloud.bigquery import ( + Routine, + RoutineArgument, + RoutineType, + StandardSqlTypeNames, + ) - StandardSqlDataType = bigquery_v2.types.StandardSqlDataType - StandardSqlField = bigquery_v2.types.StandardSqlField - StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + StandardSqlDataType = bigquery.StandardSqlDataType + StandardSqlField = bigquery.StandardSqlField + StandardSqlTableType = bigquery.StandardSqlTableType - INT64 = StandardSqlDataType.TypeKind.INT64 - STRING = StandardSqlDataType.TypeKind.STRING + INT64 = StandardSqlTypeNames.INT64 + STRING = StandardSqlTypeNames.STRING client = Config.CLIENT diff --git a/tests/unit/enums/__init__.py b/tests/unit/enums/__init__.py deleted file mode 100644 index c5cce0430..000000000 --- a/tests/unit/enums/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2019, Google LLC All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/tests/unit/enums/test_standard_sql_data_types.py b/tests/unit/enums/test_standard_sql_data_types.py deleted file mode 100644 index 7f62c46fd..000000000 --- a/tests/unit/enums/test_standard_sql_data_types.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright 2019 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - - -@pytest.fixture -def module_under_test(): - from google.cloud.bigquery import enums - - return enums - - -@pytest.fixture -def enum_under_test(): - from google.cloud.bigquery.enums import StandardSqlDataTypes - - return StandardSqlDataTypes - - -@pytest.fixture -def gapic_enum(): - """The referential autogenerated enum the enum under test is based on.""" - from google.cloud.bigquery_v2.types import StandardSqlDataType - - return StandardSqlDataType.TypeKind - - -def test_all_gapic_enum_members_are_known(module_under_test, gapic_enum): - gapic_names = set(type_.name for type_ in gapic_enum) - anticipated_names = ( - module_under_test._SQL_SCALAR_TYPES | module_under_test._SQL_NONSCALAR_TYPES - ) - assert not (gapic_names - anticipated_names) # no unhandled names - - -def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): - # check the presence of a few typical SQL types - for name in ("INT64", "FLOAT64", "DATE", "BOOL", "GEOGRAPHY"): - assert name in enum_under_test.__members__ - - # the enum members must match those in the original gapic enum - for member in enum_under_test: - assert member.name in gapic_enum.__members__ - assert member.value == gapic_enum[member.name].value - - # check a few members that should *not* be copied over from the gapic enum - for name in ("STRUCT", "ARRAY"): - assert name in gapic_enum.__members__ - assert name not in enum_under_test.__members__ - - -@pytest.mark.skip(reason="Code generator issue, the docstring is not generated.") -def test_standard_sql_types_enum_docstring( - enum_under_test, gapic_enum -): # pragma: NO COVER - assert "STRUCT (int):" not in enum_under_test.__doc__ - assert "BOOL (int):" in enum_under_test.__doc__ - assert "TIME (int):" in enum_under_test.__doc__ - - # All lines in the docstring should actually come from the original docstring, - # except for the header. - assert "An Enum of scalar SQL types." 
in enum_under_test.__doc__ - doc_lines = enum_under_test.__doc__.splitlines() - assert set(doc_lines[1:]) <= set(gapic_enum.__doc__.splitlines()) diff --git a/tests/unit/gapic/__init__.py b/tests/unit/gapic/__init__.py deleted file mode 100644 index 4de65971c..000000000 --- a/tests/unit/gapic/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 8f0bf58d5..c5f9b77c1 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -19,7 +19,6 @@ import pytest import google.cloud._helpers -from google.cloud.bigquery_v2 import types KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @@ -79,7 +78,7 @@ def test_from_api_repr(target_class): "description": "A friendly description.", "friendlyName": "A friendly name.", "modelType": "LOGISTIC_REGRESSION", - "labels": {"greeting": u"こんにちは"}, + "labels": {"greeting": "こんにちは"}, "trainingRuns": [ { "trainingOptions": {"initialLearnRate": 1.0}, @@ -95,9 +94,9 @@ def test_from_api_repr(target_class): }, { "trainingOptions": {"initialLearnRate": 0.25}, - # Allow milliseconds since epoch format. - # TODO: Remove this hack once CL 238585470 hits prod. - "startTime": str(google.cloud._helpers._millis(expiration_time)), + "startTime": str( + google.cloud._helpers._datetime_to_rfc3339(expiration_time) + ), }, ], "featureColumns": [], @@ -115,30 +114,24 @@ def test_from_api_repr(target_class): assert got.created == creation_time assert got.modified == modified_time assert got.expires == expiration_time - assert got.description == u"A friendly description." - assert got.friendly_name == u"A friendly name." - assert got.model_type == types.Model.ModelType.LOGISTIC_REGRESSION - assert got.labels == {"greeting": u"こんにちは"} + assert got.description == "A friendly description." + assert got.friendly_name == "A friendly name." 
+ assert got.model_type == "LOGISTIC_REGRESSION" + assert got.labels == {"greeting": "こんにちは"} assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME - assert got.training_runs[0].training_options.initial_learn_rate == 1.0 + assert got.training_runs[0]["trainingOptions"]["initialLearnRate"] == 1.0 assert ( - got.training_runs[0] - .start_time.ToDatetime() - .replace(tzinfo=google.cloud._helpers.UTC) + google.cloud._helpers._rfc3339_to_datetime(got.training_runs[0]["startTime"]) == creation_time ) - assert got.training_runs[1].training_options.initial_learn_rate == 0.5 + assert got.training_runs[1]["trainingOptions"]["initialLearnRate"] == 0.5 assert ( - got.training_runs[1] - .start_time.ToDatetime() - .replace(tzinfo=google.cloud._helpers.UTC) + google.cloud._helpers._rfc3339_to_datetime(got.training_runs[1]["startTime"]) == modified_time ) - assert got.training_runs[2].training_options.initial_learn_rate == 0.25 + assert got.training_runs[2]["trainingOptions"]["initialLearnRate"] == 0.25 assert ( - got.training_runs[2] - .start_time.ToDatetime() - .replace(tzinfo=google.cloud._helpers.UTC) + google.cloud._helpers._rfc3339_to_datetime(got.training_runs[2]["startTime"]) == expiration_time ) @@ -155,14 +148,14 @@ def test_from_api_repr_w_minimal_resource(target_class): } got = target_class.from_api_repr(resource) assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") - assert got.location == "" - assert got.etag == "" + assert got.location is None + assert got.etag is None assert got.created is None assert got.modified is None assert got.expires is None assert got.description is None assert got.friendly_name is None - assert got.model_type == types.Model.ModelType.MODEL_TYPE_UNSPECIFIED + assert got.model_type == "MODEL_TYPE_UNSPECIFIED" assert got.labels == {} assert got.encryption_configuration is None assert len(got.training_runs) == 0 @@ -183,7 +176,7 @@ def test_from_api_repr_w_unknown_fields(target_class): } got = target_class.from_api_repr(resource) assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") - assert got._properties is resource + assert got._properties == resource def test_from_api_repr_w_unknown_type(target_class): @@ -195,12 +188,19 @@ def test_from_api_repr_w_unknown_type(target_class): "datasetId": "my_dataset", "modelId": "my_model", }, - "modelType": "BE_A_GOOD_ROLE_MODEL", + "modelType": "BE_A_GOOD_ROLE_MODEL", # This model type does not exist. } got = target_class.from_api_repr(resource) assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") - assert got.model_type == 0 - assert got._properties is resource + assert got.model_type == "BE_A_GOOD_ROLE_MODEL" # No checks for invalid types. 
+ assert got._properties == resource + + +def test_from_api_repr_w_missing_reference(target_class): + resource = {} + got = target_class.from_api_repr(resource) + assert got.reference is None + assert got._properties == resource @pytest.mark.parametrize( @@ -338,8 +338,6 @@ def test_repr(target_class): def test_to_api_repr(target_class): - from google.protobuf import json_format - model = target_class("my-proj.my_dset.my_model") resource = { "etag": "abcdefg", @@ -374,8 +372,6 @@ def test_to_api_repr(target_class): "kmsKeyName": "projects/1/locations/us/keyRings/1/cryptoKeys/1" }, } - model._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) + model._properties = resource got = model.to_api_repr() assert got == resource diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index fdaf13324..80a3def73 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -19,7 +19,6 @@ import google.cloud._helpers from google.cloud import bigquery -from google.cloud import bigquery_v2 @pytest.fixture @@ -62,15 +61,15 @@ def test_ctor_w_properties(target_class): arguments = [ RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] body = "x * 3" language = "SQL" - return_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + return_type = bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) type_ = "SCALAR_FUNCTION" description = "A routine description." @@ -146,15 +145,15 @@ def test_from_api_repr(target_class): assert actual_routine.arguments == [ RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] assert actual_routine.body == "42" assert actual_routine.language == "SQL" - assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + assert actual_routine.return_type == bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) assert actual_routine.return_table_type is None assert actual_routine.type_ == "SCALAR_FUNCTION" @@ -168,9 +167,9 @@ def test_from_api_repr_tvf_function(target_class): from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType - StandardSqlDataType = bigquery_v2.types.StandardSqlDataType - StandardSqlField = bigquery_v2.types.StandardSqlField - StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + StandardSqlDataType = bigquery.standard_sql.StandardSqlDataType + StandardSqlField = bigquery.standard_sql.StandardSqlField + StandardSqlTableType = bigquery.standard_sql.StandardSqlTableType creation_time = datetime.datetime( 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC @@ -216,7 +215,9 @@ def test_from_api_repr_tvf_function(target_class): assert actual_routine.arguments == [ RoutineArgument( name="a", - data_type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + data_type=StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 + ), ) ] assert actual_routine.body == "SELECT x 
FROM UNNEST([1,2,3]) x WHERE x > a" @@ -226,7 +227,7 @@ def test_from_api_repr_tvf_function(target_class): columns=[ StandardSqlField( name="int_col", - type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + type=StandardSqlDataType(type_kind=bigquery.StandardSqlTypeNames.INT64), ) ] ) @@ -460,19 +461,21 @@ def test_set_return_table_type_w_none(object_under_test): def test_set_return_table_type_w_not_none(object_under_test): - StandardSqlDataType = bigquery_v2.types.StandardSqlDataType - StandardSqlField = bigquery_v2.types.StandardSqlField - StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + StandardSqlDataType = bigquery.standard_sql.StandardSqlDataType + StandardSqlField = bigquery.standard_sql.StandardSqlField + StandardSqlTableType = bigquery.standard_sql.StandardSqlTableType table_type = StandardSqlTableType( columns=[ StandardSqlField( name="int_col", - type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + type=StandardSqlDataType(type_kind=bigquery.StandardSqlTypeNames.INT64), ), StandardSqlField( name="str_col", - type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.STRING), + type=StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.STRING + ), ), ] ) diff --git a/tests/unit/routine/test_routine_argument.py b/tests/unit/routine/test_routine_argument.py index e3bda9539..b7f168a30 100644 --- a/tests/unit/routine/test_routine_argument.py +++ b/tests/unit/routine/test_routine_argument.py @@ -16,7 +16,7 @@ import pytest -from google.cloud import bigquery_v2 +from google.cloud import bigquery @pytest.fixture @@ -27,8 +27,8 @@ def target_class(): def test_ctor(target_class): - data_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type = bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) actual_arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type @@ -50,8 +50,8 @@ def test_from_api_repr(target_class): assert actual_arg.name == "field_name" assert actual_arg.kind == "FIXED_TYPE" assert actual_arg.mode == "IN" - assert actual_arg.data_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + assert actual_arg.data_type == bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) @@ -71,8 +71,8 @@ def test_from_api_repr_w_unknown_fields(target_class): def test_eq(target_class): - data_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type = bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ) arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4c6ec5b4f..9b12128c6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -49,8 +49,8 @@ import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers +from google.cloud import bigquery from google.cloud import bigquery_storage -from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.retry import DEFAULT_TIMEOUT @@ -1832,7 +1832,7 @@ def test_update_model(self): self.assertEqual(updated_model.expires, model.expires) # ETag becomes If-Match header. 
- model._proto.etag = "etag" + model._properties["etag"] = "etag" client.update_model(model, []) req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") @@ -1862,8 +1862,8 @@ def test_update_routine(self): routine.arguments = [ RoutineArgument( name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + data_type=bigquery.standard_sql.StandardSqlDataType( + type_kind=bigquery.StandardSqlTypeNames.INT64 ), ) ] diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 84c74eeec..b4bb9365f 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -22,7 +22,7 @@ import pytest import google.cloud._helpers -from google.cloud.bigquery import table, enums +from google.cloud.bigquery import query, table from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -338,8 +338,8 @@ def test_custom_on_closed_error_type(self): VALID_BQ_TYPES = [ - (name, getattr(enums.SqlParameterScalarTypes, name)._type) - for name in dir(enums.SqlParameterScalarTypes) + (name, getattr(query.SqlParameterScalarTypes, name)._type) + for name in dir(query.SqlParameterScalarTypes) if not name.startswith("_") ] diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 69a6772e5..71ca67616 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -432,11 +432,11 @@ def test_positional(self): self.assertEqual(param.value, 123) def test_ctor_w_scalar_query_parameter_type(self): - from google.cloud.bigquery import enums + from google.cloud.bigquery import query param = self._make_one( name="foo", - type_=enums.SqlParameterScalarTypes.BIGNUMERIC, + type_=query.SqlParameterScalarTypes.BIGNUMERIC, value=decimal.Decimal("123.456"), ) self.assertEqual(param.name, "foo") diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index d0b5ca54c..edc05494c 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery.schema import PolicyTagList import unittest @@ -28,9 +30,9 @@ def _get_target_class(): @staticmethod def _get_standard_sql_data_type_class(): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql - return types.StandardSqlDataType + return standard_sql.StandardSqlDataType def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) @@ -204,18 +206,17 @@ def test_fields_property(self): self.assertEqual(schema_field.fields, fields) def test_to_standard_sql_simple_type(self): - sql_type = self._get_standard_sql_data_type_class() examples = ( # a few legacy types - ("INTEGER", sql_type.TypeKind.INT64), - ("FLOAT", sql_type.TypeKind.FLOAT64), - ("BOOLEAN", sql_type.TypeKind.BOOL), - ("DATETIME", sql_type.TypeKind.DATETIME), + ("INTEGER", bigquery.StandardSqlTypeNames.INT64), + ("FLOAT", bigquery.StandardSqlTypeNames.FLOAT64), + ("BOOLEAN", bigquery.StandardSqlTypeNames.BOOL), + ("DATETIME", bigquery.StandardSqlTypeNames.DATETIME), # a few standard types - ("INT64", sql_type.TypeKind.INT64), - ("FLOAT64", sql_type.TypeKind.FLOAT64), - ("BOOL", sql_type.TypeKind.BOOL), - ("GEOGRAPHY", sql_type.TypeKind.GEOGRAPHY), + ("INT64", bigquery.StandardSqlTypeNames.INT64), + ("FLOAT64", bigquery.StandardSqlTypeNames.FLOAT64), + ("BOOL", bigquery.StandardSqlTypeNames.BOOL), + ("GEOGRAPHY", bigquery.StandardSqlTypeNames.GEOGRAPHY), ) for legacy_type, standard_type in examples: field = self._make_one("some_field", legacy_type) @@ -224,7 +225,7 @@ def test_to_standard_sql_simple_type(self): self.assertEqual(standard_field.type.type_kind, standard_type) def test_to_standard_sql_struct_type(self): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql # Expected result object: # @@ -258,30 +259,39 @@ def test_to_standard_sql_struct_type(self): sql_type = self._get_standard_sql_data_type_class() # level 2 fields - sub_sub_field_date = types.StandardSqlField( - name="date_field", type=sql_type(type_kind=sql_type.TypeKind.DATE) + sub_sub_field_date = standard_sql.StandardSqlField( + name="date_field", + type=sql_type(type_kind=bigquery.StandardSqlTypeNames.DATE), ) - sub_sub_field_time = types.StandardSqlField( - name="time_field", type=sql_type(type_kind=sql_type.TypeKind.TIME) + sub_sub_field_time = standard_sql.StandardSqlField( + name="time_field", + type=sql_type(type_kind=bigquery.StandardSqlTypeNames.TIME), ) # level 1 fields - sub_field_struct = types.StandardSqlField( - name="last_used", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) - ) - sub_field_struct.type.struct_type.fields.extend( - [sub_sub_field_date, sub_sub_field_time] + sub_field_struct = standard_sql.StandardSqlField( + name="last_used", + type=sql_type( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=standard_sql.StandardSqlStructType( + fields=[sub_sub_field_date, sub_sub_field_time] + ), + ), ) - sub_field_bytes = types.StandardSqlField( - name="image_content", type=sql_type(type_kind=sql_type.TypeKind.BYTES) + sub_field_bytes = standard_sql.StandardSqlField( + name="image_content", + type=sql_type(type_kind=bigquery.StandardSqlTypeNames.BYTES), ) # level 0 (top level) - expected_result = types.StandardSqlField( - name="image_usage", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) - ) - expected_result.type.struct_type.fields.extend( - [sub_field_bytes, sub_field_struct] + expected_result = 
standard_sql.StandardSqlField( + name="image_usage", + type=sql_type( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=standard_sql.StandardSqlStructType( + fields=[sub_field_bytes, sub_field_struct] + ), + ), ) # construct legacy SchemaField object @@ -300,14 +310,16 @@ def test_to_standard_sql_struct_type(self): self.assertEqual(standard_field, expected_result) def test_to_standard_sql_array_type_simple(self): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql sql_type = self._get_standard_sql_data_type_class() # construct expected result object - expected_sql_type = sql_type(type_kind=sql_type.TypeKind.ARRAY) - expected_sql_type.array_element_type.type_kind = sql_type.TypeKind.INT64 - expected_result = types.StandardSqlField( + expected_sql_type = sql_type( + type_kind=bigquery.StandardSqlTypeNames.ARRAY, + array_element_type=sql_type(type_kind=bigquery.StandardSqlTypeNames.INT64), + ) + expected_result = standard_sql.StandardSqlField( name="valid_numbers", type=expected_sql_type ) @@ -318,27 +330,31 @@ def test_to_standard_sql_array_type_simple(self): self.assertEqual(standard_field, expected_result) def test_to_standard_sql_array_type_struct(self): - from google.cloud.bigquery_v2 import types + from google.cloud.bigquery import standard_sql sql_type = self._get_standard_sql_data_type_class() # define person STRUCT - name_field = types.StandardSqlField( - name="name", type=sql_type(type_kind=sql_type.TypeKind.STRING) + name_field = standard_sql.StandardSqlField( + name="name", type=sql_type(type_kind=bigquery.StandardSqlTypeNames.STRING) ) - age_field = types.StandardSqlField( - name="age", type=sql_type(type_kind=sql_type.TypeKind.INT64) + age_field = standard_sql.StandardSqlField( + name="age", type=sql_type(type_kind=bigquery.StandardSqlTypeNames.INT64) ) - person_struct = types.StandardSqlField( - name="person_info", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) + person_struct = standard_sql.StandardSqlField( + name="person_info", + type=sql_type( + type_kind=bigquery.StandardSqlTypeNames.STRUCT, + struct_type=StandardSqlStructType(fields=[name_field, age_field]), + ), ) - person_struct.type.struct_type.fields.extend([name_field, age_field]) # define expected result - an ARRAY of person structs expected_sql_type = sql_type( - type_kind=sql_type.TypeKind.ARRAY, array_element_type=person_struct.type + type_kind=bigquery.StandardSqlTypeNames.ARRAY, + array_element_type=person_struct.type, ) - expected_result = types.StandardSqlField( + expected_result = standard_sql.StandardSqlField( name="known_people", type=expected_sql_type ) @@ -353,14 +369,14 @@ def test_to_standard_sql_array_type_struct(self): self.assertEqual(standard_field, expected_result) def test_to_standard_sql_unknown_type(self): - sql_type = self._get_standard_sql_data_type_class() field = self._make_one("weird_field", "TROOLEAN") standard_field = field.to_standard_sql() self.assertEqual(standard_field.name, "weird_field") self.assertEqual( - standard_field.type.type_kind, sql_type.TypeKind.TYPE_KIND_UNSPECIFIED + standard_field.type.type_kind, + bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) def test___eq___wrong_type(self): diff --git a/tests/unit/test_standard_sql_types.py b/tests/unit/test_standard_sql_types.py new file mode 100644 index 000000000..b91f877cc --- /dev/null +++ b/tests/unit/test_standard_sql_types.py @@ -0,0 +1,588 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not 
use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import pytest + +from google.cloud import bigquery as bq + + +class TestStandardSqlDataType: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlDataType + + return StandardSqlDataType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_default_type_kind(self): + instance = self._make_one() + assert instance.type_kind == bq.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED + + def test_to_api_repr_no_type_set(self): + instance = self._make_one() + instance.type_kind = None + + result = instance.to_api_repr() + + assert result == {"typeKind": "TYPE_KIND_UNSPECIFIED"} + + def test_to_api_repr_scalar_type(self): + instance = self._make_one(bq.StandardSqlTypeNames.FLOAT64) + + result = instance.to_api_repr() + + assert result == {"typeKind": "FLOAT64"} + + def test_to_api_repr_array_type_element_type_missing(self): + instance = self._make_one( + bq.StandardSqlTypeNames.ARRAY, array_element_type=None + ) + + result = instance.to_api_repr() + + expected = {"typeKind": "ARRAY"} + assert result == expected + + def test_to_api_repr_array_type_w_element_type(self): + array_element_type = self._make_one(type_kind=bq.StandardSqlTypeNames.BOOL) + instance = self._make_one( + bq.StandardSqlTypeNames.ARRAY, array_element_type=array_element_type + ) + + result = instance.to_api_repr() + + expected = {"typeKind": "ARRAY", "arrayElementType": {"typeKind": "BOOL"}} + assert result == expected + + def test_to_api_repr_struct_type_field_types_missing(self): + instance = self._make_one(bq.StandardSqlTypeNames.STRUCT, struct_type=None) + + result = instance.to_api_repr() + + assert result == {"typeKind": "STRUCT"} + + def test_to_api_repr_struct_type_w_field_types(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + StandardSqlDataType = self._get_target_class() + TypeNames = bq.StandardSqlTypeNames + + person_type = StandardSqlStructType( + fields=[ + StandardSqlField("name", StandardSqlDataType(TypeNames.STRING)), + StandardSqlField("age", StandardSqlDataType(TypeNames.INT64)), + ] + ) + employee_type = StandardSqlStructType( + fields=[ + StandardSqlField("job_title", StandardSqlDataType(TypeNames.STRING)), + StandardSqlField("salary", StandardSqlDataType(TypeNames.FLOAT64)), + StandardSqlField( + "employee_info", + StandardSqlDataType( + type_kind=TypeNames.STRUCT, struct_type=person_type, + ), + ), + ] + ) + + instance = self._make_one(TypeNames.STRUCT, struct_type=employee_type) + result = instance.to_api_repr() + + expected = { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "job_title", "type": {"typeKind": "STRING"}}, + {"name": "salary", "type": {"typeKind": "FLOAT64"}}, + { + "name": "employee_info", + "type": { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "name", "type": {"typeKind": "STRING"}}, + {"name": "age", "type": {"typeKind": "INT64"}}, + ], + }, + }, + }, + ], + }, + } + 
assert result == expected + + def test_from_api_repr_empty_resource(self): + klass = self._get_target_class() + result = klass.from_api_repr(resource={}) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_scalar_type(self): + klass = self._get_target_class() + resource = {"typeKind": "DATE"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.DATE, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_array_type_full(self): + klass = self._get_target_class() + resource = {"typeKind": "ARRAY", "arrayElementType": {"typeKind": "BYTES"}} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.ARRAY, + array_element_type=klass(type_kind=bq.StandardSqlTypeNames.BYTES), + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_array_type_missing_element_type(self): + klass = self._get_target_class() + resource = {"typeKind": "ARRAY"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.ARRAY, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_struct_type_nested(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + klass = self._get_target_class() + TypeNames = bq.StandardSqlTypeNames + + resource = { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "job_title", "type": {"typeKind": "STRING"}}, + {"name": "salary", "type": {"typeKind": "FLOAT64"}}, + { + "name": "employee_info", + "type": { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"name": "name", "type": {"typeKind": "STRING"}}, + {"name": "age", "type": {"typeKind": "INT64"}}, + ], + }, + }, + }, + ], + }, + } + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=TypeNames.STRUCT, + struct_type=StandardSqlStructType( + fields=[ + StandardSqlField("job_title", klass(TypeNames.STRING)), + StandardSqlField("salary", klass(TypeNames.FLOAT64)), + StandardSqlField( + "employee_info", + klass( + type_kind=TypeNames.STRUCT, + struct_type=StandardSqlStructType( + fields=[ + StandardSqlField("name", klass(TypeNames.STRING)), + StandardSqlField("age", klass(TypeNames.INT64)), + ] + ), + ), + ), + ] + ), + ) + assert result == expected + + def test_from_api_repr_struct_type_missing_struct_info(self): + klass = self._get_target_class() + resource = {"typeKind": "STRUCT"} + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=bq.StandardSqlTypeNames.STRUCT, + array_element_type=None, + struct_type=None, + ) + assert result == expected + + def test_from_api_repr_struct_type_incomplete_field_info(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + klass = self._get_target_class() + TypeNames = bq.StandardSqlTypeNames + + resource = { + "typeKind": "STRUCT", + "structType": { + "fields": [ + {"type": {"typeKind": "STRING"}}, # missing name + {"name": "salary"}, # missing type + ], + }, + } + + result = klass.from_api_repr(resource=resource) + + expected = klass( + type_kind=TypeNames.STRUCT, + struct_type=StandardSqlStructType( + fields=[ + 
StandardSqlField(None, klass(TypeNames.STRING)), + StandardSqlField("salary", klass(TypeNames.TYPE_KIND_UNSPECIFIED)), + ] + ), + ) + assert result == expected + + def test__eq__another_type(self): + instance = self._make_one() + + class SqlTypeWannabe: + pass + + not_a_type = SqlTypeWannabe() + not_a_type._properties = instance._properties + + assert instance != not_a_type # Can't fake it. + + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one() + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "type_kind": bq.StandardSqlTypeNames.GEOGRAPHY, + "array_element_type": bq.StandardSqlDataType( + type_kind=bq.StandardSqlTypeNames.INT64 + ), + "struct_type": bq.StandardSqlStructType(fields=[]), + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + @pytest.mark.parametrize( + ("attr_name", "value", "value2"), + ( + ( + "type_kind", + bq.StandardSqlTypeNames.INT64, + bq.StandardSqlTypeNames.FLOAT64, + ), + ( + "array_element_type", + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.STRING), + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ), + ( + "struct_type", + bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="foo")]), + bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="bar")]), + ), + ), + ) + def test__eq__attribute_differs(self, attr_name, value, value2): + instance = self._make_one(**{attr_name: value}) + instance2 = self._make_one(**{attr_name: value2}) + assert instance != instance2 + + def test_str(self): + instance = self._make_one(type_kind=bq.StandardSqlTypeNames.BOOL) + bool_type_repr = repr(bq.StandardSqlTypeNames.BOOL) + assert str(instance) == f"StandardSqlDataType(type_kind={bool_type_repr}, ...)" + + +class TestStandardSqlField: + # This class only contains minimum tests to cover what other tests don't + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlField + + return StandardSqlField + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_name(self): + instance = self._make_one(name="foo") + assert instance.name == "foo" + instance.name = "bar" + assert instance.name == "bar" + + def test_type_missing(self): + instance = self._make_one(type=None) + assert instance.type is None + + def test_type_set_none(self): + instance = self._make_one( + type=bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL) + ) + instance.type = None + assert instance.type is None + + def test_type_set_not_none(self): + instance = self._make_one(type=bq.StandardSqlDataType(type_kind=None)) + instance.type = bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INT64) + assert instance.type == bq.StandardSqlDataType( + type_kind=bq.StandardSqlTypeNames.INT64 + ) + + def test__eq__another_type(self): + instance = self._make_one( + name="foo", + type=bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ) + + class FieldWannabe: + pass + + not_a_field = FieldWannabe() + not_a_field._properties = instance._properties + + assert instance != not_a_field # Can't fake it. 
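Taken together, the serialization tests in this new module describe a simple round trip between StandardSqlDataType instances and their REST representation. A minimal sketch of that behavior, assuming the patched google-cloud-bigquery is installed and using only names introduced by this patch (illustrative, not part of the patch itself):

from google.cloud import bigquery as bq
from google.cloud.bigquery.standard_sql import StandardSqlDataType

# An ARRAY<INT64> type, built the same way the tests above build it.
array_of_int64 = StandardSqlDataType(
    type_kind=bq.StandardSqlTypeNames.ARRAY,
    array_element_type=StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INT64),
)

resource = array_of_int64.to_api_repr()
assert resource == {"typeKind": "ARRAY", "arrayElementType": {"typeKind": "INT64"}}

# Parsing the REST resource back yields an equal instance.
assert StandardSqlDataType.from_api_repr(resource) == array_of_int64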
+ + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one( + name="foo", + type=bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ) + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "name": "foo", + "type": bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INT64), + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + @pytest.mark.parametrize( + ("attr_name", "value", "value2"), + ( + ("name", "foo", "bar",), + ( + "type", + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INTERVAL), + bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.TIME), + ), + ), + ) + def test__eq__attribute_differs(self, attr_name, value, value2): + instance = self._make_one(**{attr_name: value}) + instance2 = self._make_one(**{attr_name: value2}) + assert instance != instance2 + + +class TestStandardSqlStructType: + # This class only contains minimum tests to cover what other tests don't + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlStructType + + return StandardSqlStructType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_fields(self): + instance = self._make_one(fields=[]) + assert instance.fields == [] + + new_fields = [bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + instance.fields = new_fields + assert instance.fields == new_fields + + def test__eq__another_type(self): + instance = self._make_one(fields=[bq.StandardSqlField(name="foo")]) + + class StructTypeWannabe: + pass + + not_a_type = StructTypeWannabe() + not_a_type._properties = instance._properties + + assert instance != not_a_type # Can't fake it. + + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one(fields=[bq.StandardSqlField(name="foo")]) + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "fields": [bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + def test__eq__attribute_differs(self): + instance = self._make_one(fields=[bq.StandardSqlField(name="foo")]) + instance2 = self._make_one( + fields=[bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + ) + assert instance != instance2 + + +class TestStandardSqlTableType: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.standard_sql import StandardSqlTableType + + return StandardSqlTableType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_columns_shallow_copy(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + + columns = [ + StandardSqlField("foo"), + StandardSqlField("bar"), + StandardSqlField("baz"), + ] + + instance = self._make_one(columns=columns) + + assert len(instance.columns) == 3 + columns.pop() + assert len(instance.columns) == 3 # Still the same. 
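The copy semantics checked just above, together with the serialization tests that follow, reduce to the short sketch below (again assuming the patched library; the column names are arbitrary):

from google.cloud.bigquery.standard_sql import StandardSqlField, StandardSqlTableType

columns = [StandardSqlField("foo"), StandardSqlField("bar")]
table_type = StandardSqlTableType(columns=columns)

# The constructor keeps its own copy of the column list.
columns.pop()
assert len(table_type.columns) == 2

# Columns without a type serialize with an explicit null type.
assert table_type.to_api_repr() == {
    "columns": [{"name": "foo", "type": None}, {"name": "bar", "type": None}]
}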
+ + def test_columns_setter(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + + columns = [StandardSqlField("foo")] + instance = self._make_one(columns=columns) + assert instance.columns == columns + + new_columns = [StandardSqlField(name="bar")] + instance.columns = new_columns + assert instance.columns == new_columns + + def test_to_api_repr_no_columns(self): + instance = self._make_one(columns=[]) + result = instance.to_api_repr() + assert result == {"columns": []} + + def test_to_api_repr_with_columns(self): + from google.cloud.bigquery.standard_sql import StandardSqlField + + columns = [StandardSqlField("foo"), StandardSqlField("bar")] + instance = self._make_one(columns=columns) + + result = instance.to_api_repr() + + expected = { + "columns": [{"name": "foo", "type": None}, {"name": "bar", "type": None}] + } + assert result == expected + + def test_from_api_repr_missing_columns(self): + resource = {} + result = self._get_target_class().from_api_repr(resource) + assert result.columns == [] + + def test_from_api_repr_with_incomplete_columns(self): + from google.cloud.bigquery.standard_sql import StandardSqlDataType + from google.cloud.bigquery.standard_sql import StandardSqlField + + resource = { + "columns": [ + {"type": {"typeKind": "BOOL"}}, # missing name + {"name": "bar"}, # missing type + ] + } + + result = self._get_target_class().from_api_repr(resource) + + assert len(result.columns) == 2 + + expected = StandardSqlField( + name=None, type=StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + ) + assert result.columns[0] == expected + + expected = StandardSqlField( + name="bar", + type=StandardSqlDataType( + type_kind=bq.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED + ), + ) + assert result.columns[1] == expected + + def test__eq__another_type(self): + instance = self._make_one(columns=[bq.StandardSqlField(name="foo")]) + + class TableTypeWannabe: + pass + + not_a_type = TableTypeWannabe() + not_a_type._properties = instance._properties + + assert instance != not_a_type # Can't fake it. + + def test__eq__delegates_comparison_to_another_type(self): + instance = self._make_one(columns=[bq.StandardSqlField(name="foo")]) + assert instance == mock.ANY + + def test__eq__similar_instance(self): + kwargs = { + "columns": [ + bq.StandardSqlField(name="foo"), + bq.StandardSqlField(name="bar"), + ] + } + instance = self._make_one(**kwargs) + instance2 = self._make_one(**kwargs) + assert instance == instance2 + + def test__eq__attribute_differs(self): + instance = self._make_one(columns=[bq.StandardSqlField(name="foo")]) + instance2 = self._make_one( + columns=[bq.StandardSqlField(name="foo"), bq.StandardSqlField(name="bar")] + ) + assert instance != instance2 From b0cbfefff28e595a2dd7f00ff22504a79d5a85f3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 30 Sep 2021 20:04:16 +0200 Subject: [PATCH 11/35] chore: sync v3 branch with main (#996) A yet another episode of the well-know series. 
:) --- .github/.OwlBot.lock.yaml | 4 +- .github/.OwlBot.yaml | 2 +- .kokoro/continuous/prerelease-deps-3.8.cfg | 7 + CHANGELOG.md | 27 ++ README.rst | 2 +- docs/conf.py | 2 +- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/_helpers.py | 16 +- google/cloud/bigquery/_pandas_helpers.py | 14 +- google/cloud/bigquery/job/base.py | 13 + google/cloud/bigquery/model.py | 14 + google/cloud/bigquery/retry.py | 2 +- google/cloud/bigquery/schema.py | 100 +++--- google/cloud/bigquery/table.py | 250 ++++++------- google/cloud/bigquery/version.py | 2 +- noxfile.py | 59 +++- owlbot.py | 3 +- samples/geography/requirements.txt | 20 +- samples/snippets/natality_tutorial.py | 14 +- samples/snippets/requirements.txt | 6 +- tests/system/conftest.py | 17 + tests/system/test_arrow.py | 59 ++++ tests/system/test_client.py | 17 +- tests/system/test_pandas.py | 2 +- tests/unit/conftest.py | 5 + tests/unit/job/test_base.py | 6 + tests/unit/job/test_load_config.py | 4 - tests/unit/model/test_model.py | 3 + tests/unit/test__helpers.py | 13 +- tests/unit/test__pandas_helpers.py | 22 ++ tests/unit/test_client.py | 64 ++-- tests/unit/test_external_config.py | 9 +- tests/unit/test_schema.py | 173 ++++----- tests/unit/test_table.py | 385 ++++++++++++--------- 35 files changed, 755 insertions(+), 584 deletions(-) create mode 100644 .kokoro/continuous/prerelease-deps-3.8.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index c07f148f0..2567653c0 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:0ffe3bdd6c7159692df5f7744da74e5ef19966288a6bf76023e8e04e0c424d7d + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest + digest: sha256:87eee22d276554e4e52863ec9b1cb6a7245815dfae20439712bf644348215a5a diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml index 2b6451c19..e54051157 100644 --- a/.github/.OwlBot.yaml +++ b/.github/.OwlBot.yaml @@ -13,7 +13,7 @@ # limitations under the License. docker: - image: gcr.io/repo-automation-bots/owlbot-python:latest + image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest deep-remove-regex: - /owl-bot-staging diff --git a/.kokoro/continuous/prerelease-deps-3.8.cfg b/.kokoro/continuous/prerelease-deps-3.8.cfg new file mode 100644 index 000000000..3595fb43f --- /dev/null +++ b/.kokoro/continuous/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. 
+env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3cb6bee..d531ec477 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.27.1](https://www.github.com/googleapis/python-bigquery/compare/v2.27.0...v2.27.1) (2021-09-27) + + +### Bug Fixes + +* remove py.typed since package fails mypy check ([#988](https://www.github.com/googleapis/python-bigquery/issues/988)) ([39030f2](https://www.github.com/googleapis/python-bigquery/commit/39030f26ce081cfacd456b84694c68e3f04ed48d)) + +## [2.27.0](https://www.github.com/googleapis/python-bigquery/compare/v2.26.0...v2.27.0) (2021-09-24) + + +### Features + +* Add py.typed for PEP 561 compliance ([#976](https://www.github.com/googleapis/python-bigquery/issues/976)) ([96e6bee](https://www.github.com/googleapis/python-bigquery/commit/96e6beef3c63b663b7e5879b1458f4dd1a47a5b5)) +* include key metadata in Job representation ([#964](https://www.github.com/googleapis/python-bigquery/issues/964)) ([acca1cb](https://www.github.com/googleapis/python-bigquery/commit/acca1cb7baaa3b00508246c994ade40314d421c3)) + + +### Bug Fixes + +* Arrow extension-type metadata was not set when calling the REST API or when there are no rows ([#946](https://www.github.com/googleapis/python-bigquery/issues/946)) ([864383b](https://www.github.com/googleapis/python-bigquery/commit/864383bc01636b3774f7da194587b8b7edd0383d)) +* disambiguate missing policy tags from explicitly unset policy tags ([#983](https://www.github.com/googleapis/python-bigquery/issues/983)) ([f83c00a](https://www.github.com/googleapis/python-bigquery/commit/f83c00acead70fc0ce9959eefb133a672d816277)) +* remove default timeout ([#974](https://www.github.com/googleapis/python-bigquery/issues/974)) ([1cef0d4](https://www.github.com/googleapis/python-bigquery/commit/1cef0d4664bf448168b26487a71795144b7f4d6b)) + + +### Documentation + +* simplify destination table sample with f-strings ([#966](https://www.github.com/googleapis/python-bigquery/issues/966)) ([ab6e76f](https://www.github.com/googleapis/python-bigquery/commit/ab6e76f9489262fd9c1876a1c4f93d7e139aa999)) + ## [2.26.0](https://www.github.com/googleapis/python-bigquery/compare/v2.25.2...v2.26.0) (2021-09-01) diff --git a/README.rst b/README.rst index 8454cf9c0..d0ad059a2 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ processing power of Google's infrastructure. - `Product Documentation`_ .. |GA| image:: https://img.shields.io/badge/support-GA-gold.svg - :target: https://github.com/googleapis/google-cloud-python/blob/master/README.rst#general-availability + :target: https://github.com/googleapis/google-cloud-python/blob/main/README.rst#general-availability .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. 
|versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg diff --git a/docs/conf.py b/docs/conf.py index b8ddbd8c8..9545d06ed 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -363,7 +363,7 @@ "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/reference.rst b/docs/reference.rst index 128dee718..8fff2e68f 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -128,6 +128,7 @@ Schema :toctree: generated schema.SchemaField + schema.PolicyTagList Query diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 660a660b4..ec8f68af0 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -87,6 +87,7 @@ from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType +from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.standard_sql import StandardSqlDataType from google.cloud.bigquery.standard_sql import StandardSqlField @@ -145,6 +146,7 @@ "RoutineReference", # Shared helpers "SchemaField", + "PolicyTagList", "UDFResource", "ExternalConfig", "BigtableOptions", diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index e7f5bd59b..f400f9b70 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -604,8 +604,9 @@ def _get_sub_prop(container, keys, default=None): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to get the value for. Each item in - the sequence represents a deeper nesting. The first key is for + A sequence of keys to attempt to get the value for. If ``keys`` is a + string, it is treated as sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. default (Optional[object]): @@ -632,6 +633,9 @@ def _get_sub_prop(container, keys, default=None): Returns: object: The value if present or the default. """ + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys: if key not in sub_val: @@ -647,8 +651,9 @@ def _set_sub_prop(container, keys, value): container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): - A sequence of keys to attempt to set the value for. Each item in - the sequence represents a deeper nesting. The first key is for + A sequence of keys to attempt to set the value for. If ``keys`` is a + string, it is treated as sequence containing a single string key. Each item + in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. value (object): Value to set within the container. 
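The string shorthand documented here simply treats a bare key as a one-element key sequence. A small illustration of both helpers (these are private, underscore-prefixed utilities, shown only to clarify the documented behavior, not as public API):

from google.cloud.bigquery._helpers import _get_sub_prop, _set_sub_prop

container = {}
_set_sub_prop(container, ["jobReference", "jobId"], "job-123")  # nested key path
_set_sub_prop(container, "location", "US")  # plain string, same as ["location"]
assert container == {"jobReference": {"jobId": "job-123"}, "location": "US"}

assert _get_sub_prop(container, ["jobReference", "jobId"]) == "job-123"
assert _get_sub_prop(container, "location") == "US"  # same as ["location"]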
@@ -675,6 +680,9 @@ def _set_sub_prop(container, keys, value): >>> container {'key': {'subkey': 'new'}} """ + if isinstance(keys, str): + keys = [keys] + sub_val = container for key in keys[:-1]: if key not in sub_val: diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a627f5226..b034c0fd1 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -183,6 +183,13 @@ def pyarrow_timestamp(): # the type ID matters, and it's the same for all decimal256 instances. pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", } +BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { + "GEOGRAPHY": { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, +} def bq_to_arrow_struct_data_type(field): @@ -233,7 +240,12 @@ def bq_to_arrow_field(bq_field, array_type=None): if array_type is not None: arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" - return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) + metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get( + bq_field.field_type.upper() if bq_field.field_type else "" + ) + return pyarrow.field( + bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata + ) warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) return None diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index e5fc592a6..698181092 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -357,6 +357,11 @@ def reservation_usage(self): def transaction_info(self) -> Optional[TransactionInfo]: """Information of the multi-statement transaction if this job is part of one. + Since a scripting query job can execute multiple transactions, this + property is only expected on child jobs. Use the + :meth:`google.cloud.bigquery.client.Client.list_jobs` method with the + ``parent_job`` parameter to iterate over child jobs. + .. versionadded:: 2.24.0 """ info = self._properties.get("statistics", {}).get("transactionInfo") @@ -722,6 +727,14 @@ def cancelled(self): and self.error_result.get("reason") == _STOPPED_REASON ) + def __repr__(self): + result = ( + f"{self.__class__.__name__}<" + f"project={self.project}, location={self.location}, id={self.job_id}" + ">" + ) + return result + class _JobConfig(object): """Abstract base class for job configuration objects. diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 18b7b13ec..2f8dfbb8b 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -174,6 +174,20 @@ def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: """ return self._properties.get("labelColumns", []) + @property + def best_trial_id(self) -> Optional[int]: + """The best trial_id across all training runs. + + .. deprecated:: + This property is deprecated! + + Read-only. + """ + value = self._properties.get("bestTrialId") + if value is not None: + value = int(value) + return value + @property def expires(self) -> Optional[datetime.datetime]: """The datetime when this model expires. 
diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 830582322..8a86973cd 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -60,7 +60,7 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ -DEFAULT_TIMEOUT = 5.0 * 60.0 +DEFAULT_TIMEOUT = None """The default API timeout. This is the time to wait per request. To adjust the total wait time, set a diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b52e288f4..f221e65a8 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,13 +15,13 @@ """Schemas for BigQuery tables / queries.""" import collections -from typing import Optional +import enum +from typing import Iterable, Union from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames -_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -50,47 +50,62 @@ """String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" +class _DefaultSentinel(enum.Enum): + """Object used as 'sentinel' indicating default value should be used. + + Uses enum so that pytype/mypy knows that this is the only possible value. + https://stackoverflow.com/a/60605919/101923 + + Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8. + https://docs.python.org/3/library/typing.html#typing.Literal + """ + + DEFAULT_VALUE = object() + + +_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE + + class SchemaField(object): """Describe a single field within a table schema. Args: - name (str): The name of the field. + name: The name of the field. - field_type (str): The type of the field. See + field_type: + The type of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type - mode (Optional[str]): The mode of the field. See + mode: + Defaults to ``'NULLABLE'``. The mode of the field. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode - description (Optional[str]): Description for the field. + description: Description for the field. - fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]): - Subfields (requires ``field_type`` of 'RECORD'). + fields: Subfields (requires ``field_type`` of 'RECORD'). - policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + policy_tags: The policy tag list for the field. - precision (Optional[int]): + precision: Precison (number of digits) of fields with NUMERIC or BIGNUMERIC type. - scale (Optional[int]): + scale: Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. - max_length (Optional[int]): - Maximim length of fields with STRING or BYTES type. - + max_length: Maximum length of fields with STRING or BYTES type. 
""" def __init__( self, - name, - field_type, - mode="NULLABLE", - description=_DEFAULT_VALUE, - fields=(), - policy_tags=None, - precision=_DEFAULT_VALUE, - scale=_DEFAULT_VALUE, - max_length=_DEFAULT_VALUE, + name: str, + field_type: str, + mode: str = "NULLABLE", + description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE, + fields: Iterable["SchemaField"] = (), + policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE, + precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, + max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, ): self._properties = { "name": name, @@ -106,28 +121,12 @@ def __init__( self._properties["scale"] = scale if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length + if policy_tags is not _DEFAULT_VALUE: + self._properties["policyTags"] = ( + policy_tags.to_api_repr() if policy_tags is not None else None + ) self._fields = tuple(fields) - self._policy_tags = self._determine_policy_tags(field_type, policy_tags) - - @staticmethod - def _determine_policy_tags( - field_type: str, given_policy_tags: Optional["PolicyTagList"] - ) -> Optional["PolicyTagList"]: - """Return the given policy tags, or their suitable representation if `None`. - - Args: - field_type: The type of the schema field. - given_policy_tags: The policy tags to maybe ajdust. - """ - if given_policy_tags is not None: - return given_policy_tags - - if field_type is not None and field_type.upper() in _STRUCT_TYPES: - return None - - return PolicyTagList() - @staticmethod def __get_int(api_repr, name): v = api_repr.get(name, _DEFAULT_VALUE) @@ -153,10 +152,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) - policy_tags = cls._determine_policy_tags( - field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) - ) + if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: + policy_tags = PolicyTagList.from_api_repr(policy_tags) return cls( field_type=field_type, @@ -231,7 +230,8 @@ def policy_tags(self): """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list definition for this field. """ - return self._policy_tags + resource = self._properties.get("policyTags") + return PolicyTagList.from_api_repr(resource) if resource is not None else None def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -245,10 +245,6 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - else: - # Explicitly include policy tag definition (we must not do it for RECORD - # fields, because those are not leaf fields). - answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. 
return answer @@ -273,7 +269,7 @@ def _key(self): field_type = f"{field_type}({self.precision})" policy_tags = ( - () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + () if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) return ( diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c44289324..75901afb4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -64,6 +64,7 @@ import pandas import geopandas from google.cloud import bigquery_storage + from google.cloud.bigquery.dataset import DatasetReference _NO_PANDAS_ERROR = ( @@ -117,45 +118,95 @@ def _view_use_legacy_sql_getter(table): return True -class TableReference(object): +class _TableBase: + """Base class for Table-related classes with common functionality.""" + + _PROPERTY_TO_API_FIELD = { + "dataset_id": ["tableReference", "datasetId"], + "project": ["tableReference", "projectId"], + "table_id": ["tableReference", "tableId"], + } + + def __init__(self): + self._properties = {} + + @property + def project(self) -> str: + """Project bound to the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) + + @property + def dataset_id(self) -> str: + """ID of dataset containing the table.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) + + @property + def table_id(self) -> str: + """The table ID.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"] + ) + + @property + def path(self) -> str: + """URL path for the table's APIs.""" + return ( + f"/projects/{self.project}/datasets/{self.dataset_id}" + f"/tables/{self.table_id}" + ) + + def __eq__(self, other): + if isinstance(other, _TableBase): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + + +class TableReference(_TableBase): """TableReferences are pointers to tables. 
See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference Args: - dataset_ref (google.cloud.bigquery.dataset.DatasetReference): + dataset_ref: A pointer to the dataset - table_id (str): The ID of the table + table_id: + The ID of the table """ - def __init__(self, dataset_ref, table_id): - self._project = dataset_ref.project - self._dataset_id = dataset_ref.dataset_id - self._table_id = table_id - - @property - def project(self): - """str: Project bound to the table""" - return self._project - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._dataset_id + _PROPERTY_TO_API_FIELD = { + "dataset_id": "datasetId", + "project": "projectId", + "table_id": "tableId", + } - @property - def table_id(self): - """str: The table ID.""" - return self._table_id + def __init__(self, dataset_ref: "DatasetReference", table_id: str): + self._properties = {} - @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self._project, - self._dataset_id, - self._table_id, + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["project"], + dataset_ref.project, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["dataset_id"], + dataset_ref.dataset_id, + ) + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"], table_id, ) @classmethod @@ -216,6 +267,7 @@ def from_api_repr(cls, resource: dict) -> "TableReference": project = resource["projectId"] dataset_id = resource["datasetId"] table_id = resource["tableId"] + return cls(DatasetReference(project, dataset_id), table_id) def to_api_repr(self) -> dict: @@ -224,11 +276,7 @@ def to_api_repr(self) -> dict: Returns: Dict[str, object]: Table reference represented as an API resource """ - return { - "projectId": self._project, - "datasetId": self._dataset_id, - "tableId": self._table_id, - } + return copy.deepcopy(self._properties) def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. @@ -248,54 +296,25 @@ def to_bqstorage(self) -> str: str: A reference to this table in the BigQuery Storage API. """ - table_id, _, _ = self._table_id.partition("@") + table_id, _, _ = self.table_id.partition("@") table_id, _, _ = table_id.partition("$") - table_ref = "projects/{}/datasets/{}/tables/{}".format( - self._project, self._dataset_id, table_id, + table_ref = ( + f"projects/{self.project}/datasets/{self.dataset_id}/tables/{table_id}" ) - return table_ref - def _key(self): - """A tuple key that uniquely describes this field. - - Used to compute this instance's hashcode and evaluate equality. - - Returns: - Tuple[str]: The contents of this :class:`DatasetReference`. 
- """ - return (self._project, self._dataset_id, self._table_id) - - def __eq__(self, other): - if isinstance(other, (Table, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - elif isinstance(other, TableReference): - return self._key() == other._key() - else: - return NotImplemented - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self._key()) - def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference(self._project, self._dataset_id) - return "TableReference({}, '{}')".format(repr(dataset_ref), self._table_id) + dataset_ref = DatasetReference(self.project, self.dataset_id) + return f"TableReference({dataset_ref!r}, '{self.table_id}')" -class Table(object): +class Table(_TableBase): """Tables represent a set of rows whose values correspond to a schema. See @@ -316,9 +335,9 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + **_TableBase._PROPERTY_TO_API_FIELD, "clustering_fields": "clustering", "created": "creationTime", - "dataset_id": ["tableReference", "datasetId"], "description": "description", "encryption_configuration": "encryptionConfiguration", "etag": "etag", @@ -337,14 +356,12 @@ class Table(object): "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", - "project": ["tableReference", "projectId"], "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", "schema": "schema", "snapshot_definition": "snapshotDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", - "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", "type": "type", "view_use_legacy_sql": "view", @@ -359,38 +376,8 @@ def __init__(self, table_ref, schema=None): if schema is not None: self.schema = schema - @property - def project(self): - """str: Project bound to the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["project"] - ) - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] - ) - - @property - def table_id(self): - """str: ID of the table.""" - return _helpers._get_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["table_id"] - ) - reference = property(_reference_getter) - @property - def path(self): - """str: URL path for the table's APIs.""" - return "/projects/%s/datasets/%s/tables/%s" % ( - self.project, - self.dataset_id, - self.table_id, - ) - @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -1031,29 +1018,11 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) - def __eq__(self, other): - if isinstance(other, Table): - return ( - self._properties["tableReference"] - == other._properties["tableReference"] - ) - elif isinstance(other, (TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def __repr__(self): return 
"Table({})".format(repr(self.reference)) -class TableListItem(object): +class TableListItem(_TableBase): """A read-only table resource from a list operation. For performance reasons, the BigQuery API only includes some of the table @@ -1117,21 +1086,6 @@ def expires(self): 1000.0 * float(expiration_time) ) - @property - def project(self): - """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] - - @property - def dataset_id(self): - """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] - - @property - def table_id(self): - """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] - reference = property(_reference_getter) @property @@ -1267,19 +1221,6 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) - def __eq__(self, other): - if isinstance(other, (Table, TableReference, TableListItem)): - return ( - self.project == other.project - and self.dataset_id == other.dataset_id - and self.table_id == other.table_id - ) - else: - return NotImplemented - - def __hash__(self): - return hash((self.project, self.dataset_id, self.table_id)) - def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. @@ -1783,10 +1724,14 @@ def to_arrow( if owns_bqstorage_client: bqstorage_client._transport.grpc_channel.close() - if record_batches: + if record_batches and bqstorage_client is not None: return pyarrow.Table.from_batches(record_batches) else: - # No records, use schema based on BigQuery schema. + # No records (not record_batches), use schema based on BigQuery schema + # **or** + # we used the REST API (bqstorage_client is None), + # which doesn't add arrow extension metadata, so we let + # `bq_to_arrow_schema` do it. arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) @@ -2225,7 +2170,10 @@ def to_geodataframe( """ if geopandas is None: raise ValueError(_NO_GEOPANDAS_ERROR) - return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + + # Since an empty GeoDataFrame has no geometry column, we do not CRS on it, + # because that's deprecated. + return geopandas.GeoDataFrame() def to_dataframe_iterable( self, diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 1f7d79ab9..3e5c77ede 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.26.0" +__version__ = "2.27.1" diff --git a/noxfile.py b/noxfile.py index dbf6a163c..d41573407 100644 --- a/noxfile.py +++ b/noxfile.py @@ -16,6 +16,7 @@ import pathlib import os +import re import shutil import nox @@ -97,6 +98,13 @@ def unit(session): @nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]]) def unit_noextras(session): """Run the unit test suite.""" + + # Install optional dependencies that are out-of-date. + # https://github.com/googleapis/python-bigquery/issues/933 + # There is no pyarrow 1.0.0 package for Python 3.9. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.install("pyarrow==1.0.0") + default(session, install_extras=False) @@ -205,9 +213,31 @@ def prerelease_deps(session): # PyArrow prerelease packages are published to an alternative PyPI host. 
# https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( - "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" + "--extra-index-url", + "https://pypi.fury.io/arrow-nightlies/", + "--prefer-binary", + "--pre", + "--upgrade", + "pyarrow", + ) + session.install( + "--extra-index-url", + "https://pypi.anaconda.org/scipy-wheels-nightly/simple", + "--prefer-binary", + "--pre", + "--upgrade", + "pandas", + ) + + session.install( + "--pre", + "--upgrade", + "google-api-core", + "google-cloud-bigquery-storage", + "google-cloud-core", + "google-resumable-media", + "grpcio", ) - session.install("--pre", "grpcio", "pandas") session.install( "freezegun", "google-cloud-datacatalog", @@ -219,7 +249,30 @@ def prerelease_deps(session): "pytest", "pytest-cov", ) - session.install("-e", ".[all]") + + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + # We use --no-deps to ensure that pre-release versions aren't overwritten + # by the version ranges in setup.py. + session.install(*deps) + session.install("--no-deps", "-e", ".[all]") # Print out prerelease package versions. session.run("python", "-c", "import grpc; print(grpc.__version__)") diff --git a/owlbot.py b/owlbot.py index be493957e..dd9255d2f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -30,7 +30,7 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": "http://pandas.pydata.org/pandas-docs/dev", + "pandas": "http://pandas.pydata.org/pandas-docs/stable/", "geopandas": "https://geopandas.org/", }, ) @@ -42,6 +42,7 @@ "noxfile.py", "docs/multiprocessing.rst", ".coveragerc", + ".github/CODEOWNERS", # Include custom SNIPPETS_TESTS job for performance. 
# https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8fb578018..46162762c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -2,7 +2,7 @@ attrs==21.2.0 cachetools==4.2.2 certifi==2021.5.30 cffi==1.14.6 -charset-normalizer==2.0.4 +charset-normalizer==2.0.6 click==8.0.1 click-plugins==1.1.1 cligj==0.7.2 @@ -11,17 +11,17 @@ Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 google-api-core==2.0.1 -google-auth==2.0.2 -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-auth==2.2.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-cloud-core==2.0.0 -google-crc32c==1.1.2 -google-resumable-media==2.0.2 +google-crc32c==1.2.0 +google-resumable-media==2.0.3 googleapis-common-protos==1.53.0 -grpcio==1.39.0 +grpcio==1.41.0 idna==3.2 importlib-metadata==4.8.1 -libcst==0.3.20 +libcst==0.3.21 munch==2.5.0 mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" @@ -29,8 +29,6 @@ numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5; python_version < '3.7' pandas==1.3.2; python_version >= '3.7' -proto-plus==1.19.0 -protobuf==3.17.3 pyarrow==5.0.0 pyasn1==0.4.8 pyasn1-modules==0.2.8 @@ -47,5 +45,5 @@ Shapely==1.7.1 six==1.16.0 typing-extensions==3.10.0.2 typing-inspect==0.7.1 -urllib3==1.26.6 +urllib3==1.26.7 zipp==3.5.0 diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py index a8d90501a..ed08b279a 100644 --- a/samples/snippets/natality_tutorial.py +++ b/samples/snippets/natality_tutorial.py @@ -38,12 +38,12 @@ def run_natality_tutorial(override_values={}): # Prepare a reference to a new dataset for storing the query results. dataset_id = "natality_regression" - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [END bigquery_query_natality_tutorial] # To facilitate testing, we replace values with alternatives # provided by the testing harness. dataset_id = override_values.get("dataset_id", dataset_id) - dataset_id_full = "{}.{}".format(client.project, dataset_id) + dataset_id_full = f"{client.project}.{dataset_id}" # [START bigquery_query_natality_tutorial] dataset = bigquery.Dataset(dataset_id_full) @@ -51,15 +51,13 @@ def run_natality_tutorial(override_values={}): # Create the new BigQuery dataset. dataset = client.create_dataset(dataset) - # In the new BigQuery dataset, create a reference to a new table for - # storing the query results. - table_ref = dataset.table("regression_input") - # Configure the query job. job_config = bigquery.QueryJobConfig() - # Set the destination table to the table reference created above. - job_config.destination = table_ref + # Set the destination table to where you want to store query results. + # As of google-cloud-bigquery 1.11.0, a fully qualified table ID can be + # used in place of a TableReference. + job_config.destination = f"{dataset_id_full}.regression_input" # Set up a query in Standard SQL, which is the default for the BigQuery # Python client library. 
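The idiom the updated tutorial relies on, with the destination given as a plain string, shown on its own (project, dataset, and table names below are placeholders, and the query is a stand-in):

from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.QueryJobConfig()
# A fully qualified table ID string works in place of a TableReference.
job_config.destination = "my-project.natality_regression.regression_input"

query_job = client.query("SELECT 1 AS plurality, 30 AS mother_age", job_config=job_config)
query_job.result()  # Wait for the query to finish writing the destination table.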
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e096af157..f9b9d023c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ -google-cloud-bigquery==2.26.0 -google-cloud-bigquery-storage==2.7.0 +google-cloud-bigquery==2.27.1 +google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 -grpcio==1.39.0 +grpcio==1.41.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' diff --git a/tests/system/conftest.py b/tests/system/conftest.py index cc2c2a4dc..7eec76a32 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. import pathlib +import re import pytest import test_utils.prefixer @@ -61,6 +62,17 @@ def dataset_id(bigquery_client): bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) +@pytest.fixture() +def dataset_client(bigquery_client, dataset_id): + import google.cloud.bigquery.job + + return bigquery.Client( + default_query_job_config=google.cloud.bigquery.job.QueryJobConfig( + default_dataset=f"{bigquery_client.project}.{dataset_id}", + ) + ) + + @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" @@ -98,3 +110,8 @@ def scalars_extreme_table( job.result() yield full_table_id bigquery_client.delete_table(full_table_id) + + +@pytest.fixture +def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): + return replace_non_anum("_", request.node.name) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index f36dc0944..092562b3c 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -106,3 +106,62 @@ def test_list_rows_nullable_scalars_dtypes( timestamp_type = schema.field("timestamp_col").type assert timestamp_type.unit == "us" assert timestamp_type.tz is not None + + +@pytest.mark.parametrize("do_insert", [True, False]) +def test_arrow_extension_types_same_for_storage_and_REST_APIs_894( + dataset_client, test_table_name, do_insert +): + types = dict( + astring=("STRING", "'x'"), + astring9=("STRING(9)", "'x'"), + abytes=("BYTES", "b'x'"), + abytes9=("BYTES(9)", "b'x'"), + anumeric=("NUMERIC", "42"), + anumeric9=("NUMERIC(9)", "42"), + anumeric92=("NUMERIC(9,2)", "42"), + abignumeric=("BIGNUMERIC", "42e30"), + abignumeric49=("BIGNUMERIC(37)", "42e30"), + abignumeric492=("BIGNUMERIC(37,2)", "42e30"), + abool=("BOOL", "true"), + adate=("DATE", "'2021-09-06'"), + adatetime=("DATETIME", "'2021-09-06T09:57:26'"), + ageography=("GEOGRAPHY", "ST_GEOGFROMTEXT('point(0 0)')"), + # Can't get arrow data for interval :( + # ainterval=('INTERVAL', "make_interval(1, 2, 3, 4, 5, 6)"), + aint64=("INT64", "42"), + afloat64=("FLOAT64", "42.0"), + astruct=("STRUCT", "struct(42)"), + atime=("TIME", "'1:2:3'"), + atimestamp=("TIMESTAMP", "'2021-09-06T09:57:26'"), + ) + columns = ", ".join(f"{k} {t[0]}" for k, t in types.items()) + dataset_client.query(f"create table {test_table_name} ({columns})").result() + if do_insert: + names = list(types) + values = ", ".join(types[name][1] for name in names) + names = ", ".join(names) + dataset_client.query( + f"insert into {test_table_name} ({names}) values ({values})" + ).result() + at = dataset_client.query(f"select * from {test_table_name}").result().to_arrow() + storage_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + at = ( + dataset_client.query(f"select * from 
{test_table_name}") + .result() + .to_arrow(create_bqstorage_client=False) + ) + rest_api_metadata = { + at.field(i).name: at.field(i).metadata for i in range(at.num_columns) + } + + assert rest_api_metadata == storage_api_metadata + assert rest_api_metadata["adatetime"] == { + b"ARROW:extension:name": b"google:sqlType:datetime" + } + assert rest_api_metadata["ageography"] == { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + } diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 120e10f45..4884112ac 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -665,14 +665,15 @@ def test_unset_table_schema_attributes(self): mode=old_field.mode, description=None, fields=old_field.fields, - policy_tags=None, + policy_tags=PolicyTagList(), ) table.schema = new_schema updated_table = Config.CLIENT.update_table(table, ["schema"]) self.assertFalse(updated_table.schema[1].description) # Empty string or None. - self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + # policyTags key expected to be missing from response. + self.assertIsNone(updated_table.schema[1].policy_tags) def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) @@ -1578,9 +1579,15 @@ def test_transaction_info(self): query_job = Config.CLIENT.query(sql) query_job.result() - # Transaction ID set by the server should be accessible - assert query_job.transaction_info is not None - assert query_job.transaction_info.transaction_id != "" + child_jobs = Config.CLIENT.list_jobs(parent_job=query_job) + begin_transaction_job = next(iter(child_jobs)) + + # Transaction ID set by the server should be accessible on the child + # job responsible for `BEGIN TRANSACTION`. It is not expected to be + # present on the parent job itself. + # https://github.com/googleapis/python-bigquery/issues/975 + assert begin_transaction_job.transaction_info is not None + assert begin_transaction_job.transaction_info.transaction_id != "" def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 075d3b680..cbf4dff27 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -26,8 +26,8 @@ import pytest from google.cloud import bigquery -from google.cloud.bigquery import enums from google.cloud import bigquery_storage +from google.cloud.bigquery import enums from . import helpers diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index feba65aa5..c2ae78eaa 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -54,3 +54,8 @@ def disable_add_server_timeout_header(request): noop_add_server_timeout_header, ): yield + + +def pytest_configure(config): + # Explicitly register custom test markers to avoid warnings. 
+ config.addinivalue_line("markers", "enable_add_server_timeout_header") diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index aa8e9c045..e320c72cb 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1043,6 +1043,12 @@ def test_cancelled_w_error_result_w_stopped(self): self.assertTrue(job.cancelled()) + def test_repr(self): + client = _make_client(project="project-foo") + job = self._make_one("job-99", client) + job._properties.setdefault("jobReference", {})["location"] = "ABC" + assert repr(job) == "_AsyncJob" + class Test_JobConfig(unittest.TestCase): JOB_TYPE = "testing" diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index cbe087dac..5a0c5a83f 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -484,13 +484,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index c5f9b77c1..3cc1dd4c4 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -99,6 +99,7 @@ def test_from_api_repr(target_class): ), }, ], + "bestTrialId": "123", "featureColumns": [], "encryptionConfiguration": {"kmsKeyName": KMS_KEY_NAME}, } @@ -119,6 +120,7 @@ def test_from_api_repr(target_class): assert got.model_type == "LOGISTIC_REGRESSION" assert got.labels == {"greeting": "こんにちは"} assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME + assert got.best_trial_id == 123 assert got.training_runs[0]["trainingOptions"]["initialLearnRate"] == 1.0 assert ( google.cloud._helpers._rfc3339_to_datetime(got.training_runs[0]["startTime"]) @@ -161,6 +163,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert len(got.training_runs) == 0 assert len(got.feature_columns) == 0 assert len(got.label_columns) == 0 + assert got.best_trial_id is None def test_from_api_repr_w_unknown_fields(target_class): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 035f04456..2377be79c 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1143,9 +1143,13 @@ def test_w_empty_container_default_default(self): def test_w_missing_key_explicit_default(self): self.assertEqual(self._call_fut({"key2": 2}, ["key1"], default=1), 1) - def test_w_matching_single_key(self): + def test_w_matching_single_key_in_sequence(self): self.assertEqual(self._call_fut({"key1": 1}, ["key1"]), 1) + def test_w_matching_single_string_key(self): + data = {"k": {"e": {"y": "foo"}}, "key": "bar"} + self.assertEqual(self._call_fut(data, "key"), "bar") + def test_w_matching_first_key_missing_second_key(self): self.assertIsNone(self._call_fut({"key1": {"key3": 3}}, ["key1", "key2"])) @@ -1159,11 +1163,16 @@ def _call_fut(self, container, keys, value): return _set_sub_prop(container, keys, value) - def test_w_empty_container_single_key(self): + def 
test_w_empty_container_single_key_in_sequence(self): container = {} self._call_fut(container, ["key1"], "value") self.assertEqual(container, {"key1": "value"}) + def test_w_empty_container_single_string_key(self): + container = {} + self._call_fut(container, "key", "value") + self.assertEqual(container, {"key": "value"}) + def test_w_empty_container_nested_keys(self): container = {} self._call_fut(container, ["key1", "key2", "key3"], "value") diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0140beb77..36becf182 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1610,3 +1610,25 @@ def test_bq_to_arrow_field_type_override(module_under_test): ).type == pyarrow.binary() ) + + +@pytest.mark.parametrize( + "field_type, metadata", + [ + ("datetime", {b"ARROW:extension:name": b"google:sqlType:datetime"}), + ( + "geography", + { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + ), + ], +) +def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", field_type) + ).metadata + == metadata + ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 9b12128c6..2ddf98077 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -940,18 +940,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query}, @@ -985,18 +975,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -1919,6 +1899,7 @@ def test_update_routine(self): def test_update_table(self): from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.table import Table path = "projects/%s/datasets/%s/tables/%s" % ( @@ -1945,7 +1926,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -1956,7 +1936,15 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + # Explicitly setting policyTags to an empty list of names should be included in the sent resource. 
+ # https://github.com/googleapis/python-bigquery/issues/981 + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + description=None, + policy_tags=PolicyTagList(names=()), + ), SchemaField( "age", "INTEGER", mode="REQUIRED", description="New field description" ), @@ -1994,7 +1982,6 @@ def test_update_table(self): "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", - "policyTags": {"names": []}, }, ] }, @@ -2113,21 +2100,14 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", - "policyTags": {"names": []}, - }, - { - "name": "country", - "type": "STRING", - "mode": "NULLABLE", - "policyTags": {"names": []}, }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6706,7 +6686,13 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) - assert field["policyTags"]["names"] == [] + # Avoid accidentally updating policy tags when not explicitly included. + # https://github.com/googleapis/python-bigquery/issues/981 + # Also, avoid 403 if someone has permission to write to table but + # not update policy tags by omitting policy tags we might have + # received from a get table request. + # https://github.com/googleapis/python-bigquery/pull/557 + assert "policyTags" not in field # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field @@ -7904,21 +7890,18 @@ def test_schema_to_json_with_file_path(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -7951,21 +7934,18 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", - "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", - "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 1f49dba5d..3dc9dd179 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -78,14 +78,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index edc05494c..c845d08c1 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -44,15 +44,40 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, PolicyTagList()) + 
self.assertIsNone(field.policy_tags) def test_constructor_explicit(self): - field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") self.assertEqual(field.mode, "REQUIRED") self.assertEqual(field.description, "Testing") self.assertEqual(field.fields, ()) + self.assertEqual( + field.policy_tags, + PolicyTagList( + names=( + "projects/a/locations/b/taxonomies/c/policyTags/e", + "projects/f/locations/g/taxonomies/h/policyTags/i", + ) + ), + ) + + def test_constructor_explicit_none(self): + field = self._make_one("test", "STRING", description=None, policy_tags=None) + self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -68,20 +93,6 @@ def test_constructor_subfields(self): self.assertEqual(field.fields[0], sub_field1) self.assertEqual(field.fields[1], sub_field2) - def test_constructor_with_policy_tags(self): - from google.cloud.bigquery.schema import PolicyTagList - - policy = PolicyTagList(names=("foo", "bar")) - field = self._make_one( - "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy - ) - self.assertEqual(field.name, "test") - self.assertEqual(field.field_type, "STRING") - self.assertEqual(field.mode, "REQUIRED") - self.assertEqual(field.description, "Testing") - self.assertEqual(field.fields, ()) - self.assertEqual(field.policy_tags, policy) - def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -90,17 +101,28 @@ def test_to_api_repr(self): policy.to_api_repr(), {"names": ["foo", "bar"]}, ) - field = self._make_one("foo", "INTEGER", "NULLABLE", policy_tags=policy) + field = self._make_one( + "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + ) self.assertEqual( field.to_api_repr(), { "mode": "NULLABLE", "name": "foo", "type": "INTEGER", + "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, }, ) + def test_to_api_repr_omits_unset_properties(self): + # Prevent accidentally modifying fields that aren't explicitly set. + # https://github.com/googleapis/python-bigquery/issues/981 + field = self._make_one("foo", "INTEGER") + resource = field.to_api_repr() + self.assertNotIn("description", resource) + self.assertNotIn("policyTags", resource) + def test_to_api_repr_with_subfield(self): for record_type in ("RECORD", "STRUCT"): subfield = self._make_one("bar", "INTEGER", "NULLABLE") @@ -108,14 +130,7 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "policyTags": {"names": []}, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -165,9 +180,15 @@ def test_from_api_repr_defaults(self): self.assertEqual(field.name, "foo") self.assertEqual(field.field_type, "RECORD") self.assertEqual(field.mode, "NULLABLE") - self.assertEqual(field.description, None) self.assertEqual(len(field.fields), 0) + # Keys not present in API representation shouldn't be included in + # _properties. 
+ self.assertIsNone(field.description) + self.assertIsNone(field.policy_tags) + self.assertNotIn("description", field._properties) + self.assertNotIn("policyTags", field._properties) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -583,22 +604,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ) def test_w_description(self): @@ -624,7 +633,6 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, - "policyTags": {"names": []}, }, ) self.assertEqual( @@ -634,7 +642,6 @@ def test_w_description(self): "type": "INTEGER", "mode": "REQUIRED", "description": None, - "policyTags": {"names": []}, }, ) @@ -650,13 +657,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -665,18 +666,8 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "policyTags": {"names": []}, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) @@ -888,83 +879,43 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="NUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9,), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict( - name="n", - type="NUMERIC", - mode="NULLABLE", - precision=9, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2,), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict( - name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40,), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict( - name="n", - type="BIGNUMERIC", - mode="NULLABLE", - precision=40, - scale=2, - policyTags={"names": []}, - ), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2,), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="STRING", 
mode="NULLABLE"), ), ( dict(name="n", field_type="STRING", max_length=9), - dict( - name="n", - type="STRING", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9,), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), + dict(name="n", type="BYTES", mode="NULLABLE"), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict( - name="n", - type="BYTES", - mode="NULLABLE", - maxLength=9, - policyTags={"names": []}, - ), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9,), ), ], ) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ed9ed5d0f..a34b0d56b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -93,6 +93,189 @@ def test_ctor_with_key(self): self.assertEqual(encryption_config.kms_key_name, self.KMS_KEY_NAME) +class TestTableBase: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import _TableBase + + return _TableBase + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + instance = self._make_one() + assert instance._properties == {} + + def test_project(self): + instance = self._make_one() + instance._properties = {"tableReference": {"projectId": "p_1"}} + assert instance.project == "p_1" + + def test_dataset_id(self): + instance = self._make_one() + instance._properties = {"tableReference": {"datasetId": "ds_1"}} + assert instance.dataset_id == "ds_1" + + def test_table_id(self): + instance = self._make_one() + instance._properties = {"tableReference": {"tableId": "tbl_1"}} + assert instance.table_id == "tbl_1" + + def test_path(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + assert instance.path == "/projects/p_1/datasets/ds_1/tables/tbl_1" + + def test___eq___wrong_type(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + class TableWannabe: + pass + + wannabe_other = TableWannabe() + wannabe_other._properties = instance._properties + wannabe_other.project = "p_1" + wannabe_other.dataset_id = "ds_1" + wannabe_other.table_id = "tbl_1" + + assert instance != wannabe_other # Can't fake it. + assert instance == mock.ANY # ...but delegation to other object works. 
+ + def test___eq___project_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "projectId": "p_2", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + assert instance != other + + def test___eq___dataset_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_2", + "tableId": "tbl_1", + } + } + assert instance != other + + def test___eq___table_mismatch(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_2", + } + } + assert instance != other + + def test___eq___equality(self): + instance = self._make_one() + instance._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + other = self._make_one() + other._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + assert instance == other + + def test___hash__set_equality(self): + instance_1 = self._make_one() + instance_1._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + instance_2 = self._make_one() + instance_2._properties = { + "tableReference": { + "projectId": "p_2", + "datasetId": "ds_2", + "tableId": "tbl_2", + } + } + + set_one = {instance_1, instance_2} + set_two = {instance_1, instance_2} + assert set_one == set_two + + def test___hash__sets_not_equal(self): + instance_1 = self._make_one() + instance_1._properties = { + "tableReference": { + "projectId": "p_1", + "datasetId": "ds_1", + "tableId": "tbl_1", + } + } + + instance_2 = self._make_one() + instance_2._properties = { + "tableReference": { + "projectId": "p_2", + "datasetId": "ds_2", + "tableId": "tbl_2", + } + } + + set_one = {instance_1} + set_two = {instance_2} + assert set_one != set_two + + class TestTableReference(unittest.TestCase): @staticmethod def _get_target_class(): @@ -187,55 +370,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.dataset_id, "string_dataset") self.assertEqual(got.table_id, "string_table") - def test___eq___wrong_type(self): - dataset_ref = DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset_ref, "table_1") - other = object() - self.assertNotEqual(table, other) - self.assertEqual(table, mock.ANY) - - def test___eq___project_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - other_dataset = DatasetReference("project_2", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(other_dataset, "table_1") - self.assertNotEqual(table, other) - - def test___eq___dataset_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - other_dataset = DatasetReference("project_1", "dataset_2") - table = self._make_one(dataset, "table_1") - other = self._make_one(other_dataset, "table_1") - self.assertNotEqual(table, other) - - def test___eq___table_mismatch(self): - dataset = DatasetReference("project_1", "dataset_1") - table = 
self._make_one(dataset, "table_1") - other = self._make_one(dataset, "table_2") - self.assertNotEqual(table, other) - - def test___eq___equality(self): - dataset = DatasetReference("project_1", "dataset_1") - table = self._make_one(dataset, "table_1") - other = self._make_one(dataset, "table_1") - self.assertEqual(table, other) - - def test___hash__set_equality(self): - dataset = DatasetReference("project_1", "dataset_1") - table1 = self._make_one(dataset, "table1") - table2 = self._make_one(dataset, "table2") - set_one = {table1, table2} - set_two = {table1, table2} - self.assertEqual(set_one, set_two) - - def test___hash__not_equals(self): - dataset = DatasetReference("project_1", "dataset_1") - table1 = self._make_one(dataset, "table1") - table2 = self._make_one(dataset, "table2") - set_one = {table1} - set_two = {table2} - self.assertNotEqual(set_one, set_two) - def test___repr__(self): dataset = DatasetReference("project1", "dataset1") table1 = self._make_one(dataset, "table1") @@ -549,44 +683,6 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") - def test__eq__wrong_type(self): - table = self._make_one("project_foo.dataset_bar.table_baz") - - class TableWannabe: - pass - - not_a_table = TableWannabe() - not_a_table._properties = table._properties - - assert table != not_a_table # Can't fake it. - - def test__eq__same_table_basic(self): - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_2 = self._make_one("project_foo.dataset_bar.table_baz") - assert table_1 == table_2 - - def test__eq__same_table_multiple_properties(self): - from google.cloud.bigquery import SchemaField - - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_1.require_partition_filter = True - table_1.labels = {"first": "one", "second": "two"} - - table_1.schema = [ - SchemaField("name", "STRING", "REQUIRED"), - SchemaField("age", "INTEGER", "NULLABLE"), - ] - - table_2 = self._make_one("project_foo.dataset_bar.table_baz") - table_2.require_partition_filter = True - table_2.labels = {"first": "one", "second": "two"} - table_2.schema = [ - SchemaField("name", "STRING", "REQUIRED"), - SchemaField("age", "INTEGER", "NULLABLE"), - ] - - assert table_1 == table_2 - def test__eq__same_table_property_different(self): table_1 = self._make_one("project_foo.dataset_bar.table_baz") table_1.description = "This is table baz" @@ -596,12 +692,6 @@ def test__eq__same_table_property_different(self): assert table_1 == table_2 # Still equal, only table reference is important. - def test__eq__different_table(self): - table_1 = self._make_one("project_foo.dataset_bar.table_baz") - table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") - - assert table_1 != table_2 - def test_hashable(self): table_1 = self._make_one("project_foo.dataset_bar.table_baz") table_1.description = "This is a table" @@ -1575,38 +1665,6 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) - def test__eq__wrong_type(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table = self._make_one(resource) - - class FakeTableListItem: - project = "project_foo" - dataset_id = "dataset_bar" - table_id = "table_baz" - - not_a_table = FakeTableListItem() - - assert table != not_a_table # Can't fake it. 
- - def test__eq__same_table(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource) - table_2 = self._make_one(resource) - - assert table_1 == table_2 - def test__eq__same_table_property_different(self): table_ref_resource = { "projectId": "project_foo", @@ -1622,40 +1680,6 @@ def test__eq__same_table_property_different(self): assert table_1 == table_2 # Still equal, only table reference is important. - def test__eq__different_table(self): - resource_1 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_1 = self._make_one(resource_1) - - resource_2 = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_quux", - } - } - table_2 = self._make_one(resource_2) - - assert table_1 != table_2 - - def test_hashable(self): - resource = { - "tableReference": { - "projectId": "project_foo", - "datasetId": "dataset_bar", - "tableId": "table_baz", - } - } - table_item = self._make_one(resource) - table_item_2 = self._make_one(resource) - - assert hash(table_item) == hash(table_item_2) - class TestTableClassesInterchangeability: @staticmethod @@ -1850,8 +1874,7 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(df.crs.srs, "EPSG:4326") - self.assertEqual(df.crs.name, "WGS 84") + self.assertIsNone(df.crs) class TestRowIterator(unittest.TestCase): @@ -3890,8 +3913,14 @@ def test_to_geodataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.geog.dtype.name, "geometry") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") self.assertEqual(df.crs.name, "WGS 84") self.assertEqual(df.geog.crs.srs, "EPSG:4326") @@ -3962,8 +3991,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual(df.geog.dtype.name, "geometry") self.assertEqual(df.geog2.dtype.name, "object") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. 
+ warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] ) @@ -3973,10 +4008,14 @@ def test_to_geodataframe_w_geography_column(self): self.assertEqual( [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] ) + # and can easily be converted to a GeoSeries - self.assertEqual( - list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), + ["0.0", "0.0", "0.0"], + ) @unittest.skipIf(geopandas is None, "Requires `geopandas`") @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") @@ -4028,8 +4067,14 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.g.dtype.name, "geometry") self.assertIsInstance(df.g, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ["0.0"]) - self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + warnings.filterwarnings("ignore", category=UserWarning) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) From 71dde117b7c018130922745a5a525b5cdb5b11be Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 6 Oct 2021 16:33:20 +0200 Subject: [PATCH 12/35] feat: add a static copy of legacy proto-based types (#1000) * feat: add a static copy of legacy proto SQL types * Exclude legacy types from docs generation * Exclude legacy types from type checks * Exclude legacy types from test coverage * Emit warning if importing legacy types * Re-introduce proto-related dependencies * Expose legacy types in reference docs --- .coveragerc | 1 + UPGRADING.md | 6 +- docs/bigquery/legacy_proto_types.rst | 14 + docs/conf.py | 3 + docs/reference.rst | 15 + google/cloud/bigquery_v2/__init__.py | 56 + google/cloud/bigquery_v2/types/__init__.py | 48 + .../bigquery_v2/types/encryption_config.py | 42 + google/cloud/bigquery_v2/types/model.py | 1507 +++++++++++++++++ .../bigquery_v2/types/model_reference.py | 44 + .../cloud/bigquery_v2/types/standard_sql.py | 117 ++ .../bigquery_v2/types/table_reference.py | 58 + owlbot.py | 8 + setup.cfg | 1 + setup.py | 2 + testing/constraints-3.6.txt | 2 + tests/unit/test_legacy_types.py | 26 + 17 files changed, 1949 insertions(+), 1 deletion(-) create mode 100644 docs/bigquery/legacy_proto_types.rst create mode 100644 google/cloud/bigquery_v2/__init__.py create mode 100644 google/cloud/bigquery_v2/types/__init__.py create mode 100644 google/cloud/bigquery_v2/types/encryption_config.py create mode 100644 google/cloud/bigquery_v2/types/model.py create mode 100644 google/cloud/bigquery_v2/types/model_reference.py create mode 100644 google/cloud/bigquery_v2/types/standard_sql.py create mode 100644 google/cloud/bigquery_v2/types/table_reference.py create mode 100644 tests/unit/test_legacy_types.py diff --git a/.coveragerc b/.coveragerc index 23861a8eb..1ed1a9704 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,6 +6,7 @@ 
fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py + google/cloud/bigquery_v2/* # Legacy proto-based types. exclude_lines = # Re-enable the standard pragma pragma: NO COVER diff --git a/UPGRADING.md b/UPGRADING.md index a4ba0efd2..c75c4fddb 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -11,6 +11,10 @@ See the License for the specific language governing permissions and limitations under the License. --> +# 3.0.0 Migration Guide + +TODO + # 2.0.0 Migration Guide @@ -56,4 +60,4 @@ distance_type = enums.Model.DistanceType.COSINE from google.cloud.bigquery_v2 import types distance_type = types.Model.DistanceType.COSINE -``` \ No newline at end of file +``` diff --git a/docs/bigquery/legacy_proto_types.rst b/docs/bigquery/legacy_proto_types.rst new file mode 100644 index 000000000..bc1e93715 --- /dev/null +++ b/docs/bigquery/legacy_proto_types.rst @@ -0,0 +1,14 @@ +Legacy proto-based Types for Google Cloud Bigquery v2 API +========================================================= + +.. warning:: + These types are provided for backward compatibility only, and are not maintained + anymore. They might also differ from the types supported on the backend. It is + therefore strongly advised to migrate to the types found in :doc:`standard_sql`. + + Also see the :doc:`3.0.0 Migration Guide<../UPGRADING>` for more information. + +.. automodule:: google.cloud.bigquery_v2.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 9545d06ed..fa5217731 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -109,6 +109,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [ + "google/cloud/bigquery_v2/**", # Legacy proto-based types. "_build", "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", @@ -365,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": ("https://geopandas.org/", None), + "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), + "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), } diff --git a/docs/reference.rst b/docs/reference.rst index 8fff2e68f..713b9239d 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -204,3 +204,18 @@ Helper SQL type classes. :maxdepth: 2 bigquery/standard_sql + + +Legacy proto-based Types (deprecated) +===================================== + +The legacy type classes based on protocol buffers. + +.. deprecated:: 3.0.0 + These types are provided for backward compatibility only, and are not maintained + anymore. + +.. toctree:: + :maxdepth: 2 + + bigquery/legacy_proto_types diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py new file mode 100644 index 000000000..8df120e18 --- /dev/null +++ b/google/cloud/bigquery_v2/__init__.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import warnings + +from .types.encryption_config import EncryptionConfiguration +from .types.model import DeleteModelRequest +from .types.model import GetModelRequest +from .types.model import ListModelsRequest +from .types.model import ListModelsResponse +from .types.model import Model +from .types.model import PatchModelRequest +from .types.model_reference import ModelReference +from .types.standard_sql import StandardSqlDataType +from .types.standard_sql import StandardSqlField +from .types.standard_sql import StandardSqlStructType +from .types.standard_sql import StandardSqlTableType +from .types.table_reference import TableReference + + +_LEGACY_MSG = ( + "Legacy proto-based types from bigquery_v2 are not maintained anymore, " + "use types defined in google.cloud.bigquery instead." +) + +warnings.warn(_LEGACY_MSG, category=DeprecationWarning) + + +__all__ = ( + "DeleteModelRequest", + "EncryptionConfiguration", + "GetModelRequest", + "ListModelsRequest", + "ListModelsResponse", + "Model", + "ModelReference", + "PatchModelRequest", + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", + "TableReference", +) diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py new file mode 100644 index 000000000..83bbb3a54 --- /dev/null +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from .encryption_config import EncryptionConfiguration +from .model import ( + DeleteModelRequest, + GetModelRequest, + ListModelsRequest, + ListModelsResponse, + Model, + PatchModelRequest, +) +from .model_reference import ModelReference +from .standard_sql import ( + StandardSqlDataType, + StandardSqlField, + StandardSqlStructType, + StandardSqlTableType, +) +from .table_reference import TableReference + +__all__ = ( + "EncryptionConfiguration", + "DeleteModelRequest", + "GetModelRequest", + "ListModelsRequest", + "ListModelsResponse", + "Model", + "PatchModelRequest", + "ModelReference", + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", + "TableReference", +) diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py new file mode 100644 index 000000000..4b9139733 --- /dev/null +++ b/google/cloud/bigquery_v2/types/encryption_config.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import proto # type: ignore + +from google.protobuf import wrappers_pb2 # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"EncryptionConfiguration",}, +) + + +class EncryptionConfiguration(proto.Message): + r""" + Attributes: + kms_key_name (google.protobuf.wrappers_pb2.StringValue): + Optional. Describes the Cloud KMS encryption + key that will be used to protect destination + BigQuery table. The BigQuery Service Account + associated with your project requires access to + this encryption key. + """ + + kms_key_name = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.StringValue, + ) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py new file mode 100644 index 000000000..706418401 --- /dev/null +++ b/google/cloud/bigquery_v2/types/model.py @@ -0,0 +1,1507 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import proto # type: ignore + +from google.cloud.bigquery_v2.types import encryption_config +from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference +from google.cloud.bigquery_v2.types import standard_sql +from google.cloud.bigquery_v2.types import table_reference +from google.protobuf import timestamp_pb2 # type: ignore +from google.protobuf import wrappers_pb2 # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", + manifest={ + "Model", + "GetModelRequest", + "PatchModelRequest", + "DeleteModelRequest", + "ListModelsRequest", + "ListModelsResponse", + }, +) + + +class Model(proto.Message): + r""" + Attributes: + etag (str): + Output only. A hash of this resource. + model_reference (google.cloud.bigquery_v2.types.ModelReference): + Required. Unique identifier for this model. + creation_time (int): + Output only. The time when this model was + created, in millisecs since the epoch. + last_modified_time (int): + Output only. The time when this model was + last modified, in millisecs since the epoch. + description (str): + Optional. A user-friendly description of this + model. + friendly_name (str): + Optional. A descriptive name for this model. + labels (Sequence[google.cloud.bigquery_v2.types.Model.LabelsEntry]): + The labels associated with this model. You + can use these to organize and group your models. + Label keys and values can be no longer than 63 + characters, can only contain lowercase letters, + numeric characters, underscores and dashes. + International characters are allowed. 
Label + values are optional. Label keys must start with + a letter and each label in the list must have a + different key. + expiration_time (int): + Optional. The time when this model expires, + in milliseconds since the epoch. If not present, + the model will persist indefinitely. Expired + models will be deleted and their storage + reclaimed. The defaultTableExpirationMs + property of the encapsulating dataset can be + used to set a default expirationTime on newly + created models. + location (str): + Output only. The geographic location where + the model resides. This value is inherited from + the dataset. + encryption_configuration (google.cloud.bigquery_v2.types.EncryptionConfiguration): + Custom encryption configuration (e.g., Cloud + KMS keys). This shows the encryption + configuration of the model data while stored in + BigQuery storage. This field can be used with + PatchModel to update encryption key for an + already encrypted model. + model_type (google.cloud.bigquery_v2.types.Model.ModelType): + Output only. Type of the model resource. + training_runs (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]): + Output only. Information for all training runs in increasing + order of start_time. + feature_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + Output only. Input feature columns that were + used to train this model. + label_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + Output only. Label columns that were used to train this + model. The output of the model will have a `predicted_` + prefix to these columns. + best_trial_id (int): + The best trial_id across all training runs. + """ + + class ModelType(proto.Enum): + r"""Indicates the type of the Model.""" + MODEL_TYPE_UNSPECIFIED = 0 + LINEAR_REGRESSION = 1 + LOGISTIC_REGRESSION = 2 + KMEANS = 3 + MATRIX_FACTORIZATION = 4 + DNN_CLASSIFIER = 5 + TENSORFLOW = 6 + DNN_REGRESSOR = 7 + BOOSTED_TREE_REGRESSOR = 9 + BOOSTED_TREE_CLASSIFIER = 10 + ARIMA = 11 + AUTOML_REGRESSOR = 12 + AUTOML_CLASSIFIER = 13 + ARIMA_PLUS = 19 + + class LossType(proto.Enum): + r"""Loss metric to evaluate model training performance.""" + LOSS_TYPE_UNSPECIFIED = 0 + MEAN_SQUARED_LOSS = 1 + MEAN_LOG_LOSS = 2 + + class DistanceType(proto.Enum): + r"""Distance metric used to compute the distance between two + points. + """ + DISTANCE_TYPE_UNSPECIFIED = 0 + EUCLIDEAN = 1 + COSINE = 2 + + class DataSplitMethod(proto.Enum): + r"""Indicates the method to split input data into multiple + tables. + """ + DATA_SPLIT_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + SEQUENTIAL = 3 + NO_SPLIT = 4 + AUTO_SPLIT = 5 + + class DataFrequency(proto.Enum): + r"""Type of supported data frequency for time series forecasting + models. + """ + DATA_FREQUENCY_UNSPECIFIED = 0 + AUTO_FREQUENCY = 1 + YEARLY = 2 + QUARTERLY = 3 + MONTHLY = 4 + WEEKLY = 5 + DAILY = 6 + HOURLY = 7 + PER_MINUTE = 8 + + class HolidayRegion(proto.Enum): + r"""Type of supported holiday regions for time series forecasting + models. 
+ """ + HOLIDAY_REGION_UNSPECIFIED = 0 + GLOBAL = 1 + NA = 2 + JAPAC = 3 + EMEA = 4 + LAC = 5 + AE = 6 + AR = 7 + AT = 8 + AU = 9 + BE = 10 + BR = 11 + CA = 12 + CH = 13 + CL = 14 + CN = 15 + CO = 16 + CS = 17 + CZ = 18 + DE = 19 + DK = 20 + DZ = 21 + EC = 22 + EE = 23 + EG = 24 + ES = 25 + FI = 26 + FR = 27 + GB = 28 + GR = 29 + HK = 30 + HU = 31 + ID = 32 + IE = 33 + IL = 34 + IN = 35 + IR = 36 + IT = 37 + JP = 38 + KR = 39 + LV = 40 + MA = 41 + MX = 42 + MY = 43 + NG = 44 + NL = 45 + NO = 46 + NZ = 47 + PE = 48 + PH = 49 + PK = 50 + PL = 51 + PT = 52 + RO = 53 + RS = 54 + RU = 55 + SA = 56 + SE = 57 + SG = 58 + SI = 59 + SK = 60 + TH = 61 + TR = 62 + TW = 63 + UA = 64 + US = 65 + VE = 66 + VN = 67 + ZA = 68 + + class LearnRateStrategy(proto.Enum): + r"""Indicates the learning rate optimization strategy to use.""" + LEARN_RATE_STRATEGY_UNSPECIFIED = 0 + LINE_SEARCH = 1 + CONSTANT = 2 + + class OptimizationStrategy(proto.Enum): + r"""Indicates the optimization strategy used for training.""" + OPTIMIZATION_STRATEGY_UNSPECIFIED = 0 + BATCH_GRADIENT_DESCENT = 1 + NORMAL_EQUATION = 2 + + class FeedbackType(proto.Enum): + r"""Indicates the training algorithm to use for matrix + factorization models. + """ + FEEDBACK_TYPE_UNSPECIFIED = 0 + IMPLICIT = 1 + EXPLICIT = 2 + + class SeasonalPeriod(proto.Message): + r""" """ + + class SeasonalPeriodType(proto.Enum): + r"""""" + SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0 + NO_SEASONALITY = 1 + DAILY = 2 + WEEKLY = 3 + MONTHLY = 4 + QUARTERLY = 5 + YEARLY = 6 + + class KmeansEnums(proto.Message): + r""" """ + + class KmeansInitializationMethod(proto.Enum): + r"""Indicates the method used to initialize the centroids for + KMeans clustering algorithm. + """ + KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + KMEANS_PLUS_PLUS = 3 + + class RegressionMetrics(proto.Message): + r"""Evaluation metrics for regression and explicit feedback type + matrix factorization models. + + Attributes: + mean_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): + Mean absolute error. + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): + Mean squared error. + mean_squared_log_error (google.protobuf.wrappers_pb2.DoubleValue): + Mean squared log error. + median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): + Median absolute error. + r_squared (google.protobuf.wrappers_pb2.DoubleValue): + R^2 score. This corresponds to r2_score in ML.EVALUATE. + """ + + mean_absolute_error = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + mean_squared_error = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + ) + mean_squared_log_error = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + ) + median_absolute_error = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + r_squared = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + ) + + class AggregateClassificationMetrics(proto.Message): + r"""Aggregate metrics for classification/classifier models. For + multi-class models, the metrics are either macro-averaged or + micro-averaged. When macro-averaged, the metrics are calculated + for each label and then an unweighted average is taken of those + values. When micro-averaged, the metric is calculated globally + by counting the total number of correctly predicted rows. 
+ + Attributes: + precision (google.protobuf.wrappers_pb2.DoubleValue): + Precision is the fraction of actual positive + predictions that had positive actual labels. For + multiclass this is a macro-averaged metric + treating each class as a binary classifier. + recall (google.protobuf.wrappers_pb2.DoubleValue): + Recall is the fraction of actual positive + labels that were given a positive prediction. + For multiclass this is a macro-averaged metric. + accuracy (google.protobuf.wrappers_pb2.DoubleValue): + Accuracy is the fraction of predictions given + the correct label. For multiclass this is a + micro-averaged metric. + threshold (google.protobuf.wrappers_pb2.DoubleValue): + Threshold at which the metrics are computed. + For binary classification models this is the + positive class threshold. For multi-class + classfication models this is the confidence + threshold. + f1_score (google.protobuf.wrappers_pb2.DoubleValue): + The F1 score is an average of recall and + precision. For multiclass this is a macro- + averaged metric. + log_loss (google.protobuf.wrappers_pb2.DoubleValue): + Logarithmic Loss. For multiclass this is a + macro-averaged metric. + roc_auc (google.protobuf.wrappers_pb2.DoubleValue): + Area Under a ROC Curve. For multiclass this + is a macro-averaged metric. + """ + + precision = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,) + accuracy = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + ) + threshold = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + f1_score = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + ) + log_loss = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + roc_auc = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + ) + + class BinaryClassificationMetrics(proto.Message): + r"""Evaluation metrics for binary classification/classifier + models. + + Attributes: + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): + Aggregate classification metrics. + binary_confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): + Binary confusion matrix at multiple + thresholds. + positive_label (str): + Label representing the positive class. + negative_label (str): + Label representing the negative class. + """ + + class BinaryConfusionMatrix(proto.Message): + r"""Confusion matrix for binary classification models. + Attributes: + positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): + Threshold value used when computing each of + the following metric. + true_positives (google.protobuf.wrappers_pb2.Int64Value): + Number of true samples predicted as true. + false_positives (google.protobuf.wrappers_pb2.Int64Value): + Number of false samples predicted as true. + true_negatives (google.protobuf.wrappers_pb2.Int64Value): + Number of true samples predicted as false. + false_negatives (google.protobuf.wrappers_pb2.Int64Value): + Number of false samples predicted as false. + precision (google.protobuf.wrappers_pb2.DoubleValue): + The fraction of actual positive predictions + that had positive actual labels. + recall (google.protobuf.wrappers_pb2.DoubleValue): + The fraction of actual positive labels that + were given a positive prediction. 
+ f1_score (google.protobuf.wrappers_pb2.DoubleValue): + The equally weighted average of recall and + precision. + accuracy (google.protobuf.wrappers_pb2.DoubleValue): + The fraction of predictions given the correct + label. + """ + + positive_class_threshold = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + true_positives = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + ) + false_positives = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + ) + true_negatives = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, + ) + false_negatives = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value, + ) + precision = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + ) + f1_score = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue, + ) + accuracy = proto.Field( + proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue, + ) + + aggregate_classification_metrics = proto.Field( + proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + ) + binary_confusion_matrix_list = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", + ) + positive_label = proto.Field(proto.STRING, number=3,) + negative_label = proto.Field(proto.STRING, number=4,) + + class MultiClassClassificationMetrics(proto.Message): + r"""Evaluation metrics for multi-class classification/classifier + models. + + Attributes: + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): + Aggregate classification metrics. + confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix]): + Confusion matrix at different thresholds. + """ + + class ConfusionMatrix(proto.Message): + r"""Confusion matrix for multi-class classification models. + Attributes: + confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): + Confidence threshold used when computing the + entries of the confusion matrix. + rows (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): + One row per actual label. + """ + + class Entry(proto.Message): + r"""A single entry in the confusion matrix. + Attributes: + predicted_label (str): + The predicted label. For confidence_threshold > 0, we will + also add an entry indicating the number of items under the + confidence threshold. + item_count (google.protobuf.wrappers_pb2.Int64Value): + Number of items being predicted as this + label. + """ + + predicted_label = proto.Field(proto.STRING, number=1,) + item_count = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + ) + + class Row(proto.Message): + r"""A single row in the confusion matrix. + Attributes: + actual_label (str): + The original label of this row. + entries (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): + Info describing predicted label distribution. 
+ """ + + actual_label = proto.Field(proto.STRING, number=1,) + entries = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", + ) + + confidence_threshold = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + rows = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", + ) + + aggregate_classification_metrics = proto.Field( + proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + ) + confusion_matrix_list = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix", + ) + + class ClusteringMetrics(proto.Message): + r"""Evaluation metrics for clustering models. + Attributes: + davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): + Davies-Bouldin index. + mean_squared_distance (google.protobuf.wrappers_pb2.DoubleValue): + Mean of squared distances between each sample + to its cluster centroid. + clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): + Information for all clusters. + """ + + class Cluster(proto.Message): + r"""Message containing the information about one cluster. + Attributes: + centroid_id (int): + Centroid id. + feature_values (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue]): + Values of highly variant features for this + cluster. + count (google.protobuf.wrappers_pb2.Int64Value): + Count of training data rows that were + assigned to this cluster. + """ + + class FeatureValue(proto.Message): + r"""Representative value of a single feature within the cluster. + Attributes: + feature_column (str): + The feature column name. + numerical_value (google.protobuf.wrappers_pb2.DoubleValue): + The numerical feature value. This is the + centroid value for this feature. + categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): + The categorical feature value. + """ + + class CategoricalValue(proto.Message): + r"""Representative value of a categorical feature. + Attributes: + category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): + Counts of all categories for the categorical feature. If + there are more than ten categories, we return top ten (by + count) and return one more CategoryCount with category + "*OTHER*" and count as aggregate counts of remaining + categories. + """ + + class CategoryCount(proto.Message): + r"""Represents the count of a single category within the cluster. + Attributes: + category (str): + The name of category. + count (google.protobuf.wrappers_pb2.Int64Value): + The count of training samples matching the + category within the cluster. 
+ """ + + category = proto.Field(proto.STRING, number=1,) + count = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + ) + + category_counts = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", + ) + + feature_column = proto.Field(proto.STRING, number=1,) + numerical_value = proto.Field( + proto.MESSAGE, + number=2, + oneof="value", + message=wrappers_pb2.DoubleValue, + ) + categorical_value = proto.Field( + proto.MESSAGE, + number=3, + oneof="value", + message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", + ) + + centroid_id = proto.Field(proto.INT64, number=1,) + feature_values = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.ClusteringMetrics.Cluster.FeatureValue", + ) + count = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + ) + + davies_bouldin_index = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + mean_squared_distance = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + ) + clusters = proto.RepeatedField( + proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", + ) + + class RankingMetrics(proto.Message): + r"""Evaluation metrics used by weighted-ALS models specified by + feedback_type=implicit. + + Attributes: + mean_average_precision (google.protobuf.wrappers_pb2.DoubleValue): + Calculates a precision per user for all the + items by ranking them and then averages all the + precisions across all the users. + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): + Similar to the mean squared error computed in + regression and explicit recommendation models + except instead of computing the rating directly, + the output from evaluate is computed against a + preference which is 1 or 0 depending on if the + rating exists or not. + normalized_discounted_cumulative_gain (google.protobuf.wrappers_pb2.DoubleValue): + A metric to determine the goodness of a + ranking calculated from the predicted confidence + by comparing it to an ideal rank measured by the + original ratings. + average_rank (google.protobuf.wrappers_pb2.DoubleValue): + Determines the goodness of a ranking by + computing the percentile rank from the predicted + confidence and dividing it by the original rank. + """ + + mean_average_precision = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + mean_squared_error = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + ) + normalized_discounted_cumulative_gain = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + ) + average_rank = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + + class ArimaForecastingMetrics(proto.Message): + r"""Model evaluation metrics for ARIMA forecasting models. + Attributes: + non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): + Non-seasonal order. + arima_fitting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics]): + Arima model fitting metrics. + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + has_drift (Sequence[bool]): + Whether Arima model fitted with drift or not. + It is always false when d is not 1. 
+ time_series_id (Sequence[str]): + Id to differentiate different time series for + the large-scale case. + arima_single_model_forecasting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): + Repeated as there can be many metric sets + (one for each model) in auto-arima and the + large-scale case. + """ + + class ArimaSingleModelForecastingMetrics(proto.Message): + r"""Model evaluation metrics for a single ARIMA forecasting + model. + + Attributes: + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): + Non-seasonal order. + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): + Arima fitting metrics. + has_drift (bool): + Is arima model fitted with drift or not. It + is always false when d is not 1. + time_series_id (str): + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. 
+ """ + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + arima_fitting_metrics = proto.Field( + proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + ) + has_drift = proto.Field(proto.BOOL, number=3,) + time_series_id = proto.Field(proto.STRING, number=4,) + time_series_ids = proto.RepeatedField(proto.STRING, number=9,) + seasonal_periods = proto.RepeatedField( + proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + + non_seasonal_order = proto.RepeatedField( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + arima_fitting_metrics = proto.RepeatedField( + proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + ) + seasonal_periods = proto.RepeatedField( + proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + has_drift = proto.RepeatedField(proto.BOOL, number=4,) + time_series_id = proto.RepeatedField(proto.STRING, number=5,) + arima_single_model_forecasting_metrics = proto.RepeatedField( + proto.MESSAGE, + number=6, + message="Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics", + ) + + class EvaluationMetrics(proto.Message): + r"""Evaluation metrics of a model. These are either computed on + all training data or just the eval data based on whether eval + data was used during training. These are not present for + imported models. + + Attributes: + regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics): + Populated for regression models and explicit + feedback type matrix factorization models. + binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics): + Populated for binary + classification/classifier models. + multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics): + Populated for multi-class + classification/classifier models. + clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics): + Populated for clustering models. + ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics): + Populated for implicit feedback type matrix + factorization models. + arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics): + Populated for ARIMA models. + """ + + regression_metrics = proto.Field( + proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", + ) + binary_classification_metrics = proto.Field( + proto.MESSAGE, + number=2, + oneof="metrics", + message="Model.BinaryClassificationMetrics", + ) + multi_class_classification_metrics = proto.Field( + proto.MESSAGE, + number=3, + oneof="metrics", + message="Model.MultiClassClassificationMetrics", + ) + clustering_metrics = proto.Field( + proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", + ) + ranking_metrics = proto.Field( + proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", + ) + arima_forecasting_metrics = proto.Field( + proto.MESSAGE, + number=6, + oneof="metrics", + message="Model.ArimaForecastingMetrics", + ) + + class DataSplitResult(proto.Message): + r"""Data split result. This contains references to the training + and evaluation data tables that were used to train the model. 
+ + Attributes: + training_table (google.cloud.bigquery_v2.types.TableReference): + Table reference of the training data after + split. + evaluation_table (google.cloud.bigquery_v2.types.TableReference): + Table reference of the evaluation data after + split. + """ + + training_table = proto.Field( + proto.MESSAGE, number=1, message=table_reference.TableReference, + ) + evaluation_table = proto.Field( + proto.MESSAGE, number=2, message=table_reference.TableReference, + ) + + class ArimaOrder(proto.Message): + r"""Arima order, can be used for both non-seasonal and seasonal + parts. + + Attributes: + p (int): + Order of the autoregressive part. + d (int): + Order of the differencing part. + q (int): + Order of the moving-average part. + """ + + p = proto.Field(proto.INT64, number=1,) + d = proto.Field(proto.INT64, number=2,) + q = proto.Field(proto.INT64, number=3,) + + class ArimaFittingMetrics(proto.Message): + r"""ARIMA model fitting metrics. + Attributes: + log_likelihood (float): + Log-likelihood. + aic (float): + AIC. + variance (float): + Variance. + """ + + log_likelihood = proto.Field(proto.DOUBLE, number=1,) + aic = proto.Field(proto.DOUBLE, number=2,) + variance = proto.Field(proto.DOUBLE, number=3,) + + class GlobalExplanation(proto.Message): + r"""Global explanations containing the top most important + features after training. + + Attributes: + explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation.Explanation]): + A list of the top global explanations. Sorted + by absolute value of attribution in descending + order. + class_label (str): + Class label for this set of global + explanations. Will be empty/null for binary + logistic and linear regression models. Sorted + alphabetically in descending order. + """ + + class Explanation(proto.Message): + r"""Explanation for a single feature. + Attributes: + feature_name (str): + Full name of the feature. For non-numerical features, will + be formatted like .. + Overall size of feature name will always be truncated to + first 120 characters. + attribution (google.protobuf.wrappers_pb2.DoubleValue): + Attribution of feature. + """ + + feature_name = proto.Field(proto.STRING, number=1,) + attribution = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + ) + + explanations = proto.RepeatedField( + proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", + ) + class_label = proto.Field(proto.STRING, number=2,) + + class TrainingRun(proto.Message): + r"""Information about a single training query run for the model. + Attributes: + training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): + Options that were used for this training run, + includes user specified and default options that + were used. + start_time (google.protobuf.timestamp_pb2.Timestamp): + The start time of this training run. + results (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult]): + Output of each iteration run, results.size() <= + max_iterations. + evaluation_metrics (google.cloud.bigquery_v2.types.Model.EvaluationMetrics): + The evaluation metrics over training/eval + data that were computed at the end of training. + data_split_result (google.cloud.bigquery_v2.types.Model.DataSplitResult): + Data split result of the training run. Only + set when the input data is actually split. + global_explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation]): + Global explanations for important features of + the model. 
For multi-class models, there is one + entry for each label class. For other models, + there is only one entry in the list. + """ + + class TrainingOptions(proto.Message): + r"""Options used in model training. + Attributes: + max_iterations (int): + The maximum number of iterations in training. + Used only for iterative training algorithms. + loss_type (google.cloud.bigquery_v2.types.Model.LossType): + Type of loss function used during training + run. + learn_rate (float): + Learning rate in training. Used only for + iterative training algorithms. + l1_regularization (google.protobuf.wrappers_pb2.DoubleValue): + L1 regularization coefficient. + l2_regularization (google.protobuf.wrappers_pb2.DoubleValue): + L2 regularization coefficient. + min_relative_progress (google.protobuf.wrappers_pb2.DoubleValue): + When early_stop is true, stops training when accuracy + improvement is less than 'min_relative_progress'. Used only + for iterative training algorithms. + warm_start (google.protobuf.wrappers_pb2.BoolValue): + Whether to train a model from the last + checkpoint. + early_stop (google.protobuf.wrappers_pb2.BoolValue): + Whether to stop early when the loss doesn't improve + significantly any more (compared to min_relative_progress). + Used only for iterative training algorithms. + input_label_columns (Sequence[str]): + Name of input label columns in training data. + data_split_method (google.cloud.bigquery_v2.types.Model.DataSplitMethod): + The data split type for training and + evaluation, e.g. RANDOM. + data_split_eval_fraction (float): + The fraction of evaluation data over the + whole input data. The rest of data will be used + as training data. The format should be double. + Accurate to two decimal places. + Default value is 0.2. + data_split_column (str): + The column to split data with. This column won't be used as + a feature. + + 1. When data_split_method is CUSTOM, the corresponding + column should be boolean. The rows with true value tag + are eval data, and the false are training data. + 2. When data_split_method is SEQ, the first + DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) + in the corresponding column are used as training data, + and the rest are eval data. It respects the order in + Orderable data types: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties + learn_rate_strategy (google.cloud.bigquery_v2.types.Model.LearnRateStrategy): + The strategy to determine learn rate for the + current iteration. + initial_learn_rate (float): + Specifies the initial learning rate for the + line search learn rate strategy. + label_class_weights (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): + Weights associated with each label class, for + rebalancing the training data. Only applicable + for classification models. + user_column (str): + User column specified for matrix + factorization models. + item_column (str): + Item column specified for matrix + factorization models. + distance_type (google.cloud.bigquery_v2.types.Model.DistanceType): + Distance type for clustering models. + num_clusters (int): + Number of clusters for clustering models. + model_uri (str): + Google Cloud Storage URI from which the model + was imported. Only applicable for imported + models. + optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): + Optimization strategy for training linear + regression models. + hidden_units (Sequence[int]): + Hidden units for dnn models. 
+ batch_size (int): + Batch size for dnn models. + dropout (google.protobuf.wrappers_pb2.DoubleValue): + Dropout probability for dnn models. + max_tree_depth (int): + Maximum depth of a tree for boosted tree + models. + subsample (float): + Subsample fraction of the training data to + grow tree to prevent overfitting for boosted + tree models. + min_split_loss (google.protobuf.wrappers_pb2.DoubleValue): + Minimum split loss for boosted tree models. + num_factors (int): + Num factors specified for matrix + factorization models. + feedback_type (google.cloud.bigquery_v2.types.Model.FeedbackType): + Feedback type that specifies which algorithm + to run for matrix factorization. + wals_alpha (google.protobuf.wrappers_pb2.DoubleValue): + Hyperparameter for matrix factoration when + implicit feedback type is specified. + kmeans_initialization_method (google.cloud.bigquery_v2.types.Model.KmeansEnums.KmeansInitializationMethod): + The method used to initialize the centroids + for kmeans algorithm. + kmeans_initialization_column (str): + The column used to provide the initial centroids for kmeans + algorithm when kmeans_initialization_method is CUSTOM. + time_series_timestamp_column (str): + Column to be designated as time series + timestamp for ARIMA model. + time_series_data_column (str): + Column to be designated as time series data + for ARIMA model. + auto_arima (bool): + Whether to enable auto ARIMA or not. + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): + A specification of the non-seasonal part of + the ARIMA model: the three components (p, d, q) + are the AR order, the degree of differencing, + and the MA order. + data_frequency (google.cloud.bigquery_v2.types.Model.DataFrequency): + The data frequency of a time series. + include_drift (bool): + Include drift when fitting an ARIMA model. + holiday_region (google.cloud.bigquery_v2.types.Model.HolidayRegion): + The geographical region based on which the + holidays are considered in time series modeling. + If a valid value is specified, then holiday + effects modeling is enabled. + time_series_id_column (str): + The time series id column that was used + during ARIMA model training. + time_series_id_columns (Sequence[str]): + The time series id columns that were used + during ARIMA model training. + horizon (int): + The number of periods ahead that need to be + forecasted. + preserve_input_structs (bool): + Whether to preserve the input structs in output feature + names. Suppose there is a struct A with field b. When false + (default), the output feature name is A_b. When true, the + output feature name is A.b. + auto_arima_max_order (int): + The max value of non-seasonal p and q. + decompose_time_series (google.protobuf.wrappers_pb2.BoolValue): + If true, perform decompose time series and + save the results. + clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, clean spikes and dips in the input + time series. + adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, detect step changes and make data + adjustment in the input time series. 
+ """ + + max_iterations = proto.Field(proto.INT64, number=1,) + loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) + learn_rate = proto.Field(proto.DOUBLE, number=3,) + l1_regularization = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + l2_regularization = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + ) + min_relative_progress = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + warm_start = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + early_stop = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + input_label_columns = proto.RepeatedField(proto.STRING, number=9,) + data_split_method = proto.Field( + proto.ENUM, number=10, enum="Model.DataSplitMethod", + ) + data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,) + data_split_column = proto.Field(proto.STRING, number=12,) + learn_rate_strategy = proto.Field( + proto.ENUM, number=13, enum="Model.LearnRateStrategy", + ) + initial_learn_rate = proto.Field(proto.DOUBLE, number=16,) + label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17,) + user_column = proto.Field(proto.STRING, number=18,) + item_column = proto.Field(proto.STRING, number=19,) + distance_type = proto.Field( + proto.ENUM, number=20, enum="Model.DistanceType", + ) + num_clusters = proto.Field(proto.INT64, number=21,) + model_uri = proto.Field(proto.STRING, number=22,) + optimization_strategy = proto.Field( + proto.ENUM, number=23, enum="Model.OptimizationStrategy", + ) + hidden_units = proto.RepeatedField(proto.INT64, number=24,) + batch_size = proto.Field(proto.INT64, number=25,) + dropout = proto.Field( + proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue, + ) + max_tree_depth = proto.Field(proto.INT64, number=27,) + subsample = proto.Field(proto.DOUBLE, number=28,) + min_split_loss = proto.Field( + proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue, + ) + num_factors = proto.Field(proto.INT64, number=30,) + feedback_type = proto.Field( + proto.ENUM, number=31, enum="Model.FeedbackType", + ) + wals_alpha = proto.Field( + proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue, + ) + kmeans_initialization_method = proto.Field( + proto.ENUM, + number=33, + enum="Model.KmeansEnums.KmeansInitializationMethod", + ) + kmeans_initialization_column = proto.Field(proto.STRING, number=34,) + time_series_timestamp_column = proto.Field(proto.STRING, number=35,) + time_series_data_column = proto.Field(proto.STRING, number=36,) + auto_arima = proto.Field(proto.BOOL, number=37,) + non_seasonal_order = proto.Field( + proto.MESSAGE, number=38, message="Model.ArimaOrder", + ) + data_frequency = proto.Field( + proto.ENUM, number=39, enum="Model.DataFrequency", + ) + include_drift = proto.Field(proto.BOOL, number=41,) + holiday_region = proto.Field( + proto.ENUM, number=42, enum="Model.HolidayRegion", + ) + time_series_id_column = proto.Field(proto.STRING, number=43,) + time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) + horizon = proto.Field(proto.INT64, number=44,) + preserve_input_structs = proto.Field(proto.BOOL, number=45,) + auto_arima_max_order = proto.Field(proto.INT64, number=46,) + decompose_time_series = proto.Field( + proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + ) + clean_spikes_and_dips = proto.Field( + proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + ) + adjust_step_changes = proto.Field( + proto.MESSAGE, number=53, 
message=wrappers_pb2.BoolValue, + ) + + class IterationResult(proto.Message): + r"""Information about a single iteration of the training run. + Attributes: + index (google.protobuf.wrappers_pb2.Int32Value): + Index of the iteration, 0 based. + duration_ms (google.protobuf.wrappers_pb2.Int64Value): + Time taken to run the iteration in + milliseconds. + training_loss (google.protobuf.wrappers_pb2.DoubleValue): + Loss computed on the training data at the end + of iteration. + eval_loss (google.protobuf.wrappers_pb2.DoubleValue): + Loss computed on the eval data at the end of + iteration. + learn_rate (float): + Learn rate used for this iteration. + cluster_infos (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ClusterInfo]): + Information about top clusters for clustering + models. + arima_result (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult): + + """ + + class ClusterInfo(proto.Message): + r"""Information about a single cluster for clustering model. + Attributes: + centroid_id (int): + Centroid id. + cluster_radius (google.protobuf.wrappers_pb2.DoubleValue): + Cluster radius, the average distance from + centroid to each point assigned to the cluster. + cluster_size (google.protobuf.wrappers_pb2.Int64Value): + Cluster size, the total number of points + assigned to the cluster. + """ + + centroid_id = proto.Field(proto.INT64, number=1,) + cluster_radius = proto.Field( + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + ) + cluster_size = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + ) + + class ArimaResult(proto.Message): + r"""(Auto-)arima fitting result. Wrap everything in ArimaResult + for easier refactoring if we want to use model-specific + iteration results. + + Attributes: + arima_model_info (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): + This message is repeated because there are + multiple arima models fitted in auto-arima. For + non-auto-arima model, its size is one. + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + """ + + class ArimaCoefficients(proto.Message): + r"""Arima coefficients. + Attributes: + auto_regressive_coefficients (Sequence[float]): + Auto-regressive coefficients, an array of + double. + moving_average_coefficients (Sequence[float]): + Moving-average coefficients, an array of + double. + intercept_coefficient (float): + Intercept coefficient, just a double not an + array. + """ + + auto_regressive_coefficients = proto.RepeatedField( + proto.DOUBLE, number=1, + ) + moving_average_coefficients = proto.RepeatedField( + proto.DOUBLE, number=2, + ) + intercept_coefficient = proto.Field(proto.DOUBLE, number=3,) + + class ArimaModelInfo(proto.Message): + r"""Arima model information. + Attributes: + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): + Non-seasonal order. + arima_coefficients (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): + Arima coefficients. + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): + Arima fitting metrics. + has_drift (bool): + Whether Arima model fitted with drift or not. + It is always false when d is not 1. + time_series_id (str): + The time_series_id value for this time series. 
It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. + """ + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + arima_coefficients = proto.Field( + proto.MESSAGE, + number=2, + message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", + ) + arima_fitting_metrics = proto.Field( + proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", + ) + has_drift = proto.Field(proto.BOOL, number=4,) + time_series_id = proto.Field(proto.STRING, number=5,) + time_series_ids = proto.RepeatedField(proto.STRING, number=10,) + seasonal_periods = proto.RepeatedField( + proto.ENUM, + number=6, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + ) + + arima_model_info = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo", + ) + seasonal_periods = proto.RepeatedField( + proto.ENUM, + number=2, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + + index = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value, + ) + duration_ms = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, + ) + training_loss = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + ) + eval_loss = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + learn_rate = proto.Field(proto.DOUBLE, number=7,) + cluster_infos = proto.RepeatedField( + proto.MESSAGE, + number=8, + message="Model.TrainingRun.IterationResult.ClusterInfo", + ) + arima_result = proto.Field( + proto.MESSAGE, + number=9, + message="Model.TrainingRun.IterationResult.ArimaResult", + ) + + training_options = proto.Field( + proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", + ) + start_time = proto.Field( + proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp, + ) + results = proto.RepeatedField( + proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", + ) + evaluation_metrics = proto.Field( + proto.MESSAGE, number=7, message="Model.EvaluationMetrics", + ) + data_split_result = proto.Field( + proto.MESSAGE, number=9, 
message="Model.DataSplitResult", + ) + global_explanations = proto.RepeatedField( + proto.MESSAGE, number=10, message="Model.GlobalExplanation", + ) + + etag = proto.Field(proto.STRING, number=1,) + model_reference = proto.Field( + proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, + ) + creation_time = proto.Field(proto.INT64, number=5,) + last_modified_time = proto.Field(proto.INT64, number=6,) + description = proto.Field(proto.STRING, number=12,) + friendly_name = proto.Field(proto.STRING, number=14,) + labels = proto.MapField(proto.STRING, proto.STRING, number=15,) + expiration_time = proto.Field(proto.INT64, number=16,) + location = proto.Field(proto.STRING, number=13,) + encryption_configuration = proto.Field( + proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, + ) + model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) + training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) + feature_columns = proto.RepeatedField( + proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, + ) + label_columns = proto.RepeatedField( + proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, + ) + best_trial_id = proto.Field(proto.INT64, number=19,) + + +class GetModelRequest(proto.Message): + r""" + Attributes: + project_id (str): + Required. Project ID of the requested model. + dataset_id (str): + Required. Dataset ID of the requested model. + model_id (str): + Required. Model ID of the requested model. + """ + + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) + + +class PatchModelRequest(proto.Message): + r""" + Attributes: + project_id (str): + Required. Project ID of the model to patch. + dataset_id (str): + Required. Dataset ID of the model to patch. + model_id (str): + Required. Model ID of the model to patch. + model (google.cloud.bigquery_v2.types.Model): + Required. Patched model. + Follows RFC5789 patch semantics. Missing fields + are not updated. To clear a field, explicitly + set to default value. + """ + + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) + model = proto.Field(proto.MESSAGE, number=4, message="Model",) + + +class DeleteModelRequest(proto.Message): + r""" + Attributes: + project_id (str): + Required. Project ID of the model to delete. + dataset_id (str): + Required. Dataset ID of the model to delete. + model_id (str): + Required. Model ID of the model to delete. + """ + + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) + + +class ListModelsRequest(proto.Message): + r""" + Attributes: + project_id (str): + Required. Project ID of the models to list. + dataset_id (str): + Required. Dataset ID of the models to list. + max_results (google.protobuf.wrappers_pb2.UInt32Value): + The maximum number of results to return in a + single response page. Leverage the page tokens + to iterate through the entire collection. 
+ page_token (str): + Page token, returned by a previous call to + request the next page of results + """ + + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + max_results = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value, + ) + page_token = proto.Field(proto.STRING, number=4,) + + +class ListModelsResponse(proto.Message): + r""" + Attributes: + models (Sequence[google.cloud.bigquery_v2.types.Model]): + Models in the requested dataset. Only the following fields + are populated: model_reference, model_type, creation_time, + last_modified_time and labels. + next_page_token (str): + A token to request the next page of results. + """ + + @property + def raw_page(self): + return self + + models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) + next_page_token = proto.Field(proto.STRING, number=2,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py new file mode 100644 index 000000000..a9ebad613 --- /dev/null +++ b/google/cloud/bigquery_v2/types/model_reference.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"ModelReference",}, +) + + +class ModelReference(proto.Message): + r"""Id path of a model. + Attributes: + project_id (str): + Required. The ID of the project containing + this model. + dataset_id (str): + Required. The ID of the dataset containing + this model. + model_id (str): + Required. The ID of the model. The ID must contain only + letters (a-z, A-Z), numbers (0-9), or underscores (_). The + maximum length is 1,024 characters. + """ + + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py new file mode 100644 index 000000000..7a845fc48 --- /dev/null +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", + manifest={ + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", + }, +) + + +class StandardSqlDataType(proto.Message): + r"""The type of a variable, e.g., a function argument. Examples: INT64: + {type_kind="INT64"} ARRAY: {type_kind="ARRAY", + array_element_type="STRING"} STRUCT: + {type_kind="STRUCT", struct_type={fields=[ {name="x", + type={type_kind="STRING"}}, {name="y", type={type_kind="ARRAY", + array_element_type="DATE"}} ]}} + + Attributes: + type_kind (google.cloud.bigquery_v2.types.StandardSqlDataType.TypeKind): + Required. The top level type of this field. + Can be any standard SQL data type (e.g., + "INT64", "DATE", "ARRAY"). + array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType): + The type of the array's elements, if type_kind = "ARRAY". + struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType): + The fields of this struct, in order, if type_kind = + "STRUCT". + """ + + class TypeKind(proto.Enum): + r"""""" + TYPE_KIND_UNSPECIFIED = 0 + INT64 = 2 + BOOL = 5 + FLOAT64 = 7 + STRING = 8 + BYTES = 9 + TIMESTAMP = 19 + DATE = 10 + TIME = 20 + DATETIME = 21 + INTERVAL = 26 + GEOGRAPHY = 22 + NUMERIC = 23 + BIGNUMERIC = 24 + JSON = 25 + ARRAY = 16 + STRUCT = 17 + + type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) + array_element_type = proto.Field( + proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", + ) + struct_type = proto.Field( + proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", + ) + + +class StandardSqlField(proto.Message): + r"""A field or a column. + Attributes: + name (str): + Optional. The name of this field. Can be + absent for struct fields. + type (google.cloud.bigquery_v2.types.StandardSqlDataType): + Optional. The type of this parameter. Absent + if not explicitly specified (e.g., CREATE + FUNCTION statement can omit the return type; in + this case the output parameter does not have + this "type" field). + """ + + name = proto.Field(proto.STRING, number=1,) + type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) + + +class StandardSqlStructType(proto.Message): + r""" + Attributes: + fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + + """ + + fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + + +class StandardSqlTableType(proto.Message): + r"""A table type + Attributes: + columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + The columns in this table type + """ + + columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py new file mode 100644 index 000000000..d56e5b09f --- /dev/null +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"TableReference",}, +) + + +class TableReference(proto.Message): + r""" + Attributes: + project_id (str): + Required. The ID of the project containing + this table. + dataset_id (str): + Required. The ID of the dataset containing + this table. + table_id (str): + Required. The ID of the table. The ID must contain only + letters (a-z, A-Z), numbers (0-9), or underscores (_). The + maximum length is 1,024 characters. Certain operations allow + suffixing of the table ID with a partition decorator, such + as ``sample_table$20190123``. + project_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + dataset_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + table_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + """ + + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + table_id = proto.Field(proto.STRING, number=3,) + project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) + dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) + table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/owlbot.py b/owlbot.py index dd9255d2f..86374858e 100644 --- a/owlbot.py +++ b/owlbot.py @@ -32,6 +32,8 @@ intersphinx_dependencies={ "pandas": "http://pandas.pydata.org/pandas-docs/stable/", "geopandas": "https://geopandas.org/", + "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), + "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), }, ) @@ -68,6 +70,11 @@ r'\{"members": True\}', '{"members": True, "inherited-members": True}', ) +s.replace( + "docs/conf.py", + r"exclude_patterns = \[", + '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', +) # ---------------------------------------------------------------------------- # pytype-related changes @@ -89,6 +96,7 @@ google/cloud/ exclude = tests/ + google/cloud/bigquery_v2/ # Legacy proto-based types. output = .pytype/ disable = # There's some issue with finding some pyi files, thus disabling. diff --git a/setup.cfg b/setup.cfg index 28b7b0f26..25892161f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,7 @@ inputs = google/cloud/ exclude = tests/ + google/cloud/bigquery_v2/ # Legacy proto-based types. output = .pytype/ disable = # There's some issue with finding some pyi files, thus disabling. diff --git a/setup.py b/setup.py index 130d8f49c..a7f99b879 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,8 @@ "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", + "proto-plus >= 1.10.0", # For the legacy proto-based types. + "protobuf >= 3.12.0", # For the legacy proto-based types. 
"pyarrow >= 3.0.0, < 6.0dev", "requests >= 2.18.0, < 3.0.0dev", ] diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 620f23ca2..6e27172b2 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -15,6 +15,8 @@ opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 pandas==1.0.0 +proto-plus==1.10.0 +protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 Shapely==1.6.0 diff --git a/tests/unit/test_legacy_types.py b/tests/unit/test_legacy_types.py new file mode 100644 index 000000000..49ccb8e5a --- /dev/null +++ b/tests/unit/test_legacy_types.py @@ -0,0 +1,26 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + + +def test_imprting_legacy_types_emits_warning(): + with warnings.catch_warnings(record=True) as warned: + from google.cloud.bigquery_v2 import types # noqa: F401 + + assert len(warned) == 1 + assert warned[0].category is DeprecationWarning + warning_msg = str(warned[0]) + assert "bigquery_v2" in warning_msg + assert "not maintained" in warning_msg From beaadc83ab8a136254e21f9a5e05caa636509ccf Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 6 Oct 2021 15:37:50 +0000 Subject: [PATCH 13/35] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .kokoro/samples/lint/common.cfg | 2 +- .kokoro/samples/python3.6/common.cfg | 2 +- .kokoro/samples/python3.7/common.cfg | 2 +- .kokoro/samples/python3.8/common.cfg | 2 +- .kokoro/samples/python3.9/common.cfg | 2 +- docs/conf.py | 4 ---- 6 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.kokoro/samples/lint/common.cfg b/.kokoro/samples/lint/common.cfg index a3de3b36d..153746ccc 100644 --- a/.kokoro/samples/lint/common.cfg +++ b/.kokoro/samples/lint/common.cfg @@ -31,4 +31,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.6/common.cfg b/.kokoro/samples/python3.6/common.cfg index 747313969..20f6b9691 100644 --- a/.kokoro/samples/python3.6/common.cfg +++ b/.kokoro/samples/python3.6/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. 
-build_file: "python-bigquery/.kokoro/trampoline_v2.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg index 8327415f6..d30dc6018 100644 --- a/.kokoro/samples/python3.7/common.cfg +++ b/.kokoro/samples/python3.7/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg index 57b901c42..46759c6d6 100644 --- a/.kokoro/samples/python3.8/common.cfg +++ b/.kokoro/samples/python3.8/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.9/common.cfg b/.kokoro/samples/python3.9/common.cfg index b7f85e2e0..58d56ce74 100644 --- a/.kokoro/samples/python3.9/common.cfg +++ b/.kokoro/samples/python3.9/common.cfg @@ -37,4 +37,4 @@ gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 295c36b81..7c9f6eef2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,10 +366,6 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": ("https://geopandas.org/", None), - "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), - "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), - "geopandas": ("https://geopandas.org/", None), } From 750c8089aced375c8b868e030d0b4651e521ed2c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 6 Oct 2021 10:39:05 -0500 Subject: [PATCH 14/35] chore: remove unnecessary replacement from owlbot --- owlbot.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/owlbot.py b/owlbot.py index 74de5f06d..5fd5c436a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -54,10 +54,6 @@ ], ) -# Remove unneeded intersphinx links, the library does not use any proto-generated code. 
-s.replace("docs/conf.py", r'\s+"(proto-plus|protobuf)":.*$', "") - - # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- From 6bfbb7d809a65df030a5fcea001713f6ec02dd33 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 6 Oct 2021 15:42:19 +0000 Subject: [PATCH 15/35] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- docs/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 7c9f6eef2..3d07b6bf5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -364,6 +364,8 @@ "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), + "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), + "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), "pandas": ("http://pandas.pydata.org/pandas-docs/stable/", None), "geopandas": ("https://geopandas.org/", None), } From 72255a6b57553fac9ed3ada42c260ac5960cce9a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 6 Oct 2021 10:45:42 -0500 Subject: [PATCH 16/35] Apply suggestions from code review --- google/cloud/bigquery/client.py | 2 -- google/cloud/bigquery/table.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 754a704d3..a738dd0f3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -116,8 +116,6 @@ TIMEOUT_HEADER = "X-Server-Timeout" -TIMEOUT_HEADER = "X-Server-Timeout" - class Project(object): """Wrapper for resource describing a BigQuery project. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 410185b9d..376323801 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1688,8 +1688,6 @@ def to_arrow( """ self._maybe_warn_max_results(bqstorage_client) - self._maybe_warn_max_results(bqstorage_client) - if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None From 1661262f47e5a7ceabc48243e33011f75b68d4b0 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 7 Oct 2021 17:31:46 +0000 Subject: [PATCH 17/35] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- google/cloud/bigquery/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d6ce6085d..1dfe2cab7 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -117,7 +117,6 @@ _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 - class Project(object): """Wrapper for resource describing a BigQuery project. 
From 2c90edc4adf183d221ae7d5b541d513da84699f5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 7 Oct 2021 12:30:53 -0500 Subject: [PATCH 18/35] chore: remove unused _PYARROW_BAD_VERSIONS --- google/cloud/bigquery/_helpers.py | 3 --- google/cloud/bigquery/client.py | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 5fac88a83..f2a8f34f0 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -77,9 +77,6 @@ def is_read_session_optional(self) -> bool: class PyarrowVersions: """Version comparisons for pyarrow package.""" - # https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 - _PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) - def __init__(self): self._installed_version = None diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d6ce6085d..0b904d096 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -116,6 +116,7 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 +TIMEOUT_HEADER = "X-Server-Timeout" class Project(object): From 7852c5cdbdd3dd78f7a47e5094e07444141f6a2e Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 7 Oct 2021 17:37:05 +0000 Subject: [PATCH 19/35] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- google/cloud/bigquery/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 0b7fabf0d..0b904d096 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -118,6 +118,7 @@ TIMEOUT_HEADER = "X-Server-Timeout" + class Project(object): """Wrapper for resource describing a BigQuery project. 
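
The "remove unused _PYARROW_BAD_VERSIONS" commit above deletes a frozenset that pinned pyarrow 2.0.0 as a known-bad release. For readers curious what such a guard does, here is an illustrative sketch built on the same packaging.version comparison; it is not the library's implementation, and the helper name pyarrow_is_usable is invented for this example.

# Illustrative sketch only -- not code from this patch series.
import packaging.version
import pyarrow

# Mirrors the removed constant: pyarrow 2.0.0 was the known-bad release.
_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")])


def pyarrow_is_usable() -> bool:
    """Return False when the installed pyarrow is a known-bad release."""
    installed = packaging.version.Version(pyarrow.__version__)
    return installed not in _PYARROW_BAD_VERSIONS


if not pyarrow_is_usable():
    raise RuntimeError(
        f"pyarrow {pyarrow.__version__} has known issues; please upgrade."
    )

The constant could be dropped because setup.py now requires pyarrow >= 3.0.0, so the problematic 2.0.0 release can no longer be installed alongside this library.
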
From 40c92c36546eb33f1121202b93561b24a4271d7a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 1 Nov 2021 11:40:36 -0500 Subject: [PATCH 20/35] chore: cleanup intersphinx links (#1035) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: cleanup intersphinx links * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * use https for pandas docs Co-authored-by: Owl Bot --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 5fd5c436a..93620ab98 100644 --- a/owlbot.py +++ b/owlbot.py @@ -30,7 +30,7 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": "http://pandas.pydata.org/pandas-docs/stable/", + "pandas": "https://pandas.pydata.org/pandas-docs/stable/", "geopandas": "https://geopandas.org/", }, ) From 859a65d7fc2c2bbed7e56bc22b164b504b36aaeb Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 9 Nov 2021 22:06:07 +0200 Subject: [PATCH 21/35] Fix type hints and discovered bugs --- google/cloud/bigquery/_http.py | 4 +- google/cloud/bigquery/_pandas_helpers.py | 4 +- google/cloud/bigquery/client.py | 2 +- google/cloud/bigquery/model.py | 99 +++++++++++++++--------- google/cloud/bigquery/routine/routine.py | 5 +- google/cloud/bigquery/schema.py | 16 ---- google/cloud/bigquery/standard_sql.py | 32 +++----- google/cloud/bigquery/table.py | 2 +- 8 files changed, 80 insertions(+), 84 deletions(-) diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index f7207f32e..789ef9243 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -52,8 +52,8 @@ def __init__(self, client, client_info=None, api_endpoint=None): self._client_info.gapic_version = __version__ self._client_info.client_library_version = __version__ - API_VERSION = "v2" + API_VERSION = "v2" # type: ignore """The version of the API, used in building the API call's URL.""" - API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}" + API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}" # type: ignore """A template for the URL of a particular API call.""" diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a7d10ebac..07b288236 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -28,8 +28,8 @@ else: import numpy -import pyarrow -import pyarrow.parquet +import pyarrow # type: ignore +import pyarrow.parquet # type: ignore try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index a3a193be1..6d94f5bbc 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -197,7 +197,7 @@ class Client(ClientWithProject): to acquire default credentials. 
""" - SCOPE = ( + SCOPE = ( # type: ignore "https://www.googleapis.com/auth/bigquery", "https://www.googleapis.com/auth/cloud-platform", ) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 20b15cca3..52fe6276e 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -18,11 +18,11 @@ import copy import datetime +import typing from typing import Any, Dict, Optional, Sequence, Union import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers -from google.cloud.bigquery import standard_sql from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -70,46 +70,52 @@ def reference(self) -> Optional["ModelReference"]: Read-only. """ resource = self._properties.get("modelReference") - if resource is not None: + if resource is None: + return None + else: return ModelReference.from_api_repr(resource) @property - def project(self) -> str: + def project(self) -> Optional[str]: """Project bound to the model.""" - return self.reference.project + ref = self.reference + return ref.project if ref is not None else None @property - def dataset_id(self) -> str: + def dataset_id(self) -> Optional[str]: """ID of dataset containing the model.""" - return self.reference.dataset_id + ref = self.reference + return ref.dataset_id if ref is not None else None @property - def model_id(self) -> str: + def model_id(self) -> Optional[str]: """The model ID.""" - return self.reference.model_id + ref = self.reference + return ref.model_id if ref is not None else None @property - def path(self) -> str: + def path(self) -> Optional[str]: """URL path for the model's APIs.""" - return self.reference.path + ref = self.reference + return ref.path if ref is not None else None @property - def location(self) -> str: + def location(self) -> Optional[str]: """The geographic location where the model resides. This value is inherited from the dataset. Read-only. """ - return self._properties.get("location") + return typing.cast(Optional[str], self._properties.get("location")) @property - def etag(self) -> str: + def etag(self) -> Optional[str]: """ETag for the model resource (:data:`None` until set from the server). Read-only. """ - return self._properties.get("etag") + return typing.cast(Optional[str], self._properties.get("etag")) @property def created(self) -> Optional[datetime.datetime]: @@ -117,8 +123,10 @@ def created(self) -> Optional[datetime.datetime]: Read-only. """ - value = self._properties.get("creationTime") - if value is not None: + value = typing.cast(Optional[float], self._properties.get("creationTime")) + if value is None: + return None + else: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) @@ -130,8 +138,10 @@ def modified(self) -> Optional[datetime.datetime]: Read-only. """ - value = value = self._properties.get("lastModifiedTime") - if value is not None: + value = typing.cast(Optional[float], self._properties.get("lastModifiedTime")) + if value is None: + return None + else: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) @@ -143,7 +153,9 @@ def model_type(self) -> str: Read-only. 
""" - return self._properties.get("modelType", "MODEL_TYPE_UNSPECIFIED") + return typing.cast( + str, self._properties.get("modelType", "MODEL_TYPE_UNSPECIFIED") + ) @property def training_runs(self) -> Sequence[Dict[str, Any]]: @@ -154,25 +166,31 @@ def training_runs(self) -> Sequence[Dict[str, Any]]: Read-only. """ - return self._properties.get("trainingRuns", []) + return typing.cast( + Sequence[Dict[str, Any]], self._properties.get("trainingRuns", []) + ) @property - def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]: + def feature_columns(self) -> Sequence[Dict[str, Any]]: """Input feature columns that were used to train this model. Read-only. """ - return self._properties.get("featureColumns", []) + return typing.cast( + Sequence[Dict[str, Any]], self._properties.get("featureColumns", []) + ) @property - def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: + def label_columns(self) -> Sequence[Dict[str, Any]]: """Label columns that were used to train this model. The output of the model will have a ``predicted_`` prefix to these columns. Read-only. """ - return self._properties.get("labelColumns", []) + return typing.cast( + Sequence[Dict[str, Any]], self._properties.get("labelColumns", []) + ) @property def best_trial_id(self) -> Optional[int]: @@ -183,7 +201,7 @@ def best_trial_id(self) -> Optional[int]: Read-only. """ - value = self._properties.get("bestTrialId") + value = typing.cast(Optional[int], self._properties.get("bestTrialId")) if value is not None: value = int(value) return value @@ -195,8 +213,10 @@ def expires(self) -> Optional[datetime.datetime]: If not present, the model will persist indefinitely. Expired models will be deleted and their storage reclaimed. """ - value = self._properties.get("expirationTime") - if value is not None: + value = typing.cast(Optional[float], self._properties.get("expirationTime")) + if value is None: + return None + else: # value will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( 1000.0 * float(value) @@ -204,27 +224,32 @@ def expires(self) -> Optional[datetime.datetime]: @expires.setter def expires(self, value: Optional[datetime.datetime]): - if value is not None: - value = str(google.cloud._helpers._millis_from_datetime(value)) - self._properties["expirationTime"] = value + if value is None: + value_to_store: Optional[str] = None + else: + value_to_store = str(google.cloud._helpers._millis_from_datetime(value)) + # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported. + self._properties["expirationTime"] = value_to_store # type: ignore @property def description(self) -> Optional[str]: """Description of the model (defaults to :data:`None`).""" - return self._properties.get("description") + return typing.cast(Optional[str], self._properties.get("description")) @description.setter def description(self, value: Optional[str]): - self._properties["description"] = value + # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported. + self._properties["description"] = value # type: ignore @property def friendly_name(self) -> Optional[str]: """Title of the table (defaults to :data:`None`).""" - return self._properties.get("friendlyName") + return typing.cast(Optional[str], self._properties.get("friendlyName")) @friendly_name.setter def friendly_name(self, value: Optional[str]): - self._properties["friendlyName"] = value + # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported. 
+ self._properties["friendlyName"] = value # type: ignore @property def labels(self) -> Dict[str, str]: @@ -256,13 +281,11 @@ def encryption_configuration(self) -> Optional[EncryptionConfiguration]: prop = self._properties.get("encryptionConfiguration") if prop: prop = EncryptionConfiguration.from_api_repr(prop) - return prop + return typing.cast(Optional[EncryptionConfiguration], prop) @encryption_configuration.setter def encryption_configuration(self, value: Optional[EncryptionConfiguration]): - api_repr = value - if value: - api_repr = value.to_api_repr() + api_repr = value.to_api_repr() if value else value self._properties["encryptionConfiguration"] = api_repr @classmethod diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 91fbbb065..18a38c3cc 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -210,10 +210,7 @@ def return_type(self): @return_type.setter def return_type(self, value: StandardSqlDataType): - if value: - resource = value.to_api_repr() - else: - resource = None + resource = None if not value else value.to_api_repr() self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @property diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 31188c807..88a7d8f3c 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -66,22 +66,6 @@ class _DefaultSentinel(enum.Enum): _DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE -class _DefaultSentinel(enum.Enum): - """Object used as 'sentinel' indicating default value should be used. - - Uses enum so that pytype/mypy knows that this is the only possible value. - https://stackoverflow.com/a/60605919/101923 - - Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8. - https://docs.python.org/3/library/typing.html#typing.Literal - """ - - DEFAULT_VALUE = object() - - -_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE - - class SchemaField(object): """Describe a single field within a table schema. diff --git a/google/cloud/bigquery/standard_sql.py b/google/cloud/bigquery/standard_sql.py index 479929c74..e0f22b2de 100644 --- a/google/cloud/bigquery/standard_sql.py +++ b/google/cloud/bigquery/standard_sql.py @@ -13,6 +13,7 @@ # limitations under the License. import copy +import typing from typing import Any, Dict, Iterable, List, Optional from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -61,14 +62,14 @@ def __init__( array_element_type: Optional["StandardSqlDataType"] = None, struct_type: Optional["StandardSqlStructType"] = None, ): - self._properties = {} + self._properties: Dict[str, Any] = {} self.type_kind = type_kind self.array_element_type = array_element_type self.struct_type = struct_type @property - def type_kind(self) -> StandardSqlTypeNames: + def type_kind(self) -> Optional[StandardSqlTypeNames]: """The top level type of this field. Can be any standard SQL data type, e.g. INT64, DATE, ARRAY. @@ -139,7 +140,7 @@ def from_api_repr(cls, resource: Dict[str, Any]): else: # Convert string to an enum member. 
type_kind = StandardSqlTypeNames[ # pytype: disable=missing-parameter - type_kind + typing.cast(str, type_kind) ] array_element_type = None @@ -166,8 +167,6 @@ def __eq__(self, other): and self.struct_type == other.struct_type ) - __hash__ = None - def __str__(self): result = f"{self.__class__.__name__}(type_kind={self.type_kind!r}, ...)" return result @@ -192,15 +191,13 @@ class StandardSqlField: def __init__( self, name: Optional[str] = None, type: Optional[StandardSqlDataType] = None ): - if type is not None: - type = type.to_api_repr() - - self._properties = {"name": name, "type": type} + type_repr = None if type is None else type.to_api_repr() + self._properties = {"name": name, "type": type_repr} @property def name(self) -> Optional[str]: """The name of this field. Can be absent for struct fields.""" - return self._properties["name"] + return typing.cast(Optional[str], self._properties["name"]) @name.setter def name(self, value: Optional[str]): @@ -219,14 +216,15 @@ def type(self) -> Optional[StandardSqlDataType]: return None result = StandardSqlDataType() - result._properties = type_info # We do not use a copy on purpose. + # We do not use a properties copy on purpose. + result._properties = typing.cast(Dict[str, Any], type_info) + return result @type.setter def type(self, value: Optional[StandardSqlDataType]): - if value is not None: - value = value.to_api_repr() - self._properties["type"] = value + value_repr = None if value is None else value.to_api_repr() + self._properties["type"] = value_repr def to_api_repr(self) -> Dict[str, Any]: """Construct the API resource representation of this SQL field.""" @@ -247,8 +245,6 @@ def __eq__(self, other): else: return self.name == other.name and self.type == other.type - __hash__ = None - class StandardSqlStructType: """Type of a struct field. @@ -300,8 +296,6 @@ def __eq__(self, other): else: return self.fields == other.fields - __hash__ = None - class StandardSqlTableType: """A table type. 
@@ -359,5 +353,3 @@ def __eq__(self, other): return NotImplemented else: return self.columns == other.columns - - __hash__ = None diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 35392524a..140fa13ae 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -29,7 +29,7 @@ except ImportError: # pragma: NO COVER pandas = None -import pyarrow +import pyarrow # type: ignore try: import geopandas # type: ignore From 3d1af9526786e541ca6cd2c0362a4f4fe27c8669 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 10 Nov 2021 13:01:34 -0700 Subject: [PATCH 22/35] feat!: Use pandas custom data types for BigQuery DATE and TIME columns, remove `date_as_object` argument (#972) * Use new pandas date and time dtypes * Get rid of date_as_object argument * added *unit* test for dealing with dates and timestamps that can't fit in datetime64[ns] * Implemented any, all, min, max and median * test (and fix) load from dataframe with date and time columns * Make sure insert_rows_from_dataframe works * Renamed date and time dtypes to bqdate and bqtime * make fallback date and time dtype names strings to make pytype happy * date and time arrays implement __arrow_array__ to facilitate arrow conversion * Make conversion of date columns from arrow pandas outout to pandas zero-copy when not date_as_object * Added date math support * Support date math with DateOffset scalars * always use types mapper for conversion from arrow to pandas * adjust unit tests to use arrow not avro * avoid "ValueError: need at least one array to concatenate" with empty RecordBatch * add missing db-dtypes requirement * avoid arrow_schema on older versions of bqstorage BREAKING CHANGE: remove `date_as_object` argument from `to_dataframe`. The `dbdate` dtype is used by default with an automatic fallback to `object` when dates are not within the range of a nanosecond-precision pandas timestamp Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- docs/usage/pandas.rst | 14 ++- google/cloud/bigquery/_pandas_helpers.py | 66 ++++++----- google/cloud/bigquery/job/query.py | 16 --- google/cloud/bigquery/table.py | 88 +++++++------- samples/geography/requirements.txt | 1 + samples/magics/requirements.txt | 1 + samples/snippets/requirements.txt | 1 + setup.py | 2 +- testing/constraints-3.6.txt | 1 + testing/constraints-3.7.txt | 1 + testing/constraints-3.8.txt | 1 + tests/system/test_pandas.py | 140 ++++++++++++++++++++--- tests/unit/job/test_query_pandas.py | 100 +++++++++++----- tests/unit/test_client.py | 101 ++++++++++------ tests/unit/test_table.py | 35 +++++- tests/unit/test_table_pandas.py | 4 +- 16 files changed, 396 insertions(+), 176 deletions(-) diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 109259711..550a67792 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -50,13 +50,25 @@ The following data types are used when creating a pandas DataFrame. - * - DATETIME - datetime64[ns], object - - object is used when there are values not representable in pandas + - The object dtype is used when there are values not representable in a + pandas nanosecond-precision timestamp. + * - DATE + - dbdate, object + - The object dtype is used when there are values not representable in a + pandas nanosecond-precision timestamp. + + Requires the ``db-dtypes`` package. 
See the `db-dtypes usage guide + `_ * - FLOAT64 - float64 - * - INT64 - Int64 - + * - TIME + - dbtime + - Requires the ``db-dtypes`` package. See the `db-dtypes usage guide + `_ Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame ------------------------------------------------------------ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 07b288236..39fa74dea 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -18,16 +18,21 @@ import functools import logging import queue -from typing import Dict, Sequence import warnings try: import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None + date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype else: import numpy + from db_dtypes import DateDtype, TimeDtype # type: ignore + + date_dtype_name = DateDtype.name + time_dtype_name = TimeDtype.name + import pyarrow # type: ignore import pyarrow.parquet # type: ignore @@ -77,15 +82,6 @@ def _to_wkb(v): _MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads -# If you update the default dtypes, also update the docs at docs/usage/pandas.rst. -_BQ_TO_PANDAS_DTYPE_NULLSAFE = { - "BOOL": "boolean", - "BOOLEAN": "boolean", - "FLOAT": "float64", - "FLOAT64": "float64", - "INT64": "Int64", - "INTEGER": "Int64", -} _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -102,6 +98,8 @@ def _to_wkb(v): "uint16": "INTEGER", "uint32": "INTEGER", "geometry": "GEOGRAPHY", + date_dtype_name: "DATE", + time_dtype_name: "TIME", } @@ -267,26 +265,40 @@ def bq_to_arrow_schema(bq_schema): return pyarrow.schema(arrow_fields) -def bq_schema_to_nullsafe_pandas_dtypes( - bq_schema: Sequence[schema.SchemaField], -) -> Dict[str, str]: - """Return the default dtypes to use for columns in a BigQuery schema. +def default_types_mapper(date_as_object: bool = False): + """Create a mapping from pyarrow types to pandas types. - Only returns default dtypes which are safe to have NULL values. This - includes Int64, which has pandas.NA values and does not result in - loss-of-precision. + This overrides the pandas defaults to use null-safe extension types where + available. - Returns: - A mapping from column names to pandas dtypes. + See: https://arrow.apache.org/docs/python/api/datatypes.html for a list of + data types. See: + tests/unit/test__pandas_helpers.py::test_bq_to_arrow_data_type for + BigQuery to Arrow type mapping. + + Note to google-cloud-bigquery developers: If you update the default dtypes, + also update the docs at docs/usage/pandas.rst. """ - dtypes = {} - for bq_field in bq_schema: - if bq_field.mode.upper() not in {"NULLABLE", "REQUIRED"}: - continue - field_type = bq_field.field_type.upper() - if field_type in _BQ_TO_PANDAS_DTYPE_NULLSAFE: - dtypes[bq_field.name] = _BQ_TO_PANDAS_DTYPE_NULLSAFE[field_type] - return dtypes + + def types_mapper(arrow_data_type): + if pyarrow.types.is_boolean(arrow_data_type): + return pandas.BooleanDtype() + + elif ( + # If date_as_object is True, we know some DATE columns are + # out-of-bounds of what is supported by pandas. 
+ not date_as_object + and pyarrow.types.is_date(arrow_data_type) + ): + return DateDtype() + + elif pyarrow.types.is_integer(arrow_data_type): + return pandas.Int64Dtype() + + elif pyarrow.types.is_time(arrow_data_type): + return TimeDtype() + + return types_mapper def bq_to_arrow_array(series, bq_field): diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 7ff752da1..6b8b5ce12 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1556,7 +1556,6 @@ def to_dataframe( dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, max_results: Optional[int] = None, geography_as_object: bool = False, ) -> "pandas.DataFrame": @@ -1599,12 +1598,6 @@ def to_dataframe( .. versionadded:: 1.24.0 - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - .. versionadded:: 1.26.0 - max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. @@ -1638,7 +1631,6 @@ def to_dataframe( dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_as_object=geography_as_object, ) @@ -1651,7 +1643,6 @@ def to_geodataframe( dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, ) -> "geopandas.GeoDataFrame": @@ -1694,12 +1685,6 @@ def to_geodataframe( .. versionadded:: 1.24.0 - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - .. versionadded:: 1.26.0 - max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. @@ -1732,7 +1717,6 @@ def to_geodataframe( dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_column=geography_column, ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 140fa13ae..f434688e7 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -28,6 +28,8 @@ import pandas # type: ignore except ImportError: # pragma: NO COVER pandas = None +else: + import db_dtypes # type: ignore # noqa import pyarrow # type: ignore @@ -1815,7 +1817,6 @@ def to_dataframe( dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1865,12 +1866,6 @@ def to_dataframe( .. versionadded:: 1.24.0 - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - .. versionadded:: 1.26.0 - geography_as_object (Optional[bool]): If ``True``, convert GEOGRAPHY data to :mod:`shapely` geometry objects. If ``False`` (default), don't cast @@ -1912,40 +1907,44 @@ def to_dataframe( bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, ) - default_dtypes = _pandas_helpers.bq_schema_to_nullsafe_pandas_dtypes( - self.schema - ) - # Let the user-defined dtypes override the default ones. 
- # https://stackoverflow.com/a/26853961/101923 - dtypes = {**default_dtypes, **dtypes} - - # When converting timestamp values to nanosecond precision, the result + # When converting date or timestamp values to nanosecond precision, the result # can be out of pyarrow bounds. To avoid the error when converting to - # Pandas, we set the timestamp_as_object parameter to True, if necessary. - types_to_check = { - pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=datetime.timezone.utc), - } - - for column in record_batch: - if column.type in types_to_check: - try: - column.cast("timestamp[ns]") - except pyarrow.lib.ArrowInvalid: - timestamp_as_object = True - break - else: - timestamp_as_object = False - - extra_kwargs = {"timestamp_as_object": timestamp_as_object} + # Pandas, we set the date_as_object or timestamp_as_object parameter to True, + # if necessary. + date_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be date32 or date64 (plus units). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if str(col.type).startswith("date") + ) - df = record_batch.to_pandas( - date_as_object=date_as_object, integer_object_nulls=True, **extra_kwargs + timestamp_as_object = not all( + self.__can_cast_timestamp_ns(col) + for col in record_batch + # Type can be timestamp (plus units and time zone). + # See: https://arrow.apache.org/docs/python/api/datatypes.html + if str(col.type).startswith("timestamp") ) + if len(record_batch) > 0: + df = record_batch.to_pandas( + date_as_object=date_as_object, + timestamp_as_object=timestamp_as_object, + integer_object_nulls=True, + types_mapper=_pandas_helpers.default_types_mapper( + date_as_object=date_as_object + ), + ) + else: + # Avoid "ValueError: need at least one array to concatenate" on + # older versions of pandas when converting empty RecordBatch to + # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241 + df = pandas.DataFrame([], columns=record_batch.schema.names) + for column in dtypes: - df[column] = pandas.Series(df[column], dtype=dtypes[column]) + df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) if geography_as_object: for field in self.schema: @@ -1954,6 +1953,15 @@ def to_dataframe( return df + @staticmethod + def __can_cast_timestamp_ns(column): + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + return False + else: + return True + # If changing the signature of this method, make sure to apply the same # changes to job.QueryJob.to_geodataframe() def to_geodataframe( @@ -1962,7 +1970,6 @@ def to_geodataframe( dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, - date_as_object: bool = True, geography_column: Optional[str] = None, ) -> "geopandas.GeoDataFrame": """Create a GeoPandas GeoDataFrame by loading all pages of a query. @@ -2010,10 +2017,6 @@ def to_geodataframe( This argument does nothing if ``bqstorage_client`` is supplied. - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. 
- geography_column (Optional[str]): If there are more than one GEOGRAPHY column, identifies which one to use to construct a geopandas @@ -2069,7 +2072,6 @@ def to_geodataframe( dtypes, progress_bar_type, create_bqstorage_client, - date_as_object, geography_as_object=True, ) @@ -2126,7 +2128,6 @@ def to_dataframe( dtypes=None, progress_bar_type=None, create_bqstorage_client=True, - date_as_object=True, geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2136,7 +2137,6 @@ def to_dataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. - date_as_object (bool): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -2151,7 +2151,6 @@ def to_geodataframe( dtypes=None, progress_bar_type=None, create_bqstorage_client=True, - date_as_object=True, geography_column: Optional[str] = None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2161,7 +2160,6 @@ def to_geodataframe( dtypes (Any): Ignored. Added for compatibility with RowIterator. progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. - date_as_object (bool): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e2de86673..d33da667c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -7,6 +7,7 @@ click==8.0.1 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.6; python_version < '3.7' +db-dtypes==0.3.0 Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 5cc7ec33f..0d36904c4 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,3 +1,4 @@ +db-dtypes==0.3.0 google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 grpcio==1.41.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f79552392..4f04611ba 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,3 +1,4 @@ +db-dtypes==0.3.0 google-cloud-bigquery-storage==2.9.0 google-auth-oauthlib==0.4.6 grpcio==1.41.0 diff --git a/setup.py b/setup.py index 7ff571b05..09a374303 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ # Keep the no-op bqstorage extra for backward compatibility. 
# See: https://github.com/googleapis/python-bigquery/issues/757 "bqstorage": [], - "pandas": ["pandas>=1.0.0"], + "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index b9b93d4f1..a2fc429a3 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +db-dtypes==0.3.0 geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index e69de29bb..684864f2b 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -0,0 +1 @@ +pandas==1.1.0 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index e69de29bb..3fd8886e6 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -0,0 +1 @@ +pandas==1.2.0 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 39ea3e878..bc8e43370 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -24,7 +24,6 @@ import google.api_core.retry import pkg_resources import pytest -import numpy from google.cloud import bigquery from google.cloud import bigquery_storage @@ -34,6 +33,7 @@ pandas = pytest.importorskip("pandas", minversion="0.23.0") +numpy = pytest.importorskip("numpy") PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version @@ -84,6 +84,28 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), + ( + "date_col", + pandas.Series( + [ + datetime.date(2010, 1, 2), + datetime.date(2011, 2, 3), + datetime.date(2012, 3, 14), + ], + dtype="dbdate", + ), + ), + ( + "time_col", + pandas.Series( + [ + datetime.time(3, 44, 50), + datetime.time(14, 50, 59), + datetime.time(15, 16), + ], + dtype="dbtime", + ), + ), ("array_bool_col", pandas.Series([[True], [False], [True]])), ( "array_ts_col", @@ -186,6 +208,8 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i bigquery.SchemaField("uint8_col", "INTEGER"), bigquery.SchemaField("uint16_col", "INTEGER"), bigquery.SchemaField("uint32_col", "INTEGER"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("array_bool_col", "BOOLEAN", mode="REPEATED"), bigquery.SchemaField("array_ts_col", "TIMESTAMP", mode="REPEATED"), # TODO: Update to DATETIME in V3 @@ -201,7 +225,87 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i bigquery.SchemaField("array_uint16_col", "INTEGER", mode="REPEATED"), bigquery.SchemaField("array_uint32_col", "INTEGER", mode="REPEATED"), ) - assert table.num_rows == 3 + assert numpy.array( + sorted(map(list, bigquery_client.list_rows(table)), key=lambda r: r[5]), + dtype="object", + ).transpose().tolist() == [ + # bool_col + [True, False, True], + # ts_col + [ + datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc), + datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc), + ], + # dt_col + # TODO: Remove tzinfo in V3. 
+ # https://github.com/googleapis/python-bigquery/issues/985 + [ + datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc), + datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc), + ], + # float32_col + [1.0, 2.0, 3.0], + # float64_col + [4.0, 5.0, 6.0], + # int8_col + [-12, -11, -10], + # int16_col + [-9, -8, -7], + # int32_col + [-6, -5, -4], + # int64_col + [-3, -2, -1], + # uint8_col + [0, 1, 2], + # uint16_col + [3, 4, 5], + # uint32_col + [6, 7, 8], + # date_col + [ + datetime.date(2010, 1, 2), + datetime.date(2011, 2, 3), + datetime.date(2012, 3, 14), + ], + # time_col + [datetime.time(3, 44, 50), datetime.time(14, 50, 59), datetime.time(15, 16)], + # array_bool_col + [[True], [False], [True]], + # array_ts_col + [ + [datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc)], + [datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc)], + [datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc)], + ], + # array_dt_col + # TODO: Remove tzinfo in V3. + # https://github.com/googleapis/python-bigquery/issues/985 + [ + [datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc)], + [datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc)], + [datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc)], + ], + # array_float32_col + [[1.0], [2.0], [3.0]], + # array_float64_col + [[4.0], [5.0], [6.0]], + # array_int8_col + [[-12], [-11], [-10]], + # array_int16_col + [[-9], [-8], [-7]], + # array_int32_col + [[-6], [-5], [-4]], + # array_int64_col + [[-3], [-2], [-1]], + # array_uint8_col + [[0], [1], [2]], + # array_uint16_col + [[3], [4], [5]], + # array_uint32_col + [[6], [7], [8]], + ] @pytest.mark.skipif( @@ -697,6 +801,8 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): SF("int_col", "INTEGER", mode="REQUIRED"), SF("bool_col", "BOOLEAN", mode="REQUIRED"), SF("string_col", "STRING", mode="NULLABLE"), + SF("date_col", "DATE", mode="NULLABLE"), + SF("time_col", "TIME", mode="NULLABLE"), ] dataframe = pandas.DataFrame( @@ -706,30 +812,40 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): "bool_col": True, "string_col": "my string", "int_col": 10, + "date_col": datetime.date(2021, 1, 1), + "time_col": datetime.time(21, 1, 1), }, { "float_col": 2.22, "bool_col": False, "string_col": "another string", "int_col": 20, + "date_col": datetime.date(2021, 1, 2), + "time_col": datetime.time(21, 1, 2), }, { "float_col": 3.33, "bool_col": False, "string_col": "another string", "int_col": 30, + "date_col": datetime.date(2021, 1, 3), + "time_col": datetime.time(21, 1, 3), }, { "float_col": 4.44, "bool_col": True, "string_col": "another string", "int_col": 40, + "date_col": datetime.date(2021, 1, 4), + "time_col": datetime.time(21, 1, 4), }, { "float_col": 5.55, "bool_col": False, "string_col": "another string", "int_col": 50, + "date_col": datetime.date(2021, 1, 5), + "time_col": datetime.time(21, 1, 5), }, { "float_col": 6.66, @@ -738,9 +854,13 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): # NULL value indicator. 
"string_col": float("NaN"), "int_col": 60, + "date_col": datetime.date(2021, 1, 6), + "time_col": datetime.time(21, 1, 6), }, ] ) + dataframe["date_col"] = dataframe["date_col"].astype("dbdate") + dataframe["time_col"] = dataframe["time_col"].astype("dbtime") table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe" table_arg = bigquery.Table(table_id, schema=schema) @@ -916,15 +1036,8 @@ def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_r assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" - - # object is used by default, but we can use "datetime64[ns]" automatically - # when data is within the supported range. - # https://github.com/googleapis/python-bigquery/issues/861 - assert df.dtypes["date_col"].name == "object" - - # object is used by default, but we can use "timedelta64[ns]" automatically - # https://github.com/googleapis/python-bigquery/issues/862 - assert df.dtypes["time_col"].name == "object" + assert df.dtypes["date_col"].name == "dbdate" + assert df.dtypes["time_col"].name == "dbtime" # decimal.Decimal is used to avoid loss of precision. assert df.dtypes["bignumeric_col"].name == "object" @@ -974,10 +1087,7 @@ def test_list_rows_nullable_scalars_extreme_dtypes( assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" - - # object is used by default, but we can use "timedelta64[ns]" automatically - # https://github.com/googleapis/python-bigquery/issues/862 - assert df.dtypes["time_col"].name == "object" + assert df.dtypes["time_col"].name == "dbtime" # decimal.Decimal is used to avoid loss of precision. 
assert df.dtypes["numeric_col"].name == "object" diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 8e4fba770..044ca6e9a 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -21,6 +21,8 @@ import pytest from google.cloud import bigquery_storage +import google.cloud.bigquery_storage_v1.reader +import google.cloud.bigquery_storage_v1.services.big_query_read.client try: import pandas @@ -39,8 +41,8 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from ..helpers import make_connection - from .helpers import _make_client from .helpers import _make_job_resource @@ -108,7 +110,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) job_resource["configuration"]["query"]["query"] = query job_resource["status"] = {"state": "DONE"} - get_query_results_resource = { + query_resource = { "jobComplete": True, "jobReference": {"projectId": "test-project", "jobId": "test-job"}, "schema": { @@ -119,25 +121,44 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): }, "totalRows": "4", } - connection = make_connection(get_query_results_resource, job_resource) + stream_id = "projects/1/locations/2/sessions/3/streams/4" + name_array = pyarrow.array( + ["John", "Paul", "George", "Ringo"], type=pyarrow.string() + ) + age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64()) + arrow_schema = pyarrow.schema( + [ + pyarrow.field("name", pyarrow.string(), True), + pyarrow.field("age", pyarrow.int64(), True), + ] + ) + record_batch = pyarrow.RecordBatch.from_arrays( + [name_array, age_array], schema=arrow_schema + ) + connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + session.streams = [bigquery_storage.types.ReadStream(name=stream_id)] bqstorage_client.create_read_session.return_value = session + bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + page = bigquery_storage.types.ReadRowsResponse() + if BQ_STORAGE_VERSIONS.is_read_session_optional: + page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + page.arrow_record_batch.serialized_record_batch = ( + record_batch.serialize().to_pybytes() + ) + bqstorage_base_client.read_rows.return_value = [page] + reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream( + [page], bqstorage_base_client, stream_id, 0, {} + ) + bqstorage_client.read_rows.return_value = reader - job.to_dataframe(bqstorage_client=bqstorage_client) + dataframe = job.to_dataframe(bqstorage_client=bqstorage_client) + assert len(dataframe) == 4 destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] ) @@ -498,25 +519,44 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): ] }, } + stream_id = "projects/1/locations/2/sessions/3/streams/4" + name_array = pyarrow.array( + ["John", "Paul", "George", "Ringo"], 
type=pyarrow.string() + ) + age_array = pyarrow.array([17, 24, 21, 15], type=pyarrow.int64()) + arrow_schema = pyarrow.schema( + [ + pyarrow.field("name", pyarrow.string(), True), + pyarrow.field("age", pyarrow.int64(), True), + ] + ) + record_batch = pyarrow.RecordBatch.from_arrays( + [name_array, age_array], schema=arrow_schema + ) connection = make_connection(query_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(resource, client) bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + session.streams = [bigquery_storage.types.ReadStream(name=stream_id)] bqstorage_client.create_read_session.return_value = session + bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + page = bigquery_storage.types.ReadRowsResponse() + if BQ_STORAGE_VERSIONS.is_read_session_optional: + page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + page.arrow_record_batch.serialized_record_batch = ( + record_batch.serialize().to_pybytes() + ) + bqstorage_base_client.read_rows.return_value = [page] + reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream( + [page], bqstorage_base_client, stream_id, 0, {} + ) + bqstorage_client.read_rows.return_value = reader - job.to_dataframe(bqstorage_client=bqstorage_client) + dataframe = job.to_dataframe(bqstorage_client=bqstorage_client) + assert len(dataframe) == 4 destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] ) @@ -530,6 +570,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): read_session=expected_session, max_stream_count=0, # Use default number of streams for best performance. 
) + bqstorage_client.read_rows.assert_called_once_with(stream_id) def test_to_dataframe_bqstorage_no_pyarrow_compression(): @@ -630,7 +671,7 @@ def test_to_dataframe_column_dtypes(): assert df.km.dtype.name == "float16" assert df.payment_type.dtype.name == "object" assert df.complete.dtype.name == "boolean" - assert df.date.dtype.name == "object" + assert df.date.dtype.name == "dbdate" def test_to_dataframe_column_date_dtypes(): @@ -655,13 +696,13 @@ def test_to_dataframe_column_date_dtypes(): ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) assert len(df) == 1 # verify the number of rows exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] assert list(df) == exp_columns # verify the column names - assert df.date.dtype.name == "datetime64[ns]" + assert df.date.dtype.name == "dbdate" @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @@ -916,7 +957,6 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): dtypes = dict(xxx=numpy.dtype("int64")) progress_bar_type = "normal" create_bqstorage_client = False - date_as_object = False max_results = 42 geography_column = "g" @@ -925,7 +965,6 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, max_results=max_results, geography_column=geography_column, ) @@ -939,7 +978,6 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_column=geography_column, ) assert df is row_iterator.to_geodataframe.return_value diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 04b5f2b85..8bd1fe1df 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5206,14 +5206,39 @@ def test_insert_rows_from_dataframe(self): self.PROJECT, self.DS_ID, self.TABLE_REF.table_id ) - dataframe = pandas.DataFrame( - [ - {"name": "Little One", "age": 10, "adult": False}, - {"name": "Young Gun", "age": 20, "adult": True}, - {"name": "Dad", "age": 30, "adult": True}, - {"name": "Stranger", "age": 40, "adult": True}, - ] - ) + data = [ + { + "name": "Little One", + "age": 10, + "adult": False, + "bdate": datetime.date(2011, 1, 2), + "btime": datetime.time(19, 1, 10), + }, + { + "name": "Young Gun", + "age": 20, + "adult": True, + "bdate": datetime.date(2001, 1, 2), + "btime": datetime.time(19, 1, 20), + }, + { + "name": "Dad", + "age": 30, + "adult": True, + "bdate": datetime.date(1991, 1, 2), + "btime": datetime.time(19, 1, 30), + }, + { + "name": "Stranger", + "age": 40, + "adult": True, + "bdate": datetime.date(1981, 1, 2), + "btime": datetime.time(19, 1, 40), + }, + ] + dataframe = pandas.DataFrame(data) + dataframe["bdate"] = dataframe["bdate"].astype("dbdate") + dataframe["btime"] = dataframe["btime"].astype("dbtime") # create client creds = _make_credentials() @@ -5226,6 +5251,8 @@ def test_insert_rows_from_dataframe(self): SchemaField("name", "STRING", mode="REQUIRED"), SchemaField("age", "INTEGER", mode="REQUIRED"), SchemaField("adult", "BOOLEAN", mode="REQUIRED"), + SchemaField("bdata", "DATE", mode="REQUIRED"), + SchemaField("btime", "TIME", mode="REQUIRED"), ] table = 
Table(self.TABLE_REF, schema=schema) @@ -5238,32 +5265,14 @@ def test_insert_rows_from_dataframe(self): for chunk_errors in error_info: assert chunk_errors == [] - EXPECTED_SENT_DATA = [ - { - "rows": [ - { - "insertId": "0", - "json": {"name": "Little One", "age": "10", "adult": "false"}, - }, - { - "insertId": "1", - "json": {"name": "Young Gun", "age": "20", "adult": "true"}, - }, - { - "insertId": "2", - "json": {"name": "Dad", "age": "30", "adult": "true"}, - }, - ] - }, - { - "rows": [ - { - "insertId": "3", - "json": {"name": "Stranger", "age": "40", "adult": "true"}, - } - ] - }, - ] + for row in data: + row["age"] = str(row["age"]) + row["adult"] = str(row["adult"]).lower() + row["bdate"] = row["bdate"].isoformat() + row["btime"] = row["btime"].isoformat() + + rows = [dict(insertId=str(i), json=row) for i, row in enumerate(data)] + EXPECTED_SENT_DATA = [dict(rows=rows[:3]), dict(rows=rows[3:])] actual_calls = conn.api_request.call_args_list @@ -7084,6 +7093,28 @@ def test_load_table_from_dataframe_w_automatic_schema(self): dtype="datetime64[ns]", ).dt.tz_localize(datetime.timezone.utc), ), + ( + "date_col", + pandas.Series( + [ + datetime.date(2010, 1, 2), + datetime.date(2011, 2, 3), + datetime.date(2012, 3, 14), + ], + dtype="dbdate", + ), + ), + ( + "time_col", + pandas.Series( + [ + datetime.time(3, 44, 50), + datetime.time(14, 50, 59), + datetime.time(15, 16), + ], + dtype="dbtime", + ), + ), ] ) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -7124,6 +7155,8 @@ def test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("bool_col", "BOOLEAN"), SchemaField("dt_col", "TIMESTAMP"), SchemaField("ts_col", "TIMESTAMP"), + SchemaField("date_col", "DATE"), + SchemaField("time_col", "TIME"), ) @unittest.skipIf(pandas is None, "Requires `pandas`") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a34b0d56b..c9a3d2815 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3115,7 +3115,37 @@ def test_to_dataframe_column_dtypes(self): self.assertEqual(df.km.dtype.name, "float16") self.assertEqual(df.payment_type.dtype.name, "object") self.assertEqual(df.complete.dtype.name, "boolean") - self.assertEqual(df.date.dtype.name, "object") + self.assertEqual(df.date.dtype.name, "dbdate") + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_datetime_objects(self): + # When converting date or timestamp values to nanosecond + # precision, the result can be out of pyarrow bounds. To avoid + # the error when converting to Pandas, we use object type if + # necessary. 
+ + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("ts", "TIMESTAMP"), + SchemaField("date", "DATE"), + ] + row_data = [ + ["-20000000000000000", "1111-01-01"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one(_mock_client(), api_request, path, schema) + + df = row_iterator.to_dataframe(create_bqstorage_client=False) + + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(df["ts"].dtype.name, "object") + self.assertEqual(df["date"].dtype.name, "object") + self.assertEqual(df["ts"][0].date(), datetime.date(1336, 3, 23)) + self.assertEqual(df["date"][0], datetime.date(1111, 1, 1)) @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): @@ -4036,7 +4066,6 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): dtypes = dict(xxx=numpy.dtype("int64")) progress_bar_type = "normal" create_bqstorage_client = False - date_as_object = False geography_column = "g" to_dataframe.return_value = pandas.DataFrame( @@ -4048,7 +4077,6 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): dtypes=dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, geography_column=geography_column, ) @@ -4057,7 +4085,6 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): dtypes, progress_bar_type, create_bqstorage_client, - date_as_object, geography_as_object=True, ) diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index a223e6652..8e37ed504 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -101,13 +101,13 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): assert df.dtypes["bignumeric_col"].name == "object" assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["bytes_col"].name == "object" - assert df.dtypes["date_col"].name == "object" + assert df.dtypes["date_col"].name == "dbdate" assert df.dtypes["datetime_col"].name == "datetime64[ns]" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["numeric_col"].name == "object" assert df.dtypes["string_col"].name == "object" - assert df.dtypes["time_col"].name == "object" + assert df.dtypes["time_col"].name == "dbtime" assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" # Check for expected values. From 070729fb711ecc71b890955d78265e4d388d568c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 11 Nov 2021 17:28:19 +0100 Subject: [PATCH 23/35] process: mark the package as type-checked (#1058) --- google/cloud/bigquery/py.typed | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 google/cloud/bigquery/py.typed diff --git a/google/cloud/bigquery/py.typed b/google/cloud/bigquery/py.typed new file mode 100644 index 000000000..e73777993 --- /dev/null +++ b/google/cloud/bigquery/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery package uses inline types. 
From 3cae0660c0fc8ca13cea40bc4b64830851c52b21 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Nov 2021 21:40:15 +0100 Subject: [PATCH 24/35] feat: default to DATETIME type when loading timezone-naive datetimes from Pandas (#1061) * Make systest expect DATETIME for naive datetimes * Fix SchemaField repr() when field type not set * Adjust DATETIME detection logic in dataframes * Fix assertions in one of the samples tests --- google/cloud/bigquery/_pandas_helpers.py | 49 ++++++++- google/cloud/bigquery/schema.py | 24 +++-- samples/tests/test_load_table_dataframe.py | 10 +- tests/system/test_pandas.py | 31 +++--- tests/unit/test__pandas_helpers.py | 117 +++++++++++++++++++++ tests/unit/test_client.py | 4 +- tests/unit/test_schema.py | 5 + 7 files changed, 201 insertions(+), 39 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 39fa74dea..ecb36bf15 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -15,7 +15,9 @@ """Shared helper functions for connecting BigQuery and pandas.""" import concurrent.futures +from datetime import datetime import functools +from itertools import islice import logging import queue import warnings @@ -85,9 +87,7 @@ def _to_wkb(v): _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", - # TODO: Update to DATETIME in V3 - # https://github.com/googleapis/python-bigquery/issues/985 - "datetime64[ns]": "TIMESTAMP", + "datetime64[ns]": "DATETIME", "float32": "FLOAT", "float64": "FLOAT", "int8": "INTEGER", @@ -379,6 +379,36 @@ def _first_valid(series): return series.at[first_valid_index] +def _first_array_valid(series): + """Return the first "meaningful" element from the array series. + + Here, "meaningful" means the first non-None element in one of the arrays that can + be used for type detextion. + """ + first_valid_index = series.first_valid_index() + if first_valid_index is None: + return None + + valid_array = series.at[first_valid_index] + valid_item = next((item for item in valid_array if not pandas.isna(item)), None) + + if valid_item is not None: + return valid_item + + # Valid item is None because all items in the "valid" array are invalid. Try + # to find a true valid array manually. + for array in islice(series, first_valid_index + 1, None): + try: + array_iter = iter(array) + except TypeError: + continue # Not an array, apparently, e.g. None, thus skip. + valid_item = next((item for item in array_iter if not pandas.isna(item)), None) + if valid_item is not None: + break + + return valid_item + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -482,6 +512,19 @@ def augment_schema(dataframe, current_bq_schema): # `pyarrow.ListType` detected_mode = "REPEATED" detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id) + + # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds + # it to such datetimes, causing them to be recognized as TIMESTAMP type. + # We thus additionally check the actual data to see if we need to overrule + # that and choose DATETIME instead. + # Note that this should only be needed for datetime values inside a list, + # since scalar datetime values have a proper Pandas dtype that allows + # distinguishing between timezone-naive and timezone-aware values before + # even requiring the additional schema augment logic in this method. 
+ if detected_type == "TIMESTAMP": + valid_item = _first_array_valid(dataframe[field.name]) + if isinstance(valid_item, datetime) and valid_item.tzinfo is None: + detected_type = "DATETIME" else: detected_mode = field.mode detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 88a7d8f3c..4ccd6e70b 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -257,16 +257,20 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ - field_type = self.field_type.upper() - if field_type == "STRING" or field_type == "BYTES": - if self.max_length is not None: - field_type = f"{field_type}({self.max_length})" - elif field_type.endswith("NUMERIC"): - if self.precision is not None: - if self.scale is not None: - field_type = f"{field_type}({self.precision}, {self.scale})" - else: - field_type = f"{field_type}({self.precision})" + field_type = self.field_type.upper() if self.field_type is not None else None + + # Type can temporarily be set to None if the code needs a SchemaField instance, + # but has npt determined the exact type of the field yet. + if field_type is not None: + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" policy_tags = ( None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) diff --git a/samples/tests/test_load_table_dataframe.py b/samples/tests/test_load_table_dataframe.py index 228666046..777967959 100644 --- a/samples/tests/test_load_table_dataframe.py +++ b/samples/tests/test_load_table_dataframe.py @@ -44,7 +44,7 @@ def test_load_table_dataframe(capsys, client, random_table_id): "INTEGER", "FLOAT", "TIMESTAMP", - "TIMESTAMP", + "DATETIME", ] df = client.list_rows(table).to_dataframe() @@ -64,9 +64,9 @@ def test_load_table_dataframe(capsys, client, random_table_id): pandas.Timestamp("1983-05-09T11:00:00+00:00"), ] assert df["dvd_release"].tolist() == [ - pandas.Timestamp("2003-10-22T10:00:00+00:00"), - pandas.Timestamp("2002-07-16T09:00:00+00:00"), - pandas.Timestamp("2008-01-14T08:00:00+00:00"), - pandas.Timestamp("2002-01-22T07:00:00+00:00"), + pandas.Timestamp("2003-10-22T10:00:00"), + pandas.Timestamp("2002-07-16T09:00:00"), + pandas.Timestamp("2008-01-14T08:00:00"), + pandas.Timestamp("2002-01-22T07:00:00"), ] assert df["wikidata_id"].tolist() == [u"Q16403", u"Q25043", u"Q24953", u"Q24980"] diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index bc8e43370..6f06c6feb 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -65,7 +65,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ).dt.tz_localize(datetime.timezone.utc), ), ( - "dt_col", + "dt_col_no_tz", pandas.Series( [ datetime.datetime(2010, 1, 2, 3, 44, 50), @@ -130,7 +130,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i ), ), ( - "array_dt_col", + "array_dt_col_no_tz", pandas.Series( [ [datetime.datetime(2010, 1, 2, 3, 44, 50)], @@ -196,9 +196,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i assert tuple(table.schema) == ( bigquery.SchemaField("bool_col", "BOOLEAN"), 
bigquery.SchemaField("ts_col", "TIMESTAMP"), - # TODO: Update to DATETIME in V3 - # https://github.com/googleapis/python-bigquery/issues/985 - bigquery.SchemaField("dt_col", "TIMESTAMP"), + bigquery.SchemaField("dt_col_no_tz", "DATETIME"), bigquery.SchemaField("float32_col", "FLOAT"), bigquery.SchemaField("float64_col", "FLOAT"), bigquery.SchemaField("int8_col", "INTEGER"), @@ -212,9 +210,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("array_bool_col", "BOOLEAN", mode="REPEATED"), bigquery.SchemaField("array_ts_col", "TIMESTAMP", mode="REPEATED"), - # TODO: Update to DATETIME in V3 - # https://github.com/googleapis/python-bigquery/issues/985 - bigquery.SchemaField("array_dt_col", "TIMESTAMP", mode="REPEATED"), + bigquery.SchemaField("array_dt_col_no_tz", "DATETIME", mode="REPEATED"), bigquery.SchemaField("array_float32_col", "FLOAT", mode="REPEATED"), bigquery.SchemaField("array_float64_col", "FLOAT", mode="REPEATED"), bigquery.SchemaField("array_int8_col", "INTEGER", mode="REPEATED"), @@ -225,6 +221,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i bigquery.SchemaField("array_uint16_col", "INTEGER", mode="REPEATED"), bigquery.SchemaField("array_uint32_col", "INTEGER", mode="REPEATED"), ) + assert numpy.array( sorted(map(list, bigquery_client.list_rows(table)), key=lambda r: r[5]), dtype="object", @@ -237,13 +234,11 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc), datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc), ], - # dt_col - # TODO: Remove tzinfo in V3. - # https://github.com/googleapis/python-bigquery/issues/985 + # dt_col_no_tz [ - datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc), - datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc), - datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc), + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), ], # float32_col [1.0, 2.0, 3.0], @@ -280,12 +275,10 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i [datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc)], ], # array_dt_col - # TODO: Remove tzinfo in V3. 
- # https://github.com/googleapis/python-bigquery/issues/985 [ - [datetime.datetime(2010, 1, 2, 3, 44, 50, tzinfo=datetime.timezone.utc)], - [datetime.datetime(2011, 2, 3, 14, 50, 59, tzinfo=datetime.timezone.utc)], - [datetime.datetime(2012, 3, 14, 15, 16, tzinfo=datetime.timezone.utc)], + [datetime.datetime(2010, 1, 2, 3, 44, 50)], + [datetime.datetime(2011, 2, 3, 14, 50, 59)], + [datetime.datetime(2012, 3, 14, 15, 16)], ], # array_float32_col [[1.0], [2.0], [3.0]], diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 36becf182..e8d9562e6 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1208,6 +1208,46 @@ def test_dataframe_to_bq_schema_geography(module_under_test): ) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test__first_array_valid_no_valid_items(module_under_test): + series = pandas.Series([None, pandas.NA, float("NaN")]) + result = module_under_test._first_array_valid(series) + assert result is None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test__first_array_valid_valid_item_exists(module_under_test): + series = pandas.Series([None, [0], [1], None]) + result = module_under_test._first_array_valid(series) + assert result == 0 + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test__first_array_valid_all_nan_items_in_first_valid_candidate(module_under_test): + import numpy + + series = pandas.Series( + [ + None, + [None, float("NaN"), pandas.NA, pandas.NaT, numpy.nan], + None, + [None, None], + [None, float("NaN"), pandas.NA, pandas.NaT, numpy.nan, 42, None], + [1, 2, 3], + None, + ] + ) + result = module_under_test._first_array_valid(series) + assert result == 42 + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test__first_array_valid_no_arrays_with_valid_items(module_under_test): + series = pandas.Series([[None, None], [None, None]]) + result = module_under_test._first_array_valid(series) + assert result is None + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( @@ -1274,6 +1314,59 @@ def test_augment_schema_type_detection_succeeds(module_under_test): assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_augment_schema_repeated_fields(module_under_test): + dataframe = pandas.DataFrame( + data=[ + # Include some values useless for type detection to make sure the logic + # indeed finds the value that is suitable. 
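+            # (Rows that are None and array items that are null-like are skipped,
+            # so detection is based on the first usable element, e.g. "foo" or the
+            # timestamp/datetime values below.)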
+ {"string_array": None, "timestamp_array": None, "datetime_array": None}, + { + "string_array": [None], + "timestamp_array": [None], + "datetime_array": [None], + }, + {"string_array": None, "timestamp_array": None, "datetime_array": None}, + { + "string_array": [None, "foo"], + "timestamp_array": [ + None, + datetime.datetime( + 2005, 5, 31, 14, 25, 55, tzinfo=datetime.timezone.utc + ), + ], + "datetime_array": [None, datetime.datetime(2005, 5, 31, 14, 25, 55)], + }, + {"string_array": None, "timestamp_array": None, "datetime_array": None}, + ] + ) + + current_schema = ( + schema.SchemaField("string_array", field_type=None, mode="NULLABLE"), + schema.SchemaField("timestamp_array", field_type=None, mode="NULLABLE"), + schema.SchemaField("datetime_array", field_type=None, mode="NULLABLE"), + ) + + with warnings.catch_warnings(record=True) as warned: + augmented_schema = module_under_test.augment_schema(dataframe, current_schema) + + # there should be no relevant warnings + unwanted_warnings = [ + warning for warning in warned if "Pyarrow could not" in str(warning) + ] + assert not unwanted_warnings + + # the augmented schema must match the expected + expected_schema = ( + schema.SchemaField("string_array", field_type="STRING", mode="REPEATED"), + schema.SchemaField("timestamp_array", field_type="TIMESTAMP", mode="REPEATED"), + schema.SchemaField("datetime_array", field_type="DATETIME", mode="REPEATED"), + ) + + by_name = operator.attrgetter("name") + assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_augment_schema_type_detection_fails(module_under_test): dataframe = pandas.DataFrame( @@ -1310,6 +1403,30 @@ def test_augment_schema_type_detection_fails(module_under_test): assert "struct_field" in warning_msg and "struct_field_2" in warning_msg +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_augment_schema_type_detection_fails_array_data(module_under_test): + dataframe = pandas.DataFrame( + data=[{"all_none_array": [None, float("NaN")], "empty_array": []}] + ) + current_schema = [ + schema.SchemaField("all_none_array", field_type=None, mode="NULLABLE"), + schema.SchemaField("empty_array", field_type=None, mode="NULLABLE"), + ] + + with warnings.catch_warnings(record=True) as warned: + augmented_schema = module_under_test.augment_schema(dataframe, current_schema) + + assert augmented_schema is None + + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning) + ] + assert len(expected_warnings) == 1 + warning_msg = str(expected_warnings[0]) + assert "pyarrow" in warning_msg.lower() + assert "all_none_array" in warning_msg and "empty_array" in warning_msg + + def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): pandas = pytest.importorskip("pandas") diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8bd1fe1df..0adb004fd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7153,7 +7153,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): SchemaField("int_col", "INTEGER"), SchemaField("float_col", "FLOAT"), SchemaField("bool_col", "BOOLEAN"), - SchemaField("dt_col", "TIMESTAMP"), + SchemaField("dt_col", "DATETIME"), SchemaField("ts_col", "TIMESTAMP"), SchemaField("date_col", "DATE"), SchemaField("time_col", "TIME"), @@ -7660,7 +7660,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): SchemaField("int_as_float_col", "INTEGER"), 
SchemaField("float_col", "FLOAT"), SchemaField("bool_col", "BOOLEAN"), - SchemaField("dt_col", "TIMESTAMP"), + SchemaField("dt_col", "DATETIME"), SchemaField("ts_col", "TIMESTAMP"), SchemaField("string_col", "STRING"), SchemaField("bytes_col", "BYTES"), diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index e092b90ee..863ef1e5f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -529,6 +529,11 @@ def test___repr__(self): expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" self.assertEqual(repr(field1), expected) + def test___repr__type_not_set(self): + field1 = self._make_one("field1", field_type=None) + expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)" + self.assertEqual(repr(field1), expected) + def test___repr__evaluable_no_policy_tags(self): field = self._make_one("field1", "STRING", "REQUIRED", "Description") field_repr = repr(field) From dad555d9b304baf319ab0b34cb2c93510aa4f312 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Nov 2021 14:54:28 -0600 Subject: [PATCH 25/35] chore: release 3.0.0b1 (pre-release) See: https://www.python.org/dev/peps/pep-0440/#pre-releases --- google/cloud/bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 877ea53d8..a3c392423 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.30.1" +__version__ = "3.0.0b1" From 3b3ebff56004e061f58ecdd06fc9020423b890e2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 2 Dec 2021 14:15:15 -0600 Subject: [PATCH 26/35] feat: add `api_method` parameter to `Client.query` to select `INSERT` or `QUERY` API (#967) * feat: add `api_method` parameter to `Client.query` to select `insert` or `query` API Work in Progress. This commit only refactors to allow jobs.insert to be selected. Supporting jobs.query will require more transformations to QueryJobConfig, QueryJob, and RowIterator. 
* WIP: begin implementation of jobs.query usage * remove extra files * insert query with jobs.query * fix merge between job config and query request * add tests * update todo with thoughts on future perf update * clarify TODO comment * add placeholders for needed tests * add schema property * feat: add `QueryJob.schema` property for dry run queries * add more job properties * add tests for differences in API error behavior between jobs.query and jobs.insert * update docs to show differences * cover error conversion * restore missing modules * add unit tests * adjust query job construction * avoid conflicting table IDs * mock query response * fix unit test coverage * fix type errors * fix docs formatting * comments and additional unit tests --- google/cloud/bigquery/_job_helpers.py | 259 +++++++++++++++ google/cloud/bigquery/client.py | 95 ++---- google/cloud/bigquery/enums.py | 39 +++ tests/system/conftest.py | 71 +++- tests/system/test_client.py | 361 --------------------- tests/system/test_query.py | 450 +++++++++++++++++++++++++- tests/unit/test__job_helpers.py | 329 +++++++++++++++++++ tests/unit/test_client.py | 183 +++++++++-- 8 files changed, 1313 insertions(+), 474 deletions(-) create mode 100644 google/cloud/bigquery/_job_helpers.py create mode 100644 tests/unit/test__job_helpers.py diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py new file mode 100644 index 000000000..33fc72261 --- /dev/null +++ b/google/cloud/bigquery/_job_helpers.py @@ -0,0 +1,259 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helpers for interacting with the job REST APIs from the client.""" + +import copy +import uuid +from typing import Any, Dict, TYPE_CHECKING, Optional + +import google.api_core.exceptions as core_exceptions +from google.api_core import retry as retries + +from google.cloud.bigquery import job + +# Avoid circular imports +if TYPE_CHECKING: # pragma: NO COVER + from google.cloud.bigquery.client import Client + + +# The purpose of _TIMEOUT_BUFFER_MILLIS is to allow the server-side timeout to +# happen before the client-side timeout. This is not strictly neccessary, as the +# client retries client-side timeouts, but the hope by making the server-side +# timeout slightly shorter is that it can save the server from some unncessary +# processing time. +# +# 250 milliseconds is chosen arbitrarily, though should be about the right +# order of magnitude for network latency and switching delays. It is about the +# amount of time for light to circumnavigate the world twice. +_TIMEOUT_BUFFER_MILLIS = 250 + + +def make_job_id(job_id: Optional[str] = None, prefix: Optional[str] = None) -> str: + """Construct an ID for a new job. + + Args: + job_id: the user-provided job ID. + prefix: the user-provided prefix for a job ID. 
+ + Returns: + str: A job ID + """ + if job_id is not None: + return job_id + elif prefix is not None: + return str(prefix) + str(uuid.uuid4()) + else: + return str(uuid.uuid4()) + + +def query_jobs_insert( + client: "Client", + query: str, + job_config: Optional[job.QueryJobConfig], + job_id: Optional[str], + job_id_prefix: Optional[str], + location: str, + project: str, + retry: retries.Retry, + timeout: Optional[float], + job_retry: retries.Retry, +) -> job.QueryJob: + """Initiate a query using jobs.insert. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + """ + job_id_given = job_id is not None + job_id_save = job_id + job_config_save = job_config + + def do_query(): + # Make a copy now, so that original doesn't get changed by the process + # below and to facilitate retry + job_config = copy.deepcopy(job_config_save) + + job_id = make_job_id(job_id_save, job_id_prefix) + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob(job_ref, query, client=client, job_config=job_config) + + try: + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = client.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job + else: + return query_job + + future = do_query() + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. + if not job_id_given: + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future + + +def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any]: + """Transform from Job resource to QueryRequest resource. + + Most of the keys in job.configuration.query are in common with + QueryRequest. If any configuration property is set that is not available in + jobs.query, it will result in a server-side error. + """ + request_body = {} + job_config_resource = job_config.to_api_repr() if job_config else {} + query_config_resource = job_config_resource.get("query", {}) + + request_body.update(query_config_resource) + + # These keys are top level in job resource and query resource. + if "labels" in job_config_resource: + request_body["labels"] = job_config_resource["labels"] + if "dryRun" in job_config_resource: + request_body["dryRun"] = job_config_resource["dryRun"] + + # Default to standard SQL. + request_body.setdefault("useLegacySql", False) + + # Since jobs.query can return results, ensure we use the lossless timestamp + # format. 
See: https://github.com/googleapis/python-bigquery/issues/395 + request_body.setdefault("formatOptions", {}) + request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore + + return request_body + + +def _to_query_job( + client: "Client", + query: str, + request_config: Optional[job.QueryJobConfig], + query_response: Dict[str, Any], +) -> job.QueryJob: + job_ref_resource = query_response["jobReference"] + job_ref = job._JobReference._from_api_repr(job_ref_resource) + query_job = job.QueryJob(job_ref, query, client=client) + query_job._properties.setdefault("configuration", {}) + + # Not all relevant properties are in the jobs.query response. Populate some + # expected properties based on the job configuration. + if request_config is not None: + query_job._properties["configuration"].update(request_config.to_api_repr()) + + query_job._properties["configuration"].setdefault("query", {}) + query_job._properties["configuration"]["query"]["query"] = query + query_job._properties["configuration"]["query"].setdefault("useLegacySql", False) + + query_job._properties.setdefault("statistics", {}) + query_job._properties["statistics"].setdefault("query", {}) + query_job._properties["statistics"]["query"]["cacheHit"] = query_response.get( + "cacheHit" + ) + query_job._properties["statistics"]["query"]["schema"] = query_response.get( + "schema" + ) + query_job._properties["statistics"]["query"][ + "totalBytesProcessed" + ] = query_response.get("totalBytesProcessed") + + # Set errors if any were encountered. + query_job._properties.setdefault("status", {}) + if "errors" in query_response: + # Set errors but not errorResult. If there was an error that failed + # the job, jobs.query behaves like jobs.getQueryResults and returns a + # non-success HTTP status code. + errors = query_response["errors"] + query_job._properties["status"]["errors"] = errors + + # Transform job state so that QueryJob doesn't try to restart the query. + job_complete = query_response.get("jobComplete") + if job_complete: + query_job._properties["status"]["state"] = "DONE" + # TODO: https://github.com/googleapis/python-bigquery/issues/589 + # Set the first page of results if job is "complete" and there is + # only 1 page of results. Otherwise, use the existing logic that + # refreshes the job stats. + # + # This also requires updates to `to_dataframe` and the DB API connector + # so that they don't try to read from a destination table if all the + # results are present. + else: + query_job._properties["status"]["state"] = "PENDING" + + return query_job + + +def query_jobs_query( + client: "Client", + query: str, + job_config: Optional[job.QueryJobConfig], + location: str, + project: str, + retry: retries.Retry, + timeout: Optional[float], + job_retry: retries.Retry, +) -> job.QueryJob: + """Initiate a query using jobs.query. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + """ + path = f"/projects/{project}/queries" + request_body = _to_query_request(job_config) + + if timeout is not None: + # Subtract a buffer for context switching, network latency, etc. 
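+        # For example, a 2.0 second client-side timeout becomes
+        #   timeoutMs = max(0, int(1000 * 2.0) - 250) == 1750
+        # so the server-side deadline fires just before the client-side one.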
+ request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS) + request_body["location"] = location + request_body["query"] = query + + def do_query(): + request_body["requestId"] = make_job_id() + span_attributes = {"path": path} + api_response = client._call_api( + retry, + span_name="BigQuery.query", + span_attributes=span_attributes, + method="POST", + path=path, + data=request_body, + timeout=timeout, + ) + return _to_query_job(client, query, job_config, api_response) + + future = do_query() + + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 6d94f5bbc..76ccafaf4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -60,6 +60,8 @@ DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, ) +from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none @@ -69,6 +71,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job @@ -3164,6 +3167,7 @@ def query( retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, + api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, ) -> job.QueryJob: """Run a SQL query. @@ -3215,6 +3219,11 @@ def query( called on the job returned. The ``job_retry`` specified here becomes the default ``job_retry`` for ``result()``, where it can also be specified. + api_method (Union[str, enums.QueryApiMethod]): + Method with which to start the query job. + + See :class:`google.cloud.bigquery.enums.QueryApiMethod` for + details on the difference between the query start methods. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. @@ -3238,7 +3247,10 @@ def query( " provided." ) - job_id_save = job_id + if job_id_given and api_method == enums.QueryApiMethod.QUERY: + raise TypeError( + "`job_id` was provided, but the 'QUERY' `api_method` was requested." + ) if project is None: project = self.project @@ -3269,50 +3281,25 @@ def query( # Note that we haven't modified the original job_config (or # _default_query_job_config) up to this point. 
- job_config_save = job_config - - def do_query(): - # Make a copy now, so that original doesn't get changed by the process - # below and to facilitate retry - job_config = copy.deepcopy(job_config_save) - - job_id = _make_job_id(job_id_save, job_id_prefix) - job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - - try: - query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.Conflict as create_exc: - # The thought is if someone is providing their own job IDs and they get - # their job ID generation wrong, this could end up returning results for - # the wrong query. We thus only try to recover if job ID was not given. - if job_id_given: - raise create_exc - - try: - query_job = self.get_job( - job_id, - project=project, - location=location, - retry=retry, - timeout=timeout, - ) - except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc - else: - return query_job - else: - return query_job - - future = do_query() - # The future might be in a failed state now, but if it's - # unrecoverable, we'll find out when we ask for it's result, at which - # point, we may retry. - if not job_id_given: - future._retry_do_query = do_query # in case we have to retry later - future._job_retry = job_retry - - return future + if api_method == enums.QueryApiMethod.QUERY: + return _job_helpers.query_jobs_query( + self, query, job_config, location, project, retry, timeout, job_retry, + ) + elif api_method == enums.QueryApiMethod.INSERT: + return _job_helpers.query_jobs_insert( + self, + query, + job_config, + job_id, + job_id_prefix, + location, + project, + retry, + timeout, + job_retry, + ) + else: + raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}") def insert_rows( self, @@ -3985,24 +3972,6 @@ def _extract_job_reference(job, project=None, location=None): return (project, location, job_id) -def _make_job_id(job_id: Optional[str], prefix: Optional[str] = None) -> str: - """Construct an ID for a new job. - - Args: - job_id: the user-provided job ID. - prefix: the user-provided prefix for a job ID. - - Returns: - str: A job ID - """ - if job_id is not None: - return job_id - elif prefix is not None: - return str(prefix) + str(uuid.uuid4()) - else: - return str(uuid.uuid4()) - - def _check_mode(stream): """Check that a stream was opened in read-binary mode. diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 8c24f71e7..c4a43126a 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -122,6 +122,45 @@ class QueryPriority(object): """Specifies batch priority.""" +class QueryApiMethod(str, enum.Enum): + """API method used to start the query. The default value is + :attr:`INSERT`. + """ + + INSERT = "INSERT" + """Submit a query job by using the `jobs.insert REST API method + `_. + + This supports all job configuration options. + """ + + QUERY = "QUERY" + """Submit a query job by using the `jobs.query REST API method + `_. + + Differences from ``INSERT``: + + * Many parameters and job configuration options, including job ID and + destination table, cannot be used + with this API method. See the `jobs.query REST API documentation + `_ for + the complete list of supported configuration options. + + * API blocks up to a specified timeout, waiting for the query to + finish. + + * The full job resource (including job statistics) may not be available. 
+ Call :meth:`~google.cloud.bigquery.job.QueryJob.reload` or + :meth:`~google.cloud.bigquery.client.Client.get_job` to get full job + statistics and configuration. + + * :meth:`~google.cloud.bigquery.Client.query` can raise API exceptions if + the query fails, whereas the same errors don't appear until calling + :meth:`~google.cloud.bigquery.job.QueryJob.result` when the ``INSERT`` + API method is used. + """ + + class SchemaUpdateOption(object): """Specifies an update to the destination table schema as a side effect of a load job. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 7eec76a32..784a1dd5c 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -13,7 +13,9 @@ # limitations under the License. import pathlib +import random import re +from typing import Tuple import pytest import test_utils.prefixer @@ -26,6 +28,7 @@ prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") DATA_DIR = pathlib.Path(__file__).parent.parent / "data" +TOKYO_LOCATION = "asia-northeast1" @pytest.fixture(scope="session", autouse=True) @@ -62,6 +65,16 @@ def dataset_id(bigquery_client): bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) +@pytest.fixture(scope="session") +def dataset_id_tokyo(bigquery_client: bigquery.Client, project_id: str): + dataset_id = prefixer.create_prefix() + "_tokyo" + dataset = bigquery.Dataset(f"{project_id}.{dataset_id}") + dataset.location = TOKYO_LOCATION + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) + + @pytest.fixture() def dataset_client(bigquery_client, dataset_id): import google.cloud.bigquery.job @@ -78,38 +91,64 @@ def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" -@pytest.fixture(scope="session") -def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): +def load_scalars_table( + bigquery_client: bigquery.Client, + project_id: str, + dataset_id: str, + data_path: str = "scalars.jsonl", +) -> str: schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + table_id = data_path.replace(".", "_") + hex(random.randrange(1000000)) job_config = bigquery.LoadJobConfig() job_config.schema = schema job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON - full_table_id = f"{project_id}.{dataset_id}.scalars" - with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + with open(DATA_DIR / data_path, "rb") as data_file: job = bigquery_client.load_table_from_file( data_file, full_table_id, job_config=job_config ) job.result() + return full_table_id + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + full_table_id = load_scalars_table(bigquery_client, project_id, dataset_id) yield full_table_id - bigquery_client.delete_table(full_table_id) + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +@pytest.fixture(scope="session") +def scalars_table_tokyo( + bigquery_client: bigquery.Client, project_id: str, dataset_id_tokyo: str +): + full_table_id = load_scalars_table(bigquery_client, project_id, dataset_id_tokyo) + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) @pytest.fixture(scope="session") def scalars_extreme_table( bigquery_client: bigquery.Client, project_id: str, dataset_id: str ): - schema = 
bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") - job_config = bigquery.LoadJobConfig() - job_config.schema = schema - job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON - full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" - with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: - job = bigquery_client.load_table_from_file( - data_file, full_table_id, job_config=job_config - ) - job.result() + full_table_id = load_scalars_table( + bigquery_client, project_id, dataset_id, data_path="scalars_extreme.jsonl" + ) yield full_table_id - bigquery_client.delete_table(full_table_id) + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +@pytest.fixture(scope="session", params=["US", TOKYO_LOCATION]) +def scalars_table_multi_location( + request, scalars_table: str, scalars_table_tokyo: str +) -> Tuple[str, str]: + if request.param == "US": + full_table_id = scalars_table + elif request.param == TOKYO_LOCATION: + full_table_id = scalars_table_tokyo + else: + raise ValueError(f"got unexpected location: {request.param}") + return request.param, full_table_id @pytest.fixture diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 8059f21db..8f28d5a8b 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -13,7 +13,6 @@ # limitations under the License. import base64 -import concurrent.futures import csv import datetime import decimal @@ -692,64 +691,6 @@ def _fetch_single_page(table, selected_fields=None): page = next(iterator.pages) return list(page) - def _create_table_many_columns(self, rowcount): - # Generate a table of maximum width via CREATE TABLE AS SELECT. - # first column is named 'rowval', and has a value from 1..rowcount - # Subsequent column is named col_ and contains the value N*rowval, - # where N is between 1 and 9999 inclusive. - dsname = _make_dataset_id("wide_schema") - dataset = self.temp_dataset(dsname) - table_id = "many_columns" - table_ref = dataset.table(table_id) - self.to_delete.insert(0, table_ref) - colprojections = ",".join( - ["r * {} as col_{}".format(n, n) for n in range(1, 10000)] - ) - sql = """ - CREATE TABLE {}.{} - AS - SELECT - r as rowval, - {} - FROM - UNNEST(GENERATE_ARRAY(1,{},1)) as r - """.format( - dsname, table_id, colprojections, rowcount - ) - query_job = Config.CLIENT.query(sql) - query_job.result() - self.assertEqual(query_job.statement_type, "CREATE_TABLE_AS_SELECT") - self.assertEqual(query_job.ddl_operation_performed, "CREATE") - self.assertEqual(query_job.ddl_target_table, table_ref) - - return table_ref - - def test_query_many_columns(self): - # Test working with the widest schema BigQuery supports, 10k columns. - row_count = 2 - table_ref = self._create_table_many_columns(row_count) - rows = list( - Config.CLIENT.query( - "SELECT * FROM `{}.{}`".format(table_ref.dataset_id, table_ref.table_id) - ) - ) - - self.assertEqual(len(rows), row_count) - - # check field representations adhere to expected values. 
- correctwidth = 0 - badvals = 0 - for r in rows: - vals = r._xxx_values - rowval = vals[0] - if len(vals) == 10000: - correctwidth = correctwidth + 1 - for n in range(1, 10000): - if vals[n] != rowval * (n): - badvals = badvals + 1 - self.assertEqual(correctwidth, row_count) - self.assertEqual(badvals, 0) - def test_insert_rows_then_dump_table(self): NOW_SECONDS = 1448911495.484366 NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC) @@ -1368,25 +1309,6 @@ def test_query_w_wrong_config(self): with self.assertRaises(Exception): Config.CLIENT.query(good_query, job_config=bad_config).result() - def test_query_w_timeout(self): - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = False - - query_job = Config.CLIENT.query( - "SELECT * FROM `bigquery-public-data.github_repos.commits`;", - job_id_prefix="test_query_w_timeout_", - location="US", - job_config=job_config, - ) - - with self.assertRaises(concurrent.futures.TimeoutError): - query_job.result(timeout=1) - - # Even though the query takes >1 second, the call to getQueryResults - # should succeed. - self.assertFalse(query_job.done(timeout=1)) - self.assertIsNotNone(Config.CLIENT.cancel_job(query_job)) - def test_query_w_page_size(self): page_size = 45 query_job = Config.CLIENT.query( @@ -1408,83 +1330,6 @@ def test_query_w_start_index(self): self.assertEqual(result1.extra_params["startIndex"], start_index) self.assertEqual(len(list(result1)), total_rows - start_index) - def test_query_statistics(self): - """ - A system test to exercise some of the extended query statistics. - - Note: We construct a query that should need at least three stages by - specifying a JOIN query. Exact plan and stats are effectively - non-deterministic, so we're largely interested in confirming values - are present. - """ - - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = False - - query_job = Config.CLIENT.query( - """ - SELECT - COUNT(1) - FROM - ( - SELECT - year, - wban_number - FROM `bigquery-public-data.samples.gsod` - LIMIT 1000 - ) lside - INNER JOIN - ( - SELECT - year, - state - FROM `bigquery-public-data.samples.natality` - LIMIT 1000 - ) rside - ON - lside.year = rside.year - """, - location="US", - job_config=job_config, - ) - - # run the job to completion - query_job.result() - - # Assert top-level stats - self.assertFalse(query_job.cache_hit) - self.assertIsNotNone(query_job.destination) - self.assertTrue(query_job.done) - self.assertFalse(query_job.dry_run) - self.assertIsNone(query_job.num_dml_affected_rows) - self.assertEqual(query_job.priority, "INTERACTIVE") - self.assertGreater(query_job.total_bytes_billed, 1) - self.assertGreater(query_job.total_bytes_processed, 1) - self.assertEqual(query_job.statement_type, "SELECT") - self.assertGreater(query_job.slot_millis, 1) - - # Make assertions on the shape of the query plan. - plan = query_job.query_plan - self.assertGreaterEqual(len(plan), 3) - first_stage = plan[0] - self.assertIsNotNone(first_stage.start) - self.assertIsNotNone(first_stage.end) - self.assertIsNotNone(first_stage.entry_id) - self.assertIsNotNone(first_stage.name) - self.assertGreater(first_stage.parallel_inputs, 0) - self.assertGreater(first_stage.completed_parallel_inputs, 0) - self.assertGreater(first_stage.shuffle_output_bytes, 0) - self.assertEqual(first_stage.status, "COMPLETE") - - # Query plan is a digraph. Ensure it has inter-stage links, - # but not every stage has inputs. 
- stages_with_inputs = 0 - for entry in plan: - if len(entry.input_stages) > 0: - stages_with_inputs = stages_with_inputs + 1 - self.assertGreater(stages_with_inputs, 0) - self.assertGreater(len(plan), stages_with_inputs) - def test_dml_statistics(self): table_schema = ( bigquery.SchemaField("foo", "STRING"), @@ -1774,212 +1619,6 @@ def test_dbapi_w_dml(self): ) self.assertEqual(Config.CURSOR.rowcount, 1) - def test_query_w_query_params(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ArrayQueryParameter - from google.cloud.bigquery.query import ScalarQueryParameter - from google.cloud.bigquery.query import ScalarQueryParameterType - from google.cloud.bigquery.query import StructQueryParameter - from google.cloud.bigquery.query import StructQueryParameterType - - question = "What is the answer to life, the universe, and everything?" - question_param = ScalarQueryParameter( - name="question", type_="STRING", value=question - ) - answer = 42 - answer_param = ScalarQueryParameter(name="answer", type_="INT64", value=answer) - pi = 3.1415926 - pi_param = ScalarQueryParameter(name="pi", type_="FLOAT64", value=pi) - pi_numeric = decimal.Decimal("3.141592654") - pi_numeric_param = ScalarQueryParameter( - name="pi_numeric_param", type_="NUMERIC", value=pi_numeric - ) - bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)) - bignum_param = ScalarQueryParameter( - name="bignum_param", type_="BIGNUMERIC", value=bignum - ) - truthy = True - truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) - beef = b"DEADBEEF" - beef_param = ScalarQueryParameter(name="beef", type_="BYTES", value=beef) - naive = datetime.datetime(2016, 12, 5, 12, 41, 9) - naive_param = ScalarQueryParameter(name="naive", type_="DATETIME", value=naive) - naive_date_param = ScalarQueryParameter( - name="naive_date", type_="DATE", value=naive.date() - ) - naive_time_param = ScalarQueryParameter( - name="naive_time", type_="TIME", value=naive.time() - ) - zoned = naive.replace(tzinfo=UTC) - zoned_param = ScalarQueryParameter(name="zoned", type_="TIMESTAMP", value=zoned) - array_param = ArrayQueryParameter( - name="array_param", array_type="INT64", values=[1, 2] - ) - struct_param = StructQueryParameter("hitchhiker", question_param, answer_param) - phred_name = "Phred Phlyntstone" - phred_name_param = ScalarQueryParameter( - name="name", type_="STRING", value=phred_name - ) - phred_age = 32 - phred_age_param = ScalarQueryParameter( - name="age", type_="INT64", value=phred_age - ) - phred_param = StructQueryParameter(None, phred_name_param, phred_age_param) - bharney_name = "Bharney Rhubbyl" - bharney_name_param = ScalarQueryParameter( - name="name", type_="STRING", value=bharney_name - ) - bharney_age = 31 - bharney_age_param = ScalarQueryParameter( - name="age", type_="INT64", value=bharney_age - ) - bharney_param = StructQueryParameter( - None, bharney_name_param, bharney_age_param - ) - characters_param = ArrayQueryParameter( - name=None, array_type="RECORD", values=[phred_param, bharney_param] - ) - empty_struct_array_param = ArrayQueryParameter( - name="empty_array_param", - values=[], - array_type=StructQueryParameterType( - ScalarQueryParameterType(name="foo", type_="INT64"), - ScalarQueryParameterType(name="bar", type_="STRING"), - ), - ) - hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) - sidekick_param = StructQueryParameter( - "sidekick", bharney_name_param, bharney_age_param - ) - roles_param = 
StructQueryParameter("roles", hero_param, sidekick_param) - friends_param = ArrayQueryParameter( - name="friends", array_type="STRING", values=[phred_name, bharney_name] - ) - with_friends_param = StructQueryParameter(None, friends_param) - top_left_param = StructQueryParameter( - "top_left", - ScalarQueryParameter("x", "INT64", 12), - ScalarQueryParameter("y", "INT64", 102), - ) - bottom_right_param = StructQueryParameter( - "bottom_right", - ScalarQueryParameter("x", "INT64", 22), - ScalarQueryParameter("y", "INT64", 92), - ) - rectangle_param = StructQueryParameter( - "rectangle", top_left_param, bottom_right_param - ) - examples = [ - { - "sql": "SELECT @question", - "expected": question, - "query_parameters": [question_param], - }, - { - "sql": "SELECT @answer", - "expected": answer, - "query_parameters": [answer_param], - }, - {"sql": "SELECT @pi", "expected": pi, "query_parameters": [pi_param]}, - { - "sql": "SELECT @pi_numeric_param", - "expected": pi_numeric, - "query_parameters": [pi_numeric_param], - }, - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - }, - { - "sql": "SELECT @truthy", - "expected": truthy, - "query_parameters": [truthy_param], - }, - {"sql": "SELECT @beef", "expected": beef, "query_parameters": [beef_param]}, - { - "sql": "SELECT @naive", - "expected": naive, - "query_parameters": [naive_param], - }, - { - "sql": "SELECT @naive_date", - "expected": naive.date(), - "query_parameters": [naive_date_param], - }, - { - "sql": "SELECT @naive_time", - "expected": naive.time(), - "query_parameters": [naive_time_param], - }, - { - "sql": "SELECT @zoned", - "expected": zoned, - "query_parameters": [zoned_param], - }, - { - "sql": "SELECT @array_param", - "expected": [1, 2], - "query_parameters": [array_param], - }, - { - "sql": "SELECT (@hitchhiker.question, @hitchhiker.answer)", - "expected": ({"_field_1": question, "_field_2": answer}), - "query_parameters": [struct_param], - }, - { - "sql": "SELECT " - "((@rectangle.bottom_right.x - @rectangle.top_left.x) " - "* (@rectangle.top_left.y - @rectangle.bottom_right.y))", - "expected": 100, - "query_parameters": [rectangle_param], - }, - { - "sql": "SELECT ?", - "expected": [ - {"name": phred_name, "age": phred_age}, - {"name": bharney_name, "age": bharney_age}, - ], - "query_parameters": [characters_param], - }, - { - "sql": "SELECT @empty_array_param", - "expected": [], - "query_parameters": [empty_struct_array_param], - }, - { - "sql": "SELECT @roles", - "expected": { - "hero": {"name": phred_name, "age": phred_age}, - "sidekick": {"name": bharney_name, "age": bharney_age}, - }, - "query_parameters": [roles_param], - }, - { - "sql": "SELECT ?", - "expected": {"friends": [phred_name, bharney_name]}, - "query_parameters": [with_friends_param], - }, - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - }, - ] - - for example in examples: - jconfig = QueryJobConfig() - jconfig.query_parameters = example["query_parameters"] - query_job = Config.CLIENT.query( - example["sql"], - job_config=jconfig, - job_id_prefix="test_query_w_query_params", - ) - rows = list(query_job.result()) - self.assertEqual(len(rows), 1) - self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example["expected"]) - def test_dbapi_w_query_parameters(self): examples = [ { diff --git a/tests/system/test_query.py b/tests/system/test_query.py index 649120a7e..f76b1e6ca 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -12,15 
+12,430 @@ # See the License for the specific language governing permissions and # limitations under the License. +import concurrent.futures +import datetime +import decimal +from typing import Tuple + +from google.api_core import exceptions +import pytest + from google.cloud import bigquery +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameterType +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import StructQueryParameterType + + +@pytest.fixture(params=["INSERT", "QUERY"]) +def query_api_method(request): + return request.param -def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str): +@pytest.fixture(scope="session") +def table_with_9999_columns_10_rows(bigquery_client, project_id, dataset_id): + """Generate a table of maximum width via CREATE TABLE AS SELECT. + + The first column is named 'rowval', and has a value from 1..rowcount + Subsequent columns are named col_ and contain the value N*rowval, where + N is between 1 and 9999 inclusive. + """ + table_id = "many_columns" + row_count = 10 + col_projections = ",".join(f"r * {n} as col_{n}" for n in range(1, 10000)) + sql = f""" + CREATE TABLE `{project_id}.{dataset_id}.{table_id}` + AS + SELECT + r as rowval, + {col_projections} + FROM + UNNEST(GENERATE_ARRAY(1,{row_count},1)) as r + """ + query_job = bigquery_client.query(sql) + query_job.result() + + return f"{project_id}.{dataset_id}.{table_id}" + + +def test_query_many_columns( + bigquery_client, table_with_9999_columns_10_rows, query_api_method +): + # Test working with the widest schema BigQuery supports, 10k columns. + query_job = bigquery_client.query( + f"SELECT * FROM `{table_with_9999_columns_10_rows}`", + api_method=query_api_method, + ) + rows = list(query_job) + assert len(rows) == 10 + + # check field representations adhere to expected values. + for row in rows: + rowval = row["rowval"] + for column in range(1, 10000): + assert row[f"col_{column}"] == rowval * column + + +def test_query_w_timeout(bigquery_client, query_api_method): + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + + query_job = bigquery_client.query( + "SELECT * FROM `bigquery-public-data.github_repos.commits`;", + location="US", + job_config=job_config, + api_method=query_api_method, + ) + + with pytest.raises(concurrent.futures.TimeoutError): + query_job.result(timeout=1) + + # Even though the query takes >1 second, the call to getQueryResults + # should succeed. + assert not query_job.done(timeout=1) + assert bigquery_client.cancel_job(query_job) is not None + + +def test_query_statistics(bigquery_client, query_api_method): + """ + A system test to exercise some of the extended query statistics. + + Note: We construct a query that should need at least three stages by + specifying a JOIN query. Exact plan and stats are effectively + non-deterministic, so we're largely interested in confirming values + are present. 
+ """ + + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + + query_job = bigquery_client.query( + """ + SELECT + COUNT(1) + FROM + ( + SELECT + year, + wban_number + FROM `bigquery-public-data.samples.gsod` + LIMIT 1000 + ) lside + INNER JOIN + ( + SELECT + year, + state + FROM `bigquery-public-data.samples.natality` + LIMIT 1000 + ) rside + ON + lside.year = rside.year + """, + location="US", + job_config=job_config, + api_method=query_api_method, + ) + + # run the job to completion + query_job.result() + + # Must reload job to get stats if jobs.query was used. + if query_api_method == "QUERY": + query_job.reload() + + # Assert top-level stats + assert not query_job.cache_hit + assert query_job.destination is not None + assert query_job.done + assert not query_job.dry_run + assert query_job.num_dml_affected_rows is None + assert query_job.priority == "INTERACTIVE" + assert query_job.total_bytes_billed > 1 + assert query_job.total_bytes_processed > 1 + assert query_job.statement_type == "SELECT" + assert query_job.slot_millis > 1 + + # Make assertions on the shape of the query plan. + plan = query_job.query_plan + assert len(plan) >= 3 + first_stage = plan[0] + assert first_stage.start is not None + assert first_stage.end is not None + assert first_stage.entry_id is not None + assert first_stage.name is not None + assert first_stage.parallel_inputs > 0 + assert first_stage.completed_parallel_inputs > 0 + assert first_stage.shuffle_output_bytes > 0 + assert first_stage.status == "COMPLETE" + + # Query plan is a digraph. Ensure it has inter-stage links, + # but not every stage has inputs. + stages_with_inputs = 0 + for entry in plan: + if len(entry.input_stages) > 0: + stages_with_inputs = stages_with_inputs + 1 + assert stages_with_inputs > 0 + assert len(plan) > stages_with_inputs + + +@pytest.mark.parametrize( + ("sql", "expected", "query_parameters"), + ( + ( + "SELECT @question", + "What is the answer to life, the universe, and everything?", + [ + ScalarQueryParameter( + name="question", + type_="STRING", + value="What is the answer to life, the universe, and everything?", + ) + ], + ), + ( + "SELECT @answer", + 42, + [ScalarQueryParameter(name="answer", type_="INT64", value=42)], + ), + ( + "SELECT @pi", + 3.1415926, + [ScalarQueryParameter(name="pi", type_="FLOAT64", value=3.1415926)], + ), + ( + "SELECT @pi_numeric_param", + decimal.Decimal("3.141592654"), + [ + ScalarQueryParameter( + name="pi_numeric_param", + type_="NUMERIC", + value=decimal.Decimal("3.141592654"), + ) + ], + ), + ( + "SELECT @bignum_param", + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + [ + ScalarQueryParameter( + name="bignum_param", + type_="BIGNUMERIC", + value=decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + ) + ], + ), + ( + "SELECT @truthy", + True, + [ScalarQueryParameter(name="truthy", type_="BOOL", value=True)], + ), + ( + "SELECT @beef", + b"DEADBEEF", + [ScalarQueryParameter(name="beef", type_="BYTES", value=b"DEADBEEF")], + ), + ( + "SELECT @naive", + datetime.datetime(2016, 12, 5, 12, 41, 9), + [ + ScalarQueryParameter( + name="naive", + type_="DATETIME", + value=datetime.datetime(2016, 12, 5, 12, 41, 9), + ) + ], + ), + ( + "SELECT @naive_date", + datetime.date(2016, 12, 5), + [ + ScalarQueryParameter( + name="naive_date", type_="DATE", value=datetime.date(2016, 12, 5) + ) + ], + ), + ( + "SELECT @naive_time", + datetime.time(12, 41, 9, 62500), + [ + ScalarQueryParameter( + name="naive_time", + type_="TIME", + value=datetime.time(12, 41, 9, 62500), + ) 
+ ], + ), + ( + "SELECT @zoned", + datetime.datetime(2016, 12, 5, 12, 41, 9, tzinfo=datetime.timezone.utc), + [ + ScalarQueryParameter( + name="zoned", + type_="TIMESTAMP", + value=datetime.datetime( + 2016, 12, 5, 12, 41, 9, tzinfo=datetime.timezone.utc + ), + ) + ], + ), + ( + "SELECT @array_param", + [1, 2], + [ + ArrayQueryParameter( + name="array_param", array_type="INT64", values=[1, 2] + ) + ], + ), + ( + "SELECT (@hitchhiker.question, @hitchhiker.answer)", + ({"_field_1": "What is the answer?", "_field_2": 42}), + [ + StructQueryParameter( + "hitchhiker", + ScalarQueryParameter( + name="question", type_="STRING", value="What is the answer?", + ), + ScalarQueryParameter(name="answer", type_="INT64", value=42,), + ), + ], + ), + ( + "SELECT " + "((@rectangle.bottom_right.x - @rectangle.top_left.x) " + "* (@rectangle.top_left.y - @rectangle.bottom_right.y))", + 100, + [ + StructQueryParameter( + "rectangle", + StructQueryParameter( + "top_left", + ScalarQueryParameter("x", "INT64", 12), + ScalarQueryParameter("y", "INT64", 102), + ), + StructQueryParameter( + "bottom_right", + ScalarQueryParameter("x", "INT64", 22), + ScalarQueryParameter("y", "INT64", 92), + ), + ) + ], + ), + ( + "SELECT ?", + [ + {"name": "Phred Phlyntstone", "age": 32}, + {"name": "Bharney Rhubbyl", "age": 31}, + ], + [ + ArrayQueryParameter( + name=None, + array_type="RECORD", + values=[ + StructQueryParameter( + None, + ScalarQueryParameter( + name="name", type_="STRING", value="Phred Phlyntstone" + ), + ScalarQueryParameter(name="age", type_="INT64", value=32), + ), + StructQueryParameter( + None, + ScalarQueryParameter( + name="name", type_="STRING", value="Bharney Rhubbyl" + ), + ScalarQueryParameter(name="age", type_="INT64", value=31), + ), + ], + ) + ], + ), + ( + "SELECT @empty_array_param", + [], + [ + ArrayQueryParameter( + name="empty_array_param", + values=[], + array_type=StructQueryParameterType( + ScalarQueryParameterType(name="foo", type_="INT64"), + ScalarQueryParameterType(name="bar", type_="STRING"), + ), + ) + ], + ), + ( + "SELECT @roles", + { + "hero": {"name": "Phred Phlyntstone", "age": 32}, + "sidekick": {"name": "Bharney Rhubbyl", "age": 31}, + }, + [ + StructQueryParameter( + "roles", + StructQueryParameter( + "hero", + ScalarQueryParameter( + name="name", type_="STRING", value="Phred Phlyntstone" + ), + ScalarQueryParameter(name="age", type_="INT64", value=32), + ), + StructQueryParameter( + "sidekick", + ScalarQueryParameter( + name="name", type_="STRING", value="Bharney Rhubbyl" + ), + ScalarQueryParameter(name="age", type_="INT64", value=31), + ), + ), + ], + ), + ( + "SELECT ?", + {"friends": ["Jack", "Jill"]}, + [ + StructQueryParameter( + None, + ArrayQueryParameter( + name="friends", array_type="STRING", values=["Jack", "Jill"] + ), + ) + ], + ), + ), +) +def test_query_parameters( + bigquery_client, query_api_method, sql, expected, query_parameters +): + jconfig = bigquery.QueryJobConfig() + jconfig.query_parameters = query_parameters + query_job = bigquery_client.query( + sql, job_config=jconfig, api_method=query_api_method, + ) + rows = list(query_job.result()) + assert len(rows) == 1 + assert len(rows[0]) == 1 + assert rows[0][0] == expected + + +def test_dry_run( + bigquery_client: bigquery.Client, + query_api_method: str, + scalars_table_multi_location: Tuple[str, str], +): + location, full_table_id = scalars_table_multi_location query_config = bigquery.QueryJobConfig() query_config.dry_run = True - query_string = f"SELECT * FROM {scalars_table}" - query_job = 
bigquery_client.query(query_string, job_config=query_config,) + query_string = f"SELECT * FROM {full_table_id}" + query_job = bigquery_client.query( + query_string, + location=location, + job_config=query_config, + api_method=query_api_method, + ) # Note: `query_job.result()` is not necessary on a dry run query. All # necessary information is returned in the initial response. @@ -29,7 +444,30 @@ def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str): assert len(query_job.schema) > 0 -def test_session(bigquery_client: bigquery.Client): +def test_query_error_w_api_method_query(bigquery_client: bigquery.Client): + """No job is returned from jobs.query if the query fails.""" + + with pytest.raises(exceptions.NotFound, match="not_a_real_dataset"): + bigquery_client.query( + "SELECT * FROM not_a_real_dataset.doesnt_exist", api_method="QUERY" + ) + + +def test_query_error_w_api_method_default(bigquery_client: bigquery.Client): + """Test that an exception is not thrown until fetching the results. + + For backwards compatibility, jobs.insert is the default API method. With + jobs.insert, a failed query job is "sucessfully" created. An exception is + thrown when fetching the results. + """ + + query_job = bigquery_client.query("SELECT * FROM not_a_real_dataset.doesnt_exist") + + with pytest.raises(exceptions.NotFound, match="not_a_real_dataset"): + query_job.result() + + +def test_session(bigquery_client: bigquery.Client, query_api_method: str): initial_config = bigquery.QueryJobConfig() initial_config.create_session = True initial_query = """ @@ -37,7 +475,9 @@ def test_session(bigquery_client: bigquery.Client): AS SELECT * FROM UNNEST([1, 2, 3, 4, 5]) AS id; """ - initial_job = bigquery_client.query(initial_query, job_config=initial_config) + initial_job = bigquery_client.query( + initial_query, job_config=initial_config, api_method=query_api_method + ) initial_job.result() session_id = initial_job.session_info.session_id assert session_id is not None diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py new file mode 100644 index 000000000..63dde75e7 --- /dev/null +++ b/tests/unit/test__job_helpers.py @@ -0,0 +1,329 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, Dict, Optional +from unittest import mock + +from google.api_core import retry as retries +import pytest + +from google.cloud.bigquery.client import Client +from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery.job.query import QueryJob, QueryJobConfig +from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter + + +def make_query_request(additional_properties: Optional[Dict[str, Any]] = None): + request = {"useLegacySql": False, "formatOptions": {"useInt64Timestamp": True}} + if additional_properties is not None: + request.update(additional_properties) + return request + + +def make_query_response( + completed: bool = False, + job_id: str = "abcd-efg-hijk-lmnop", + location="US", + project_id="test-project", + errors=None, +) -> Dict[str, Any]: + response = { + "jobReference": { + "projectId": project_id, + "jobId": job_id, + "location": location, + }, + "jobComplete": completed, + } + if errors is not None: + response["errors"] = errors + return response + + +@pytest.mark.parametrize( + ("job_config", "expected"), + ( + (None, make_query_request()), + (QueryJobConfig(), make_query_request()), + ( + QueryJobConfig(default_dataset="my-project.my_dataset"), + make_query_request( + { + "defaultDataset": { + "projectId": "my-project", + "datasetId": "my_dataset", + } + } + ), + ), + (QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), + ( + QueryJobConfig(use_query_cache=False), + make_query_request({"useQueryCache": False}), + ), + ( + QueryJobConfig(use_legacy_sql=True), + make_query_request({"useLegacySql": True}), + ), + ( + QueryJobConfig( + query_parameters=[ + ScalarQueryParameter("named_param1", "STRING", "param-value"), + ScalarQueryParameter("named_param2", "INT64", 123), + ] + ), + make_query_request( + { + "parameterMode": "NAMED", + "queryParameters": [ + { + "name": "named_param1", + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "param-value"}, + }, + { + "name": "named_param2", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + }, + ], + } + ), + ), + ( + QueryJobConfig( + query_parameters=[ + ScalarQueryParameter(None, "STRING", "param-value"), + ScalarQueryParameter(None, "INT64", 123), + ] + ), + make_query_request( + { + "parameterMode": "POSITIONAL", + "queryParameters": [ + { + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "param-value"}, + }, + { + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + }, + ], + } + ), + ), + ( + QueryJobConfig( + connection_properties=[ + ConnectionProperty(key="time_zone", value="America/Chicago"), + ConnectionProperty(key="session_id", value="abcd-efgh-ijkl-mnop"), + ] + ), + make_query_request( + { + "connectionProperties": [ + {"key": "time_zone", "value": "America/Chicago"}, + {"key": "session_id", "value": "abcd-efgh-ijkl-mnop"}, + ] + } + ), + ), + ( + QueryJobConfig(labels={"abc": "def"}), + make_query_request({"labels": {"abc": "def"}}), + ), + ( + QueryJobConfig(maximum_bytes_billed=987654), + make_query_request({"maximumBytesBilled": "987654"}), + ), + ), +) +def test__to_query_request(job_config, expected): + result = _job_helpers._to_query_request(job_config) + assert result == expected + + +def test__to_query_job_defaults(): + mock_client = mock.create_autospec(Client) + response = make_query_response( + job_id="test-job", project_id="some-project", location="asia-northeast1" + ) + job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", 
None, response) + assert job.query == "query-str" + assert job._client is mock_client + assert job.job_id == "test-job" + assert job.project == "some-project" + assert job.location == "asia-northeast1" + assert job.error_result is None + assert job.errors is None + + +def test__to_query_job_dry_run(): + mock_client = mock.create_autospec(Client) + response = make_query_response( + job_id="test-job", project_id="some-project", location="asia-northeast1" + ) + job_config: QueryJobConfig = QueryJobConfig() + job_config.dry_run = True + job: QueryJob = _job_helpers._to_query_job( + mock_client, "query-str", job_config, response + ) + assert job.dry_run is True + + +@pytest.mark.parametrize( + ("completed", "expected_state"), ((True, "DONE"), (False, "PENDING"),), +) +def test__to_query_job_sets_state(completed, expected_state): + mock_client = mock.create_autospec(Client) + response = make_query_response(completed=completed) + job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + assert job.state == expected_state + + +def test__to_query_job_sets_errors(): + mock_client = mock.create_autospec(Client) + response = make_query_response( + errors=[ + # https://cloud.google.com/bigquery/docs/reference/rest/v2/ErrorProto + {"reason": "backendError", "message": "something went wrong"}, + {"message": "something else went wrong"}, + ] + ) + job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response) + assert len(job.errors) == 2 + # If we got back a response instead of an HTTP error status code, most + # likely the job didn't completely fail. + assert job.error_result is None + + +def test_query_jobs_query_defaults(): + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": { + "projectId": "test-project", + "jobId": "abc", + "location": "asia-northeast1", + } + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "asia-northeast1", + "test-project", + mock_retry, + None, + mock_job_retry, + ) + + assert mock_client._call_api.call_count == 1 + call_args, call_kwargs = mock_client._call_api.call_args + assert call_args[0] is mock_retry + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + assert call_kwargs["path"] == "/projects/test-project/queries" + assert call_kwargs["method"] == "POST" + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + request = call_kwargs["data"] + assert request["requestId"] is not None + assert request["query"] == "SELECT * FROM test" + assert request["location"] == "asia-northeast1" + assert request["formatOptions"]["useInt64Timestamp"] is True + assert "timeoutMs" not in request + + +def test_query_jobs_query_sets_format_options(): + """Since jobs.query can return results, ensure we use the lossless + timestamp format. 
+ + See: https://github.com/googleapis/python-bigquery/issues/395 + """ + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": {"projectId": "test-project", "jobId": "abc", "location": "US"} + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "US", + "test-project", + mock_retry, + None, + mock_job_retry, + ) + + assert mock_client._call_api.call_count == 1 + _, call_kwargs = mock_client._call_api.call_args + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + request = call_kwargs["data"] + assert request["formatOptions"]["useInt64Timestamp"] is True + + +@pytest.mark.parametrize( + ("timeout", "expected_timeout"), + ((-1, 0), (0, 0), (1, 1000 - _job_helpers._TIMEOUT_BUFFER_MILLIS),), +) +def test_query_jobs_query_sets_timeout(timeout, expected_timeout): + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": {"projectId": "test-project", "jobId": "abc", "location": "US"} + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "US", + "test-project", + mock_retry, + timeout, + mock_job_retry, + ) + + assert mock_client._call_api.call_count == 1 + _, call_kwargs = mock_client._call_api.call_args + # See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest + request = call_kwargs["data"] + assert request["timeoutMs"] == expected_timeout + + +def test_make_job_id_wo_suffix(): + job_id = _job_helpers.make_job_id("job_id") + assert job_id == "job_id" + + +def test_make_job_id_w_suffix(): + with mock.patch("uuid.uuid4", side_effect=["212345"]): + job_id = _job_helpers.make_job_id(None, prefix="job_id") + + assert job_id == "job_id212345" + + +def test_make_job_id_random(): + with mock.patch("uuid.uuid4", side_effect=["212345"]): + job_id = _job_helpers.make_job_id(None) + + assert job_id == "212345" + + +def test_make_job_id_w_job_id_overrides_prefix(): + job_id = _job_helpers.make_job_id("job_id", prefix="unused_prefix") + assert job_id == "job_id" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 0adb004fd..8ebf5137e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4016,6 +4016,160 @@ def test_query_defaults(self): self.assertEqual(sent_config["query"], QUERY) self.assertFalse(sent_config["useLegacySql"]) + def test_query_w_api_method_query(self): + query = "select count(*) from persons" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + job = client.query(query, location="EU", api_method="QUERY") + + self.assertEqual(job.query, query) + self.assertEqual(job.job_id, "abcd") + self.assertEqual(job.location, "EU") + + # Check that query actually starts the job. 
+ expected_resource = { + "query": query, + "useLegacySql": False, + "location": "EU", + "formatOptions": {"useInt64Timestamp": True}, + "requestId": mock.ANY, + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_w_api_method_query_legacy_sql(self): + from google.cloud.bigquery import QueryJobConfig + + query = "select count(*) from persons" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + job_config = QueryJobConfig() + job_config.use_legacy_sql = True + job_config.maximum_bytes_billed = 100 + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + job = client.query( + query, location="EU", job_config=job_config, api_method="QUERY" + ) + + self.assertEqual(job.query, query) + self.assertEqual(job.job_id, "abcd") + self.assertEqual(job.location, "EU") + + # Check that query actually starts the job. + expected_resource = { + "query": query, + "useLegacySql": True, + "location": "EU", + "formatOptions": {"useInt64Timestamp": True}, + "requestId": mock.ANY, + "maximumBytesBilled": "100", + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_w_api_method_query_parameters(self): + from google.cloud.bigquery import QueryJobConfig, ScalarQueryParameter + + query = "select count(*) from persons" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.query_parameters = [ScalarQueryParameter("param1", "INTEGER", 123)] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + job = client.query( + query, location="EU", job_config=job_config, api_method="QUERY" + ) + + self.assertEqual(job.query, query) + self.assertEqual(job.job_id, "abcd") + self.assertEqual(job.location, "EU") + + # Check that query actually starts the job. 
+ expected_resource = { + "query": query, + "dryRun": True, + "useLegacySql": False, + "location": "EU", + "formatOptions": {"useInt64Timestamp": True}, + "requestId": mock.ANY, + "parameterMode": "NAMED", + "queryParameters": [ + { + "name": "param1", + "parameterType": {"type": "INTEGER"}, + "parameterValue": {"value": "123"}, + }, + ], + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_w_api_method_query_and_job_id_fails(self): + query = "select count(*) from persons" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection({}) + + with self.assertRaises(TypeError) as exc: + client.query(query, job_id="abcd", api_method="QUERY") + self.assertIn( + "`job_id` was provided, but the 'QUERY' `api_method` was requested", + exc.exception.args[0], + ) + + def test_query_w_api_method_unknown(self): + query = "select count(*) from persons" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection({}) + + with self.assertRaises(ValueError) as exc: + client.query(query, api_method="UNKNOWN") + self.assertIn("Got unexpected value for api_method: ", exc.exception.args[0]) + def test_query_w_explicit_timeout(self): query = "select count(*) from persons" resource = { @@ -6213,35 +6367,6 @@ def test_context_manager_exit_closes_client(self): fake_close.assert_called_once() -class Test_make_job_id(unittest.TestCase): - def _call_fut(self, job_id, prefix=None): - from google.cloud.bigquery.client import _make_job_id - - return _make_job_id(job_id, prefix=prefix) - - def test__make_job_id_wo_suffix(self): - job_id = self._call_fut("job_id") - - self.assertEqual(job_id, "job_id") - - def test__make_job_id_w_suffix(self): - with mock.patch("uuid.uuid4", side_effect=["212345"]): - job_id = self._call_fut(None, prefix="job_id") - - self.assertEqual(job_id, "job_id212345") - - def test__make_random_job_id(self): - with mock.patch("uuid.uuid4", side_effect=["212345"]): - job_id = self._call_fut(None) - - self.assertEqual(job_id, "212345") - - def test__make_job_id_w_job_id_overrides_prefix(self): - job_id = self._call_fut("job_id", prefix="unused_prefix") - - self.assertEqual(job_id, "job_id") - - class TestClientUpload(object): # NOTE: This is a "partner" to `TestClient` meant to test some of the # "load_table_from_file" portions of `Client`. 
It also uses From 7e3721ed5ea9bac2238fed795304ca5ff809791c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 14 Dec 2021 17:00:00 +0100 Subject: [PATCH 27/35] fix: improve type annotations for mypy validation (#1081) docs: add mypy-valid type annotations to samples --- .github/.OwlBot.lock.yaml | 2 +- CHANGELOG.md | 18 ++ google/cloud/bigquery/_pandas_helpers.py | 8 +- google/cloud/bigquery/client.py | 192 +++++++++++------- google/cloud/bigquery/dataset.py | 6 +- .../bigquery/encryption_configuration.py | 2 +- google/cloud/bigquery/external_config.py | 8 +- google/cloud/bigquery/job/copy_.py | 2 +- google/cloud/bigquery/job/load.py | 2 +- google/cloud/bigquery/job/query.py | 4 +- google/cloud/bigquery/magics/magics.py | 23 +++ google/cloud/bigquery/query.py | 11 +- google/cloud/bigquery/routine/routine.py | 10 +- google/cloud/bigquery/table.py | 76 ++++++- noxfile.py | 23 +++ samples/AUTHORING_GUIDE.md | 2 +- samples/CONTRIBUTING.md | 2 +- samples/add_empty_column.py | 2 +- samples/browse_table_data.py | 14 +- samples/client_list_jobs.py | 2 +- samples/client_load_partitioned_table.py | 2 +- samples/client_query.py | 2 +- samples/client_query_add_column.py | 2 +- samples/client_query_batch.py | 16 +- samples/client_query_destination_table.py | 2 +- ...lient_query_destination_table_clustered.py | 2 +- .../client_query_destination_table_cmek.py | 2 +- .../client_query_destination_table_legacy.py | 2 +- samples/client_query_dry_run.py | 7 +- samples/client_query_legacy_sql.py | 2 +- samples/client_query_relax_column.py | 2 +- samples/client_query_w_array_params.py | 2 +- samples/client_query_w_named_params.py | 2 +- samples/client_query_w_positional_params.py | 2 +- samples/client_query_w_struct_params.py | 2 +- samples/client_query_w_timestamp_params.py | 2 +- samples/copy_table.py | 2 +- samples/copy_table_cmek.py | 2 +- samples/copy_table_multiple_source.py | 4 +- samples/create_dataset.py | 2 +- samples/create_job.py | 7 +- samples/create_routine.py | 7 +- samples/create_routine_ddl.py | 2 +- samples/create_table.py | 2 +- samples/create_table_clustered.py | 7 +- samples/create_table_range_partitioned.py | 7 +- samples/dataset_exists.py | 2 +- samples/delete_dataset.py | 2 +- samples/delete_dataset_labels.py | 7 +- samples/delete_model.py | 2 +- samples/delete_routine.py | 2 +- samples/delete_table.py | 2 +- samples/download_public_data.py | 2 +- samples/download_public_data_sandbox.py | 2 +- samples/geography/conftest.py | 13 +- samples/geography/insert_geojson.py | 10 +- samples/geography/insert_geojson_test.py | 2 +- samples/geography/insert_wkt.py | 10 +- samples/geography/insert_wkt_test.py | 2 +- samples/geography/mypy.ini | 8 + samples/geography/to_geodataframe.py | 10 +- samples/geography/to_geodataframe_test.py | 2 +- samples/get_dataset.py | 2 +- samples/get_dataset_labels.py | 2 +- samples/get_model.py | 2 +- samples/get_routine.py | 7 +- samples/get_table.py | 2 +- samples/label_dataset.py | 2 +- samples/list_datasets.py | 2 +- samples/list_datasets_by_label.py | 2 +- samples/list_models.py | 2 +- samples/list_routines.py | 2 +- samples/list_tables.py | 2 +- samples/load_table_clustered.py | 7 +- samples/load_table_dataframe.py | 7 +- samples/load_table_file.py | 7 +- samples/load_table_uri_autodetect_csv.py | 2 +- samples/load_table_uri_autodetect_json.py | 2 +- samples/load_table_uri_avro.py | 2 +- samples/load_table_uri_cmek.py | 2 +- samples/load_table_uri_csv.py | 2 +- samples/load_table_uri_json.py | 2 +- samples/load_table_uri_orc.py | 2 +- 
samples/load_table_uri_parquet.py | 2 +- samples/load_table_uri_truncate_avro.py | 2 +- samples/load_table_uri_truncate_csv.py | 2 +- samples/load_table_uri_truncate_json.py | 2 +- samples/load_table_uri_truncate_orc.py | 2 +- samples/load_table_uri_truncate_parquet.py | 2 +- samples/magics/_helpers.py | 2 +- samples/magics/conftest.py | 12 +- samples/magics/mypy.ini | 8 + samples/magics/query.py | 7 +- samples/magics/query_params_scalars.py | 7 +- samples/magics/query_params_scalars_test.py | 2 +- samples/magics/query_test.py | 2 +- samples/magics/requirements.txt | 1 + samples/mypy.ini | 12 ++ samples/query_external_gcs_temporary_table.py | 6 +- .../query_external_sheets_permanent_table.py | 8 +- .../query_external_sheets_temporary_table.py | 8 +- samples/query_no_cache.py | 2 +- samples/query_pagination.py | 2 +- samples/query_script.py | 2 +- samples/query_to_arrow.py | 7 +- samples/snippets/README.rst | 27 +-- .../snippets/authenticate_service_account.py | 6 +- .../authenticate_service_account_test.py | 10 +- samples/snippets/authorized_view_tutorial.py | 9 +- .../snippets/authorized_view_tutorial_test.py | 11 +- samples/snippets/conftest.py | 29 ++- .../create_table_external_hive_partitioned.py | 7 +- ...te_table_external_hive_partitioned_test.py | 9 +- samples/snippets/dataset_access_test.py | 59 ++++++ samples/snippets/delete_job.py | 2 +- samples/snippets/delete_job_test.py | 11 +- samples/snippets/jupyter_tutorial_test.py | 17 +- samples/snippets/load_table_uri_firestore.py | 2 +- .../snippets/load_table_uri_firestore_test.py | 9 +- samples/snippets/materialized_view.py | 25 ++- samples/snippets/materialized_view_test.py | 24 ++- samples/snippets/mypy.ini | 8 + samples/snippets/natality_tutorial.py | 7 +- samples/snippets/natality_tutorial_test.py | 11 +- samples/snippets/quickstart.py | 8 +- samples/snippets/quickstart_test.py | 13 +- samples/snippets/requirements.txt | 1 + samples/snippets/revoke_dataset_access.py | 52 +++++ samples/snippets/simple_app.py | 2 +- samples/snippets/simple_app_test.py | 7 +- samples/snippets/test_update_with_dml.py | 10 +- samples/snippets/update_dataset_access.py | 70 +++++++ samples/snippets/update_with_dml.py | 12 +- samples/snippets/user_credentials.py | 2 +- samples/snippets/user_credentials_test.py | 9 +- samples/snippets/view.py | 43 +++- samples/snippets/view_test.py | 31 ++- samples/table_exists.py | 2 +- samples/table_insert_rows.py | 2 +- ...le_insert_rows_explicit_none_insert_ids.py | 2 +- samples/tests/conftest.py | 25 +-- samples/tests/test_add_empty_column.py | 7 +- samples/tests/test_browse_table_data.py | 9 +- samples/tests/test_client_list_jobs.py | 10 +- .../test_client_load_partitioned_table.py | 9 +- samples/tests/test_client_query.py | 7 +- samples/tests/test_client_query_add_column.py | 9 +- samples/tests/test_client_query_batch.py | 7 +- .../test_client_query_destination_table.py | 9 +- ...lient_query_destination_table_clustered.py | 9 +- ...est_client_query_destination_table_cmek.py | 9 +- ...t_client_query_destination_table_legacy.py | 9 +- samples/tests/test_client_query_dry_run.py | 7 +- samples/tests/test_client_query_legacy_sql.py | 6 +- .../tests/test_client_query_relax_column.py | 9 +- .../tests/test_client_query_w_array_params.py | 7 +- .../tests/test_client_query_w_named_params.py | 7 +- .../test_client_query_w_positional_params.py | 7 +- .../test_client_query_w_struct_params.py | 7 +- .../test_client_query_w_timestamp_params.py | 7 +- samples/tests/test_copy_table.py | 13 +- samples/tests/test_copy_table_cmek.py | 
12 +- .../tests/test_copy_table_multiple_source.py | 12 +- samples/tests/test_create_dataset.py | 9 +- samples/tests/test_create_job.py | 10 +- samples/tests/test_create_table.py | 9 +- samples/tests/test_create_table_clustered.py | 9 +- .../test_create_table_range_partitioned.py | 9 +- samples/tests/test_dataset_exists.py | 11 +- samples/tests/test_dataset_label_samples.py | 9 +- samples/tests/test_delete_dataset.py | 7 +- samples/tests/test_delete_table.py | 7 +- samples/tests/test_download_public_data.py | 4 +- .../test_download_public_data_sandbox.py | 4 +- samples/tests/test_get_dataset.py | 7 +- samples/tests/test_get_table.py | 9 +- samples/tests/test_list_datasets.py | 10 +- samples/tests/test_list_datasets_by_label.py | 10 +- samples/tests/test_list_tables.py | 9 +- samples/tests/test_load_table_clustered.py | 12 +- samples/tests/test_load_table_dataframe.py | 9 +- samples/tests/test_load_table_file.py | 7 +- .../test_load_table_uri_autodetect_csv.py | 9 +- .../test_load_table_uri_autodetect_json.py | 9 +- samples/tests/test_load_table_uri_avro.py | 9 +- samples/tests/test_load_table_uri_cmek.py | 9 +- samples/tests/test_load_table_uri_csv.py | 9 +- samples/tests/test_load_table_uri_json.py | 9 +- samples/tests/test_load_table_uri_orc.py | 9 +- samples/tests/test_load_table_uri_parquet.py | 9 +- .../test_load_table_uri_truncate_avro.py | 9 +- .../tests/test_load_table_uri_truncate_csv.py | 9 +- .../test_load_table_uri_truncate_json.py | 9 +- .../tests/test_load_table_uri_truncate_orc.py | 9 +- .../test_load_table_uri_truncate_parquet.py | 9 +- samples/tests/test_model_samples.py | 9 +- ...test_query_external_gcs_temporary_table.py | 9 +- ...t_query_external_sheets_permanent_table.py | 9 +- ...t_query_external_sheets_temporary_table.py | 9 +- samples/tests/test_query_no_cache.py | 6 +- samples/tests/test_query_pagination.py | 7 +- samples/tests/test_query_script.py | 7 +- samples/tests/test_query_to_arrow.py | 2 +- samples/tests/test_routine_samples.py | 25 ++- samples/tests/test_table_exists.py | 9 +- samples/tests/test_table_insert_rows.py | 9 +- ...le_insert_rows_explicit_none_insert_ids.py | 9 +- samples/tests/test_undelete_table.py | 11 +- samples/tests/test_update_dataset_access.py | 9 +- ...te_dataset_default_partition_expiration.py | 9 +- ...update_dataset_default_table_expiration.py | 9 +- .../tests/test_update_dataset_description.py | 9 +- ...t_update_table_require_partition_filter.py | 9 +- samples/undelete_table.py | 4 +- samples/update_dataset_access.py | 2 +- ...te_dataset_default_partition_expiration.py | 2 +- ...update_dataset_default_table_expiration.py | 2 +- samples/update_dataset_description.py | 2 +- samples/update_model.py | 2 +- samples/update_routine.py | 7 +- .../update_table_require_partition_filter.py | 2 +- tests/system/test_pandas.py | 56 ++--- tests/unit/test_client.py | 18 +- tests/unit/test_magics.py | 139 ++++++++++++- tests/unit/test_table.py | 187 +++++++++++++++++ 225 files changed, 1938 insertions(+), 433 deletions(-) create mode 100644 samples/geography/mypy.ini create mode 100644 samples/magics/mypy.ini create mode 100644 samples/mypy.ini create mode 100644 samples/snippets/dataset_access_test.py create mode 100644 samples/snippets/mypy.ini create mode 100644 samples/snippets/revoke_dataset_access.py create mode 100644 samples/snippets/update_dataset_access.py diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 108063d4d..0b3c8cd98 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:4ee57a76a176ede9087c14330c625a71553cf9c72828b2c0ca12f5338171ba60 + digest: sha256:2f90537dd7df70f6b663cd654b1fa5dee483cf6a4edcfd46072b2775be8a23ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e10ad826..5ba219d20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.31.0](https://www.github.com/googleapis/python-bigquery/compare/v2.30.1...v2.31.0) (2021-11-24) + + +### Features + +* allow cell magic body to be a $variable ([#1053](https://www.github.com/googleapis/python-bigquery/issues/1053)) ([3a681e0](https://www.github.com/googleapis/python-bigquery/commit/3a681e046819df18118aa0b2b5733416d004c9b3)) +* promote `RowIterator.to_arrow_iterable` to public method ([#1073](https://www.github.com/googleapis/python-bigquery/issues/1073)) ([21cd710](https://www.github.com/googleapis/python-bigquery/commit/21cd71022d60c32104f8f90ee2ca445fbb43f7f3)) + + +### Bug Fixes + +* apply timeout to all resumable upload requests ([#1070](https://www.github.com/googleapis/python-bigquery/issues/1070)) ([3314dfb](https://www.github.com/googleapis/python-bigquery/commit/3314dfbed62488503dc41b11e403a672fcf71048)) + + +### Dependencies + +* support OpenTelemetry >= 1.1.0 ([#1050](https://www.github.com/googleapis/python-bigquery/issues/1050)) ([4616cd5](https://www.github.com/googleapis/python-bigquery/commit/4616cd58d3c6da641fb881ce99a87dcdedc20ba2)) + ### [2.30.1](https://www.github.com/googleapis/python-bigquery/compare/v2.30.0...v2.30.1) (2021-11-04) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index ecb36bf15..7917b989b 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -912,7 +912,12 @@ def _download_table_bqstorage( def download_arrow_bqstorage( - project_id, table, bqstorage_client, preserve_order=False, selected_fields=None, + project_id, + table, + bqstorage_client, + preserve_order=False, + selected_fields=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): return _download_table_bqstorage( project_id, @@ -921,6 +926,7 @@ def download_arrow_bqstorage( preserve_order=preserve_order, selected_fields=selected_fields, page_to_item=_bqstorage_page_to_arrow, + max_queue_size=max_queue_size, ) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 76ccafaf4..b3929be5c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -31,9 +31,10 @@ import typing from typing import ( Any, - BinaryIO, Dict, + IO, Iterable, + Mapping, List, Optional, Sequence, @@ -107,10 +108,15 @@ from google.cloud.bigquery import _helpers TimeoutType = Union[float, None] +ResumableTimeoutType = Union[ + None, float, Tuple[float, float] +] # for resumable media methods if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. 
PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] + import pandas # type: ignore + import requests # required by api-core _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 @@ -215,7 +221,7 @@ def __init__( default_query_job_config=None, client_info=None, client_options=None, - ): + ) -> None: super(Client, self).__init__( project=project, credentials=credentials, @@ -2320,7 +2326,7 @@ def load_table_from_uri( def load_table_from_file( self, - file_obj: BinaryIO, + file_obj: IO[bytes], destination: Union[Table, TableReference, TableListItem, str], rewind: bool = False, size: int = None, @@ -2330,7 +2336,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2338,42 +2344,42 @@ def load_table_from_file( returns a :class:`~google.cloud.bigquery.job.LoadJob`. Args: - file_obj (file): A file handle opened in binary mode for reading. - destination (Union[ \ - google.cloud.bigquery.table.Table, \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.table.TableListItem, \ - str, \ - ]): + file_obj: + A file handle opened in binary mode for reading. + destination: Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - rewind (Optional[bool]): + rewind: If True, seek to the beginning of the file handle before reading the file. - size (Optional[int]): + size: The number of bytes to read from the file handle. If size is ``None`` or large, resumable upload will be used. Otherwise, multipart upload will be used. - num_retries (Optional[int]): Number of upload retries. Defaults to 6. - job_id (Optional[str]): Name of the job. - job_id_prefix (Optional[str]): + num_retries: Number of upload retries. Defaults to 6. + job_id: Name of the job. + job_id_prefix: The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location (Optional[str]): + location: Location where to run the job. Must match the location of the destination table. - project (Optional[str]): + project: Project ID of the project of where to run the job. Defaults to the client's project. - job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + job_config: Extra configuration options for the job. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request + may be repeated several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. Returns: google.cloud.bigquery.job.LoadJob: A new load job. 
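The relaxed timeout type documented above means callers can now pass load_table_from_file either a single float or a (connect_timeout, read_timeout) pair, which is forwarded to the underlying requests transport. A small usage sketch, not part of the patch itself; the dataset, table, and file names below are placeholders:

from google.cloud import bigquery

client = bigquery.Client()
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    autodetect=True,
)

with open("data.csv", "rb") as source_file:  # placeholder file name
    load_job = client.load_table_from_file(
        source_file,
        "my-project.my_dataset.my_table",  # placeholder destination table
        job_config=job_config,
        timeout=(5.0, 60.0),  # 5s connect timeout, 60s read timeout per request
    )

load_job.result()  # Wait for the load job to finish.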
@@ -2425,7 +2431,7 @@ def load_table_from_file( def load_table_from_dataframe( self, - dataframe, + dataframe: "pandas.DataFrame", destination: Union[Table, TableReference, str], num_retries: int = _DEFAULT_NUM_RETRIES, job_id: str = None, @@ -2434,7 +2440,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2453,9 +2459,9 @@ def load_table_from_dataframe( https://github.com/googleapis/python-bigquery/issues/19 Args: - dataframe (pandas.DataFrame): + dataframe: A :class:`~pandas.DataFrame` containing the data to load. - destination (google.cloud.bigquery.table.TableReference): + destination: The destination table to use for loading the data. If it is an existing table, the schema of the :class:`~pandas.DataFrame` must match the schema of the destination table. If the table @@ -2467,19 +2473,19 @@ def load_table_from_dataframe( :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - num_retries (Optional[int]): Number of upload retries. - job_id (Optional[str]): Name of the job. - job_id_prefix (Optional[str]): + num_retries: Number of upload retries. + job_id: Name of the job. + job_id_prefix: The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location (Optional[str]): + location: Location where to run the job. Must match the location of the destination table. - project (Optional[str]): + project: Project ID of the project of where to run the job. Defaults to the client's project. - job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + job_config: Extra configuration options for the job. To override the default pandas data type conversions, supply @@ -2496,7 +2502,7 @@ def load_table_from_dataframe( :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are supported. - parquet_compression (Optional[str]): + parquet_compression: [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. @@ -2509,9 +2515,13 @@ def load_table_from_dataframe( passed as the ``compression`` argument to the underlying ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2672,7 +2682,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: TimeoutType = DEFAULT_TIMEOUT, + timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2696,36 +2706,35 @@ def load_table_from_json( client = bigquery.Client() client.load_table_from_file(data_as_file, ...) 
- destination (Union[ \ - google.cloud.bigquery.table.Table, \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.table.TableListItem, \ - str, \ - ]): + destination: Table into which data is to be loaded. If a string is passed in, this method attempts to create a table reference from a string using :func:`google.cloud.bigquery.table.TableReference.from_string`. Keyword Arguments: - num_retries (Optional[int]): Number of upload retries. - job_id (Optional[str]): Name of the job. - job_id_prefix (Optional[str]): + num_retries: Number of upload retries. + job_id: Name of the job. + job_id_prefix: The user-provided prefix for a randomly generated job ID. This parameter will be ignored if a ``job_id`` is also given. - location (Optional[str]): + location: Location where to run the job. Must match the location of the destination table. - project (Optional[str]): + project: Project ID of the project of where to run the job. Defaults to the client's project. - job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): + job_config: Extra configuration options for the job. The ``source_format`` setting is always set to :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. + + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2774,60 +2783,77 @@ def load_table_from_json( ) def _do_resumable_upload( - self, stream, metadata, num_retries, timeout, project=None - ): + self, + stream: IO[bytes], + metadata: Mapping[str, str], + num_retries: int, + timeout: Optional[ResumableTimeoutType], + project: Optional[str] = None, + ) -> "requests.Response": """Perform a resumable upload. Args: - stream (IO[bytes]): A bytes IO object open for reading. + stream: A bytes IO object open for reading. - metadata (Dict): The metadata associated with the upload. + metadata: The metadata associated with the upload. - num_retries (int): + num_retries: Number of upload retries. (Deprecated: This argument will be removed in a future release.) - timeout (float): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. - project (Optional[str]): + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + project: Project ID of the project of where to run the upload. Defaults to the client's project. Returns: - requests.Response: - The "200 OK" response object returned after the final chunk - is uploaded. + The "200 OK" response object returned after the final chunk + is uploaded. 
""" upload, transport = self._initiate_resumable_upload( stream, metadata, num_retries, timeout, project=project ) while not upload.finished: - response = upload.transmit_next_chunk(transport) + response = upload.transmit_next_chunk(transport, timeout=timeout) return response def _initiate_resumable_upload( - self, stream, metadata, num_retries, timeout, project=None + self, + stream: IO[bytes], + metadata: Mapping[str, str], + num_retries: int, + timeout: Optional[ResumableTimeoutType], + project: Optional[str] = None, ): """Initiate a resumable upload. Args: - stream (IO[bytes]): A bytes IO object open for reading. + stream: A bytes IO object open for reading. - metadata (Dict): The metadata associated with the upload. + metadata: The metadata associated with the upload. - num_retries (int): + num_retries: Number of upload retries. (Deprecated: This argument will be removed in a future release.) - timeout (float): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. - project (Optional[str]): + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + project: Project ID of the project of where to run the upload. Defaults to the client's project. @@ -2876,29 +2902,39 @@ def _initiate_resumable_upload( return upload, transport def _do_multipart_upload( - self, stream, metadata, size, num_retries, timeout, project=None + self, + stream: IO[bytes], + metadata: Mapping[str, str], + size: int, + num_retries: int, + timeout: Optional[ResumableTimeoutType], + project: Optional[str] = None, ): """Perform a multipart upload. Args: - stream (IO[bytes]): A bytes IO object open for reading. + stream: A bytes IO object open for reading. - metadata (Dict): The metadata associated with the upload. + metadata: The metadata associated with the upload. - size (int): + size: The number of bytes to be uploaded (which will be read from ``stream``). If not provided, the upload will be concluded once ``stream`` is exhausted (or :data:`None`). - num_retries (int): + num_retries: Number of upload retries. (Deprecated: This argument will be removed in a future release.) - timeout (float): + timeout: The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. Depending on the retry strategy, a request may + be repeated several times using the same timeout each time. - project (Optional[str]): + Can also be passed as a tuple (connect_timeout, read_timeout). + See :meth:`requests.Session.request` documentation for details. + + project: Project ID of the project of where to run the upload. Defaults to the client's project. 
@@ -3426,7 +3462,9 @@ def insert_rows_json( self, table: Union[Table, TableReference, TableListItem, str], json_rows: Sequence[Dict], - row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID, + row_ids: Union[ + Iterable[Optional[str]], AutoRowIDs, None + ] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, template_suffix: str = None, diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index ff015d605..9eabfb6f8 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -27,7 +27,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -def _get_table_reference(self, table_id): +def _get_table_reference(self, table_id: str) -> TableReference: """Constructs a TableReference. Args: @@ -144,7 +144,7 @@ class AccessEntry(object): ) """Allowed entity types.""" - def __init__(self, role, entity_type, entity_id): + def __init__(self, role, entity_type, entity_id) -> None: if entity_type not in self.ENTITY_TYPES: message = "Entity type %r not among: %s" % ( entity_type, @@ -407,7 +407,7 @@ class Dataset(object): "default_encryption_configuration": "defaultEncryptionConfiguration", } - def __init__(self, dataset_ref): + def __init__(self, dataset_ref) -> None: if isinstance(dataset_ref, str): dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} diff --git a/google/cloud/bigquery/encryption_configuration.py b/google/cloud/bigquery/encryption_configuration.py index ba04ae2c4..d0b6f3677 100644 --- a/google/cloud/bigquery/encryption_configuration.py +++ b/google/cloud/bigquery/encryption_configuration.py @@ -24,7 +24,7 @@ class EncryptionConfiguration(object): kms_key_name (str): resource ID of Cloud KMS key used for encryption """ - def __init__(self, kms_key_name=None): + def __init__(self, kms_key_name=None) -> None: self._properties = {} if kms_key_name is not None: self._properties["kmsKeyName"] = kms_key_name diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index e6f6a97c3..cabf2436b 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -22,7 +22,7 @@ import base64 import copy -from typing import FrozenSet, Iterable, Optional, Union +from typing import Any, Dict, FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json @@ -572,8 +572,8 @@ class HivePartitioningOptions(object): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions """ - def __init__(self): - self._properties = {} + def __init__(self) -> None: + self._properties: Dict[str, Any] = {} @property def mode(self): @@ -654,7 +654,7 @@ class ExternalConfig(object): See :attr:`source_format`. """ - def __init__(self, source_format): + def __init__(self, source_format) -> None: self._properties = {"sourceFormat": source_format} @property diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index f0dd3d668..29558c01f 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ -52,7 +52,7 @@ class CopyJobConfig(_JobConfig): the property name as the name of a keyword argument. 
""" - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: super(CopyJobConfig, self).__init__("copy", **kwargs) @property diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index b12c3e621..e9f8fe14a 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -38,7 +38,7 @@ class LoadJobConfig(_JobConfig): the property name as the name of a keyword argument. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: super(LoadJobConfig, self).__init__("load", **kwargs) @property diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 6b8b5ce12..61e860de5 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -231,7 +231,7 @@ class QueryJobConfig(_JobConfig): the property name as the name of a keyword argument. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: super(QueryJobConfig, self).__init__("query", **kwargs) @property @@ -1067,7 +1067,7 @@ def ddl_target_table(self): return prop @property - def num_dml_affected_rows(self): + def num_dml_affected_rows(self) -> Optional[int]: """Return the number of DML rows affected by the job. See: diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index d38655e28..f26d7ce11 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -596,6 +596,29 @@ def _cell_magic(line, query): _handle_error(error, args.destination_var) return + # Check if query is given as a reference to a variable. + if query.startswith("$"): + query_var_name = query[1:] + + if not query_var_name: + missing_msg = 'Missing query variable name, empty "$" is not allowed.' + raise NameError(missing_msg) + + if query_var_name.isidentifier(): + ip = IPython.get_ipython() + query = ip.user_ns.get(query_var_name, ip) # ip serves as a sentinel + + if query is ip: + raise NameError( + f"Unknown query, variable {query_var_name} does not exist." + ) + else: + if not isinstance(query, (str, bytes)): + raise TypeError( + f"Query variable {query_var_name} must be a string " + "or a bytes-like value." + ) + # Any query that does not contain whitespace (aside from leading and trailing whitespace) # is assumed to be a table id if not re.search(r"\s", query): diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index c2230e0fa..ad7c60f7d 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -520,7 +520,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): values (List[appropriate type]): The parameter array values. 
""" - def __init__(self, name, array_type, values): + def __init__(self, name, array_type, values) -> None: self.name = name self.values = values @@ -683,10 +683,13 @@ class StructQueryParameter(_AbstractQueryParameter): ]]): The sub-parameters for the struct """ - def __init__(self, name, *sub_params): + def __init__(self, name, *sub_params) -> None: self.name = name - types = self.struct_types = OrderedDict() - values = self.struct_values = {} + self.struct_types: Dict[str, Any] = OrderedDict() + self.struct_values: Dict[str, Any] = {} + + types = self.struct_types + values = self.struct_values for sub in sub_params: if isinstance(sub, self.__class__): types[sub.name] = "STRUCT" diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 18a38c3cc..3c0919003 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -16,7 +16,7 @@ """Define resources for the BigQuery Routines API.""" -from typing import Optional +from typing import Any, Dict, Optional import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers @@ -69,7 +69,7 @@ class Routine(object): "determinism_level": "determinismLevel", } - def __init__(self, routine_ref, **kwargs): + def __init__(self, routine_ref, **kwargs) -> None: if isinstance(routine_ref, str): routine_ref = RoutineReference.from_string(routine_ref) @@ -214,7 +214,7 @@ def return_type(self, value: StandardSqlDataType): self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @property - def return_table_type(self) -> StandardSqlTableType: + def return_table_type(self) -> Optional[StandardSqlTableType]: """The return type of a Table Valued Function (TVF) routine. .. versionadded:: 2.22.0 @@ -352,8 +352,8 @@ class RoutineArgument(object): "mode": "mode", } - def __init__(self, **kwargs): - self._properties = {} + def __init__(self, **kwargs) -> None: + self._properties: Dict[str, Any] = {} for property_name in kwargs: setattr(self, property_name, kwargs[property_name]) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index f434688e7..57a774c3e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -369,7 +369,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", } - def __init__(self, table_ref, schema=None): + def __init__(self, table_ref, schema=None) -> None: table_ref = _table_arg_to_table_ref(table_ref) self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} # Let the @property do validation. @@ -1318,7 +1318,7 @@ class Row(object): # Choose unusual field names to try to avoid conflict with schema fields. __slots__ = ("_xxx_values", "_xxx_field_to_index") - def __init__(self, values, field_to_index): + def __init__(self, values, field_to_index) -> None: self._xxx_values = values self._xxx_field_to_index = field_to_index @@ -1612,8 +1612,42 @@ def _to_page_iterable( ) yield from result_pages - def _to_arrow_iterable(self, bqstorage_client=None): - """Create an iterable of arrow RecordBatches, to process the table as a stream.""" + def to_arrow_iterable( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + ) -> Iterator["pyarrow.RecordBatch"]: + """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. 
+ + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + max_queue_size (Optional[int]): + The maximum number of result pages to hold in the internal queue when + streaming query results over the BigQuery Storage API. Ignored if + Storage API is not used. + + By default, the max queue size is set to the number of BQ Storage streams + created by the server. If ``max_queue_size`` is :data:`None`, the queue + size is infinite. + + Returns: + pyarrow.RecordBatch: + A generator of :class:`~pyarrow.RecordBatch`. + + .. versionadded:: 2.31.0 + """ + self._maybe_warn_max_results(bqstorage_client) + bqstorage_download = functools.partial( _pandas_helpers.download_arrow_bqstorage, self._project, @@ -1621,6 +1655,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): bqstorage_client, preserve_order=self._preserve_order, selected_fields=self._selected_fields, + max_queue_size=max_queue_size, ) tabledata_list_download = functools.partial( _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema @@ -1705,7 +1740,7 @@ def to_arrow( ) record_batches = [] - for record_batch in self._to_arrow_iterable( + for record_batch in self.to_arrow_iterable( bqstorage_client=bqstorage_client ): record_batches.append(record_batch) @@ -2202,6 +2237,27 @@ def to_dataframe_iterable( raise ValueError(_NO_PANDAS_ERROR) return iter((pandas.DataFrame(),)) + def to_arrow_iterable( + self, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, + max_queue_size: Optional[int] = None, + ) -> Iterator["pyarrow.RecordBatch"]: + """Create an iterable of pandas DataFrames, to process the table as a stream. + + .. versionadded:: 2.31.0 + + Args: + bqstorage_client: + Ignored. Added for compatibility with RowIterator. + + max_queue_size: + Ignored. Added for compatibility with RowIterator. + + Returns: + An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. + """ + return iter((pyarrow.record_batch([]),)) + def __iter__(self): return iter(()) @@ -2230,7 +2286,7 @@ class PartitionRange(object): Private. Used to construct object from API resource. """ - def __init__(self, start=None, end=None, interval=None, _properties=None): + def __init__(self, start=None, end=None, interval=None, _properties=None) -> None: if _properties is None: _properties = {} self._properties = _properties @@ -2305,10 +2361,10 @@ class RangePartitioning(object): Private. Used to construct object from API resource. 
""" - def __init__(self, range_=None, field=None, _properties=None): + def __init__(self, range_=None, field=None, _properties=None) -> None: if _properties is None: _properties = {} - self._properties = _properties + self._properties: Dict[str, Any] = _properties if range_ is not None: self.range_ = range_ @@ -2414,8 +2470,8 @@ class TimePartitioning(object): def __init__( self, type_=None, field=None, expiration_ms=None, require_partition_filter=None - ): - self._properties = {} + ) -> None: + self._properties: Dict[str, Any] = {} if type_ is None: self.type_ = TimePartitioningType.DAY else: diff --git a/noxfile.py b/noxfile.py index 505911861..c6197bfed 100644 --- a/noxfile.py +++ b/noxfile.py @@ -43,6 +43,7 @@ "lint_setup_py", "blacken", "mypy", + "mypy_samples", "pytype", "docs", ] @@ -186,6 +187,28 @@ def system(session): session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) +@nox.session(python=DEFAULT_PYTHON_VERSION) +def mypy_samples(session): + """Run type checks with mypy.""" + session.install("-e", ".[all]") + + session.install("ipython", "pytest") + session.install(MYPY_VERSION) + + # Just install the dependencies' type info directly, since "mypy --install-types" + # might require an additional pass. + session.install("types-mock", "types-pytz") + session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions + + session.run( + "mypy", + "--config-file", + str(CURRENT_DIRECTORY / "samples" / "mypy.ini"), + "--no-incremental", # Required by warn-unused-configs from mypy.ini to work + "samples/", + ) + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def snippets(session): """Run the snippets test suite.""" diff --git a/samples/AUTHORING_GUIDE.md b/samples/AUTHORING_GUIDE.md index 55c97b32f..8249522ff 100644 --- a/samples/AUTHORING_GUIDE.md +++ b/samples/AUTHORING_GUIDE.md @@ -1 +1 @@ -See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/AUTHORING_GUIDE.md \ No newline at end of file +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/samples/CONTRIBUTING.md b/samples/CONTRIBUTING.md index 34c882b6f..f5fe2e6ba 100644 --- a/samples/CONTRIBUTING.md +++ b/samples/CONTRIBUTING.md @@ -1 +1 @@ -See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/CONTRIBUTING.md \ No newline at end of file +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/CONTRIBUTING.md \ No newline at end of file diff --git a/samples/add_empty_column.py b/samples/add_empty_column.py index cd7cf5018..6d449d6e2 100644 --- a/samples/add_empty_column.py +++ b/samples/add_empty_column.py @@ -13,7 +13,7 @@ # limitations under the License. -def add_empty_column(table_id): +def add_empty_column(table_id: str) -> None: # [START bigquery_add_empty_column] from google.cloud import bigquery diff --git a/samples/browse_table_data.py b/samples/browse_table_data.py index 29a1c2ff6..6a56253bf 100644 --- a/samples/browse_table_data.py +++ b/samples/browse_table_data.py @@ -13,7 +13,7 @@ # limitations under the License. -def browse_table_data(table_id): +def browse_table_data(table_id: str) -> None: # [START bigquery_browse_table] @@ -41,15 +41,17 @@ def browse_table_data(table_id): table = client.get_table(table_id) # Make an API request. fields = table.schema[:2] # First two columns. 
rows_iter = client.list_rows(table_id, selected_fields=fields, max_results=10) - rows = list(rows_iter) print("Selected {} columns from table {}.".format(len(rows_iter.schema), table_id)) + + rows = list(rows_iter) print("Downloaded {} rows from table {}".format(len(rows), table_id)) # Print row data in tabular format. - rows = client.list_rows(table, max_results=10) - format_string = "{!s:<16} " * len(rows.schema) - field_names = [field.name for field in rows.schema] + rows_iter = client.list_rows(table, max_results=10) + format_string = "{!s:<16} " * len(rows_iter.schema) + field_names = [field.name for field in rows_iter.schema] print(format_string.format(*field_names)) # Prints column headers. - for row in rows: + + for row in rows_iter: print(format_string.format(*row)) # Prints row data. # [END bigquery_browse_table] diff --git a/samples/client_list_jobs.py b/samples/client_list_jobs.py index b2344e23c..7f1e39cb8 100644 --- a/samples/client_list_jobs.py +++ b/samples/client_list_jobs.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_list_jobs(): +def client_list_jobs() -> None: # [START bigquery_list_jobs] diff --git a/samples/client_load_partitioned_table.py b/samples/client_load_partitioned_table.py index e4e8a296c..9956f3f00 100644 --- a/samples/client_load_partitioned_table.py +++ b/samples/client_load_partitioned_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_load_partitioned_table(table_id): +def client_load_partitioned_table(table_id: str) -> None: # [START bigquery_load_table_partitioned] from google.cloud import bigquery diff --git a/samples/client_query.py b/samples/client_query.py index 7fedc3f90..091d3f98b 100644 --- a/samples/client_query.py +++ b/samples/client_query.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query(): +def client_query() -> None: # [START bigquery_query] diff --git a/samples/client_query_add_column.py b/samples/client_query_add_column.py index ff7d5aa68..2da200bc5 100644 --- a/samples/client_query_add_column.py +++ b/samples/client_query_add_column.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_add_column(table_id): +def client_query_add_column(table_id: str) -> None: # [START bigquery_add_column_query_append] from google.cloud import bigquery diff --git a/samples/client_query_batch.py b/samples/client_query_batch.py index e1680f4a1..df164d1be 100644 --- a/samples/client_query_batch.py +++ b/samples/client_query_batch.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + +if typing.TYPE_CHECKING: + from google.cloud import bigquery -def client_query_batch(): + +def client_query_batch() -> "bigquery.QueryJob": # [START bigquery_query_batch] from google.cloud import bigquery @@ -37,9 +42,12 @@ def client_query_batch(): # Check on the progress by getting the job's updated state. Once the state # is `DONE`, the results are ready. - query_job = client.get_job( - query_job.job_id, location=query_job.location - ) # Make an API request. + query_job = typing.cast( + "bigquery.QueryJob", + client.get_job( + query_job.job_id, location=query_job.location + ), # Make an API request. 
+ ) print("Job {} is currently in state {}".format(query_job.job_id, query_job.state)) # [END bigquery_query_batch] diff --git a/samples/client_query_destination_table.py b/samples/client_query_destination_table.py index 303ce5a0c..b200f1cc6 100644 --- a/samples/client_query_destination_table.py +++ b/samples/client_query_destination_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table(table_id): +def client_query_destination_table(table_id: str) -> None: # [START bigquery_query_destination_table] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py index 5a109ed10..c4ab305f5 100644 --- a/samples/client_query_destination_table_clustered.py +++ b/samples/client_query_destination_table_clustered.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table_clustered(table_id): +def client_query_destination_table_clustered(table_id: str) -> None: # [START bigquery_query_clustered_table] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_cmek.py b/samples/client_query_destination_table_cmek.py index 24d4f2222..0fd44d189 100644 --- a/samples/client_query_destination_table_cmek.py +++ b/samples/client_query_destination_table_cmek.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table_cmek(table_id, kms_key_name): +def client_query_destination_table_cmek(table_id: str, kms_key_name: str) -> None: # [START bigquery_query_destination_table_cmek] from google.cloud import bigquery diff --git a/samples/client_query_destination_table_legacy.py b/samples/client_query_destination_table_legacy.py index c8fdd606f..ee45d9a01 100644 --- a/samples/client_query_destination_table_legacy.py +++ b/samples/client_query_destination_table_legacy.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_destination_table_legacy(table_id): +def client_query_destination_table_legacy(table_id: str) -> None: # [START bigquery_query_legacy_large_results] from google.cloud import bigquery diff --git a/samples/client_query_dry_run.py b/samples/client_query_dry_run.py index 1f7bd0c9c..418b43cb5 100644 --- a/samples/client_query_dry_run.py +++ b/samples/client_query_dry_run.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def client_query_dry_run(): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def client_query_dry_run() -> "bigquery.QueryJob": # [START bigquery_query_dry_run] from google.cloud import bigquery diff --git a/samples/client_query_legacy_sql.py b/samples/client_query_legacy_sql.py index 3f9465779..c054e1f28 100644 --- a/samples/client_query_legacy_sql.py +++ b/samples/client_query_legacy_sql.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_legacy_sql(): +def client_query_legacy_sql() -> None: # [START bigquery_query_legacy] from google.cloud import bigquery diff --git a/samples/client_query_relax_column.py b/samples/client_query_relax_column.py index 5e2ec8056..c96a1e7aa 100644 --- a/samples/client_query_relax_column.py +++ b/samples/client_query_relax_column.py @@ -13,7 +13,7 @@ # limitations under the License. 
-def client_query_relax_column(table_id): +def client_query_relax_column(table_id: str) -> None: # [START bigquery_relax_column_query_append] from google.cloud import bigquery diff --git a/samples/client_query_w_array_params.py b/samples/client_query_w_array_params.py index 4077be2c7..669713182 100644 --- a/samples/client_query_w_array_params.py +++ b/samples/client_query_w_array_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_array_params(): +def client_query_w_array_params() -> None: # [START bigquery_query_params_arrays] from google.cloud import bigquery diff --git a/samples/client_query_w_named_params.py b/samples/client_query_w_named_params.py index a0de8f63a..f42be1dc8 100644 --- a/samples/client_query_w_named_params.py +++ b/samples/client_query_w_named_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_named_params(): +def client_query_w_named_params() -> None: # [START bigquery_query_params_named] from google.cloud import bigquery diff --git a/samples/client_query_w_positional_params.py b/samples/client_query_w_positional_params.py index ee316044b..b088b305e 100644 --- a/samples/client_query_w_positional_params.py +++ b/samples/client_query_w_positional_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_positional_params(): +def client_query_w_positional_params() -> None: # [START bigquery_query_params_positional] from google.cloud import bigquery diff --git a/samples/client_query_w_struct_params.py b/samples/client_query_w_struct_params.py index 041a3a0e3..6c5b78113 100644 --- a/samples/client_query_w_struct_params.py +++ b/samples/client_query_w_struct_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_struct_params(): +def client_query_w_struct_params() -> None: # [START bigquery_query_params_structs] from google.cloud import bigquery diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index 41a27770e..07d64cc94 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -13,7 +13,7 @@ # limitations under the License. -def client_query_w_timestamp_params(): +def client_query_w_timestamp_params() -> None: # [START bigquery_query_params_timestamps] import datetime diff --git a/samples/copy_table.py b/samples/copy_table.py index 91c58e109..8c6153fef 100644 --- a/samples/copy_table.py +++ b/samples/copy_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def copy_table(source_table_id, destination_table_id): +def copy_table(source_table_id: str, destination_table_id: str) -> None: # [START bigquery_copy_table] diff --git a/samples/copy_table_cmek.py b/samples/copy_table_cmek.py index 52ccb5f7b..f2e8a90f9 100644 --- a/samples/copy_table_cmek.py +++ b/samples/copy_table_cmek.py @@ -13,7 +13,7 @@ # limitations under the License. -def copy_table_cmek(dest_table_id, orig_table_id, kms_key_name): +def copy_table_cmek(dest_table_id: str, orig_table_id: str, kms_key_name: str) -> None: # [START bigquery_copy_table_cmek] from google.cloud import bigquery diff --git a/samples/copy_table_multiple_source.py b/samples/copy_table_multiple_source.py index d86e380d0..1163b1664 100644 --- a/samples/copy_table_multiple_source.py +++ b/samples/copy_table_multiple_source.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Sequence -def copy_table_multiple_source(dest_table_id, table_ids): + +def copy_table_multiple_source(dest_table_id: str, table_ids: Sequence[str]) -> None: # [START bigquery_copy_table_multiple_source] diff --git a/samples/create_dataset.py b/samples/create_dataset.py index 6af3c67eb..dea91798d 100644 --- a/samples/create_dataset.py +++ b/samples/create_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def create_dataset(dataset_id): +def create_dataset(dataset_id: str) -> None: # [START bigquery_create_dataset] from google.cloud import bigquery diff --git a/samples/create_job.py b/samples/create_job.py index feed04ca0..39922f7ae 100644 --- a/samples/create_job.py +++ b/samples/create_job.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_job(): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_job() -> "bigquery.QueryJob": # [START bigquery_create_job] from google.cloud import bigquery diff --git a/samples/create_routine.py b/samples/create_routine.py index b8746905d..96dc24210 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_routine(routine_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_routine(routine_id: str) -> "bigquery.Routine": # [START bigquery_create_routine] from google.cloud import bigquery diff --git a/samples/create_routine_ddl.py b/samples/create_routine_ddl.py index c191bd385..56c7cfe24 100644 --- a/samples/create_routine_ddl.py +++ b/samples/create_routine_ddl.py @@ -13,7 +13,7 @@ # limitations under the License. -def create_routine_ddl(routine_id): +def create_routine_ddl(routine_id: str) -> None: # [START bigquery_create_routine_ddl] diff --git a/samples/create_table.py b/samples/create_table.py index d62e86681..eaac54696 100644 --- a/samples/create_table.py +++ b/samples/create_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def create_table(table_id): +def create_table(table_id: str) -> None: # [START bigquery_create_table] from google.cloud import bigquery diff --git a/samples/create_table_clustered.py b/samples/create_table_clustered.py index 2b45b747e..1686c519a 100644 --- a/samples/create_table_clustered.py +++ b/samples/create_table_clustered.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_table_clustered(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_table_clustered(table_id: str) -> "bigquery.Table": # [START bigquery_create_table_clustered] from google.cloud import bigquery diff --git a/samples/create_table_range_partitioned.py b/samples/create_table_range_partitioned.py index 260041aa5..4dc45ed58 100644 --- a/samples/create_table_range_partitioned.py +++ b/samples/create_table_range_partitioned.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import typing -def create_table_range_partitioned(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_table_range_partitioned(table_id: str) -> "bigquery.Table": # [START bigquery_create_table_range_partitioned] from google.cloud import bigquery diff --git a/samples/dataset_exists.py b/samples/dataset_exists.py index b4db9353b..221899a65 100644 --- a/samples/dataset_exists.py +++ b/samples/dataset_exists.py @@ -13,7 +13,7 @@ # limitations under the License. -def dataset_exists(dataset_id): +def dataset_exists(dataset_id: str) -> None: # [START bigquery_dataset_exists] from google.cloud import bigquery diff --git a/samples/delete_dataset.py b/samples/delete_dataset.py index e25740baa..b340ed57a 100644 --- a/samples/delete_dataset.py +++ b/samples/delete_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_dataset(dataset_id): +def delete_dataset(dataset_id: str) -> None: # [START bigquery_delete_dataset] diff --git a/samples/delete_dataset_labels.py b/samples/delete_dataset_labels.py index a52de2967..ec5df09c1 100644 --- a/samples/delete_dataset_labels.py +++ b/samples/delete_dataset_labels.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def delete_dataset_labels(dataset_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def delete_dataset_labels(dataset_id: str) -> "bigquery.Dataset": # [START bigquery_delete_label_dataset] diff --git a/samples/delete_model.py b/samples/delete_model.py index 0190315c6..2703ba3f5 100644 --- a/samples/delete_model.py +++ b/samples/delete_model.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_model(model_id): +def delete_model(model_id: str) -> None: """Sample ID: go/samples-tracker/1534""" # [START bigquery_delete_model] diff --git a/samples/delete_routine.py b/samples/delete_routine.py index 679cbee4b..7362a5fea 100644 --- a/samples/delete_routine.py +++ b/samples/delete_routine.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_routine(routine_id): +def delete_routine(routine_id: str) -> None: # [START bigquery_delete_routine] diff --git a/samples/delete_table.py b/samples/delete_table.py index 3d0a6f0ba..9e7ee170a 100644 --- a/samples/delete_table.py +++ b/samples/delete_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_table(table_id): +def delete_table(table_id: str) -> None: # [START bigquery_delete_table] diff --git a/samples/download_public_data.py b/samples/download_public_data.py index d10ed161a..a488bbbb5 100644 --- a/samples/download_public_data.py +++ b/samples/download_public_data.py @@ -13,7 +13,7 @@ # limitations under the License. -def download_public_data(): +def download_public_data() -> None: # [START bigquery_pandas_public_data] diff --git a/samples/download_public_data_sandbox.py b/samples/download_public_data_sandbox.py index afb50b15c..ce5200b4e 100644 --- a/samples/download_public_data_sandbox.py +++ b/samples/download_public_data_sandbox.py @@ -13,7 +13,7 @@ # limitations under the License. -def download_public_data_sandbox(): +def download_public_data_sandbox() -> None: # [START bigquery_pandas_public_data_sandbox] diff --git a/samples/geography/conftest.py b/samples/geography/conftest.py index 265900f5a..14823d10a 100644 --- a/samples/geography/conftest.py +++ b/samples/geography/conftest.py @@ -13,30 +13,31 @@ # limitations under the License. 
import datetime +from typing import Iterator import uuid from google.cloud import bigquery import pytest -def temp_suffix(): +def temp_suffix() -> str: now = datetime.datetime.now() return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(scope="session") -def bigquery_client(): +def bigquery_client() -> bigquery.Client: bigquery_client = bigquery.Client() return bigquery_client @pytest.fixture(scope="session") -def project_id(bigquery_client): +def project_id(bigquery_client: bigquery.Client) -> str: return bigquery_client.project @pytest.fixture -def dataset_id(bigquery_client): +def dataset_id(bigquery_client: bigquery.Client) -> Iterator[str]: dataset_id = f"geography_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -44,7 +45,9 @@ def dataset_id(bigquery_client): @pytest.fixture -def table_id(bigquery_client, project_id, dataset_id): +def table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: table_id = f"{project_id}.{dataset_id}.geography_{temp_suffix()}" table = bigquery.Table(table_id) table.schema = [ diff --git a/samples/geography/insert_geojson.py b/samples/geography/insert_geojson.py index 23f249c15..2db407b55 100644 --- a/samples/geography/insert_geojson.py +++ b/samples/geography/insert_geojson.py @@ -12,8 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Mapping, Optional, Sequence + + +def insert_geojson( + override_values: Optional[Mapping[str, str]] = None +) -> Sequence[Dict[str, object]]: + + if override_values is None: + override_values = {} -def insert_geojson(override_values={}): # [START bigquery_insert_geojson] import geojson from google.cloud import bigquery diff --git a/samples/geography/insert_geojson_test.py b/samples/geography/insert_geojson_test.py index 5ef15ee13..507201872 100644 --- a/samples/geography/insert_geojson_test.py +++ b/samples/geography/insert_geojson_test.py @@ -15,6 +15,6 @@ from . import insert_geojson -def test_insert_geojson(table_id): +def test_insert_geojson(table_id: str) -> None: errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) assert not errors diff --git a/samples/geography/insert_wkt.py b/samples/geography/insert_wkt.py index d7d3accde..25c7ee727 100644 --- a/samples/geography/insert_wkt.py +++ b/samples/geography/insert_wkt.py @@ -12,8 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Mapping, Optional, Sequence + + +def insert_wkt( + override_values: Optional[Mapping[str, str]] = None +) -> Sequence[Dict[str, object]]: + + if override_values is None: + override_values = {} -def insert_wkt(override_values={}): # [START bigquery_insert_geography_wkt] from google.cloud import bigquery import shapely.geometry diff --git a/samples/geography/insert_wkt_test.py b/samples/geography/insert_wkt_test.py index 8bcb62cec..a7c3d4ed3 100644 --- a/samples/geography/insert_wkt_test.py +++ b/samples/geography/insert_wkt_test.py @@ -15,6 +15,6 @@ from . 
import insert_wkt -def test_insert_wkt(table_id): +def test_insert_wkt(table_id: str) -> None: errors = insert_wkt.insert_wkt(override_values={"table_id": table_id}) assert not errors diff --git a/samples/geography/mypy.ini b/samples/geography/mypy.ini new file mode 100644 index 000000000..41898432f --- /dev/null +++ b/samples/geography/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-geojson,pandas,shapely.*] +ignore_missing_imports = True diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py index fa8073fef..e36331f27 100644 --- a/samples/geography/to_geodataframe.py +++ b/samples/geography/to_geodataframe.py @@ -12,12 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery -client = bigquery.Client() +if typing.TYPE_CHECKING: + import pandas + + +client: bigquery.Client = bigquery.Client() -def get_austin_service_requests_as_geography(): +def get_austin_service_requests_as_geography() -> "pandas.DataFrame": # [START bigquery_query_results_geodataframe] sql = """ diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py index 7a2ba6937..7499d7001 100644 --- a/samples/geography/to_geodataframe_test.py +++ b/samples/geography/to_geodataframe_test.py @@ -17,7 +17,7 @@ from .to_geodataframe import get_austin_service_requests_as_geography -def test_get_austin_service_requests_as_geography(): +def test_get_austin_service_requests_as_geography() -> None: geopandas = pytest.importorskip("geopandas") df = get_austin_service_requests_as_geography() assert isinstance(df, geopandas.GeoDataFrame) diff --git a/samples/get_dataset.py b/samples/get_dataset.py index 54ba05781..5654cbdce 100644 --- a/samples/get_dataset.py +++ b/samples/get_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_dataset(dataset_id): +def get_dataset(dataset_id: str) -> None: # [START bigquery_get_dataset] diff --git a/samples/get_dataset_labels.py b/samples/get_dataset_labels.py index 18a9ca985..d97ee3c01 100644 --- a/samples/get_dataset_labels.py +++ b/samples/get_dataset_labels.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_dataset_labels(dataset_id): +def get_dataset_labels(dataset_id: str) -> None: # [START bigquery_get_dataset_labels] diff --git a/samples/get_model.py b/samples/get_model.py index 1570ef816..dab4146ab 100644 --- a/samples/get_model.py +++ b/samples/get_model.py @@ -13,7 +13,7 @@ # limitations under the License. -def get_model(model_id): +def get_model(model_id: str) -> None: """Sample ID: go/samples-tracker/1510""" # [START bigquery_get_model] diff --git a/samples/get_routine.py b/samples/get_routine.py index 72715ee1b..031d9a127 100644 --- a/samples/get_routine.py +++ b/samples/get_routine.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def get_routine(routine_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def get_routine(routine_id: str) -> "bigquery.Routine": # [START bigquery_get_routine] diff --git a/samples/get_table.py b/samples/get_table.py index 0d1d809ba..6195aaf9a 100644 --- a/samples/get_table.py +++ b/samples/get_table.py @@ -13,7 +13,7 @@ # limitations under the License. 
-def get_table(table_id): +def get_table(table_id: str) -> None: # [START bigquery_get_table] diff --git a/samples/label_dataset.py b/samples/label_dataset.py index bd4cd6721..a59743e5d 100644 --- a/samples/label_dataset.py +++ b/samples/label_dataset.py @@ -13,7 +13,7 @@ # limitations under the License. -def label_dataset(dataset_id): +def label_dataset(dataset_id: str) -> None: # [START bigquery_label_dataset] diff --git a/samples/list_datasets.py b/samples/list_datasets.py index 6a1b93d00..c1b6639a9 100644 --- a/samples/list_datasets.py +++ b/samples/list_datasets.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_datasets(): +def list_datasets() -> None: # [START bigquery_list_datasets] diff --git a/samples/list_datasets_by_label.py b/samples/list_datasets_by_label.py index 1b310049b..d1f264872 100644 --- a/samples/list_datasets_by_label.py +++ b/samples/list_datasets_by_label.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_datasets_by_label(): +def list_datasets_by_label() -> None: # [START bigquery_list_datasets_by_label] diff --git a/samples/list_models.py b/samples/list_models.py index 7251c001a..df8ae0e1b 100644 --- a/samples/list_models.py +++ b/samples/list_models.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_models(dataset_id): +def list_models(dataset_id: str) -> None: """Sample ID: go/samples-tracker/1512""" # [START bigquery_list_models] diff --git a/samples/list_routines.py b/samples/list_routines.py index 718d40d68..bee7c23be 100644 --- a/samples/list_routines.py +++ b/samples/list_routines.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_routines(dataset_id): +def list_routines(dataset_id: str) -> None: # [START bigquery_list_routines] diff --git a/samples/list_tables.py b/samples/list_tables.py index 9ab527a49..df846961d 100644 --- a/samples/list_tables.py +++ b/samples/list_tables.py @@ -13,7 +13,7 @@ # limitations under the License. -def list_tables(dataset_id): +def list_tables(dataset_id: str) -> None: # [START bigquery_list_tables] diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py index 20d412cb3..87b6c76ce 100644 --- a/samples/load_table_clustered.py +++ b/samples/load_table_clustered.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def load_table_clustered(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def load_table_clustered(table_id: str) -> "bigquery.Table": # [START bigquery_load_table_clustered] from google.cloud import bigquery diff --git a/samples/load_table_dataframe.py b/samples/load_table_dataframe.py index 91dd6e9f0..6cd06f266 100644 --- a/samples/load_table_dataframe.py +++ b/samples/load_table_dataframe.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def load_table_dataframe(table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def load_table_dataframe(table_id: str) -> "bigquery.Table": # [START bigquery_load_table_dataframe] import datetime diff --git a/samples/load_table_file.py b/samples/load_table_file.py index b7e45dac3..81df368f0 100644 --- a/samples/load_table_file.py +++ b/samples/load_table_file.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import typing -def load_table_file(file_path, table_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def load_table_file(file_path: str, table_id: str) -> "bigquery.Table": # [START bigquery_load_from_file] from google.cloud import bigquery diff --git a/samples/load_table_uri_autodetect_csv.py b/samples/load_table_uri_autodetect_csv.py index 09a5d708d..c412c63f1 100644 --- a/samples/load_table_uri_autodetect_csv.py +++ b/samples/load_table_uri_autodetect_csv.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_autodetect_csv(table_id): +def load_table_uri_autodetect_csv(table_id: str) -> None: # [START bigquery_load_table_gcs_csv_autodetect] from google.cloud import bigquery diff --git a/samples/load_table_uri_autodetect_json.py b/samples/load_table_uri_autodetect_json.py index 61b7aab12..9d0bc3f22 100644 --- a/samples/load_table_uri_autodetect_json.py +++ b/samples/load_table_uri_autodetect_json.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_autodetect_json(table_id): +def load_table_uri_autodetect_json(table_id: str) -> None: # [START bigquery_load_table_gcs_json_autodetect] from google.cloud import bigquery diff --git a/samples/load_table_uri_avro.py b/samples/load_table_uri_avro.py index 5c25eed22..e9f7c39ed 100644 --- a/samples/load_table_uri_avro.py +++ b/samples/load_table_uri_avro.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_avro(table_id): +def load_table_uri_avro(table_id: str) -> None: # [START bigquery_load_table_gcs_avro] from google.cloud import bigquery diff --git a/samples/load_table_uri_cmek.py b/samples/load_table_uri_cmek.py index 8bd84993c..4dfc0d3b4 100644 --- a/samples/load_table_uri_cmek.py +++ b/samples/load_table_uri_cmek.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_cmek(table_id, kms_key_name): +def load_table_uri_cmek(table_id: str, kms_key_name: str) -> None: # [START bigquery_load_table_gcs_json_cmek] from google.cloud import bigquery diff --git a/samples/load_table_uri_csv.py b/samples/load_table_uri_csv.py index 0736a560c..9cb8c6f20 100644 --- a/samples/load_table_uri_csv.py +++ b/samples/load_table_uri_csv.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_csv(table_id): +def load_table_uri_csv(table_id: str) -> None: # [START bigquery_load_table_gcs_csv] from google.cloud import bigquery diff --git a/samples/load_table_uri_json.py b/samples/load_table_uri_json.py index 3c21972c8..409a83e8e 100644 --- a/samples/load_table_uri_json.py +++ b/samples/load_table_uri_json.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_json(table_id): +def load_table_uri_json(table_id: str) -> None: # [START bigquery_load_table_gcs_json] from google.cloud import bigquery diff --git a/samples/load_table_uri_orc.py b/samples/load_table_uri_orc.py index 3ab6ff45a..7babd2630 100644 --- a/samples/load_table_uri_orc.py +++ b/samples/load_table_uri_orc.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_orc(table_id): +def load_table_uri_orc(table_id: str) -> None: # [START bigquery_load_table_gcs_orc] from google.cloud import bigquery diff --git a/samples/load_table_uri_parquet.py b/samples/load_table_uri_parquet.py index 3dce5e8ef..6ea032f71 100644 --- a/samples/load_table_uri_parquet.py +++ b/samples/load_table_uri_parquet.py @@ -13,7 +13,7 @@ # limitations under the License. 
-def load_table_uri_parquet(table_id): +def load_table_uri_parquet(table_id: str) -> None: # [START bigquery_load_table_gcs_parquet] from google.cloud import bigquery diff --git a/samples/load_table_uri_truncate_avro.py b/samples/load_table_uri_truncate_avro.py index 1aa0aa49c..51c6636fa 100644 --- a/samples/load_table_uri_truncate_avro.py +++ b/samples/load_table_uri_truncate_avro.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_avro(table_id): +def load_table_uri_truncate_avro(table_id: str) -> None: # [START bigquery_load_table_gcs_avro_truncate] import io diff --git a/samples/load_table_uri_truncate_csv.py b/samples/load_table_uri_truncate_csv.py index 198cdc281..ee8b34043 100644 --- a/samples/load_table_uri_truncate_csv.py +++ b/samples/load_table_uri_truncate_csv.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_csv(table_id): +def load_table_uri_truncate_csv(table_id: str) -> None: # [START bigquery_load_table_gcs_csv_truncate] import io diff --git a/samples/load_table_uri_truncate_json.py b/samples/load_table_uri_truncate_json.py index d67d93e7b..e85e0808e 100644 --- a/samples/load_table_uri_truncate_json.py +++ b/samples/load_table_uri_truncate_json.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_json(table_id): +def load_table_uri_truncate_json(table_id: str) -> None: # [START bigquery_load_table_gcs_json_truncate] import io diff --git a/samples/load_table_uri_truncate_orc.py b/samples/load_table_uri_truncate_orc.py index 90543b791..c730099d1 100644 --- a/samples/load_table_uri_truncate_orc.py +++ b/samples/load_table_uri_truncate_orc.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_orc(table_id): +def load_table_uri_truncate_orc(table_id: str) -> None: # [START bigquery_load_table_gcs_orc_truncate] import io diff --git a/samples/load_table_uri_truncate_parquet.py b/samples/load_table_uri_truncate_parquet.py index e036fc180..3a0a55c8a 100644 --- a/samples/load_table_uri_truncate_parquet.py +++ b/samples/load_table_uri_truncate_parquet.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_truncate_parquet(table_id): +def load_table_uri_truncate_parquet(table_id: str) -> None: # [START bigquery_load_table_gcs_parquet_truncate] import io diff --git a/samples/magics/_helpers.py b/samples/magics/_helpers.py index 18a513b99..c7248ee3d 100644 --- a/samples/magics/_helpers.py +++ b/samples/magics/_helpers.py @@ -13,7 +13,7 @@ # limitations under the License. -def strip_region_tags(sample_text): +def strip_region_tags(sample_text: str) -> str: """Remove blank lines and region tags from sample text""" magic_lines = [ line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line diff --git a/samples/magics/conftest.py b/samples/magics/conftest.py index bf8602235..55ea30f90 100644 --- a/samples/magics/conftest.py +++ b/samples/magics/conftest.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import typing +from typing import Iterator + import pytest +if typing.TYPE_CHECKING: + from IPython.core.interactiveshell import TerminalInteractiveShell + interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") tools = pytest.importorskip("IPython.testing.tools") @pytest.fixture(scope="session") -def ipython(): +def ipython() -> "TerminalInteractiveShell": config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True shell = interactiveshell.TerminalInteractiveShell.instance(config=config) @@ -27,7 +33,9 @@ def ipython(): @pytest.fixture(autouse=True) -def ipython_interactive(ipython): +def ipython_interactive( + ipython: "TerminalInteractiveShell", +) -> Iterator["TerminalInteractiveShell"]: """Activate IPython's builtin hooks for the duration of the test scope. diff --git a/samples/magics/mypy.ini b/samples/magics/mypy.ini new file mode 100644 index 000000000..af328dc5e --- /dev/null +++ b/samples/magics/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-IPython.*,nox,noxfile_config,pandas] +ignore_missing_imports = True diff --git a/samples/magics/query.py b/samples/magics/query.py index c2739eace..4d3b4418b 100644 --- a/samples/magics/query.py +++ b/samples/magics/query.py @@ -12,12 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import IPython from . import _helpers +if typing.TYPE_CHECKING: + import pandas + -def query(): +def query() -> "pandas.DataFrame": ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") diff --git a/samples/magics/query_params_scalars.py b/samples/magics/query_params_scalars.py index a26f25aea..e833ef93b 100644 --- a/samples/magics/query_params_scalars.py +++ b/samples/magics/query_params_scalars.py @@ -12,12 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import IPython from . import _helpers +if typing.TYPE_CHECKING: + import pandas + -def query_with_parameters(): +def query_with_parameters() -> "pandas.DataFrame": ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") diff --git a/samples/magics/query_params_scalars_test.py b/samples/magics/query_params_scalars_test.py index 9b4159667..4f481cbe9 100644 --- a/samples/magics/query_params_scalars_test.py +++ b/samples/magics/query_params_scalars_test.py @@ -17,7 +17,7 @@ from . import query_params_scalars -def test_query_with_parameters(): +def test_query_with_parameters() -> None: df = query_params_scalars.query_with_parameters() assert isinstance(df, pandas.DataFrame) assert len(df) == 10 diff --git a/samples/magics/query_test.py b/samples/magics/query_test.py index d20797908..1aaa9c1bb 100644 --- a/samples/magics/query_test.py +++ b/samples/magics/query_test.py @@ -17,7 +17,7 @@ from . 
import query -def test_query(): +def test_query() -> None: df = query.query() assert isinstance(df, pandas.DataFrame) assert len(df) == 3 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 0d36904c4..70b7936cb 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -10,3 +10,4 @@ pandas==1.1.5; python_version < '3.7' pandas==1.3.4; python_version >= '3.7' pyarrow==6.0.0 pytz==2021.1 +typing-extensions==3.10.0.2 diff --git a/samples/mypy.ini b/samples/mypy.ini new file mode 100644 index 000000000..29757e47d --- /dev/null +++ b/samples/mypy.ini @@ -0,0 +1,12 @@ +[mypy] +# Should match DEFAULT_PYTHON_VERSION from root noxfile.py +python_version = 3.8 +exclude = noxfile\.py +strict = True +warn_unused_configs = True + +[mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*] +ignore_missing_imports = True + +[mypy-pandas,pyarrow,shapely.*,test_utils.*] +ignore_missing_imports = True diff --git a/samples/query_external_gcs_temporary_table.py b/samples/query_external_gcs_temporary_table.py index 3c3caf695..9bcb86aab 100644 --- a/samples/query_external_gcs_temporary_table.py +++ b/samples/query_external_gcs_temporary_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_external_gcs_temporary_table(): +def query_external_gcs_temporary_table() -> None: # [START bigquery_query_external_gcs_temp] from google.cloud import bigquery @@ -30,7 +30,9 @@ def query_external_gcs_temporary_table(): bigquery.SchemaField("name", "STRING"), bigquery.SchemaField("post_abbr", "STRING"), ] - external_config.options.skip_leading_rows = 1 + assert external_config.csv_options is not None + external_config.csv_options.skip_leading_rows = 1 + table_id = "us_states" job_config = bigquery.QueryJobConfig(table_definitions={table_id: external_config}) diff --git a/samples/query_external_sheets_permanent_table.py b/samples/query_external_sheets_permanent_table.py index 31143d1b0..a5855e66a 100644 --- a/samples/query_external_sheets_permanent_table.py +++ b/samples/query_external_sheets_permanent_table.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_external_sheets_permanent_table(dataset_id): +def query_external_sheets_permanent_table(dataset_id: str) -> None: # [START bigquery_query_external_sheets_perm] from google.cloud import bigquery @@ -56,8 +56,10 @@ def query_external_sheets_permanent_table(dataset_id): "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing" ) external_config.source_uris = [sheet_url] - external_config.options.skip_leading_rows = 1 # Optionally skip header row. - external_config.options.range = ( + options = external_config.google_sheets_options + assert options is not None + options.skip_leading_rows = 1 # Optionally skip header row. + options.range = ( "us-states!A20:B49" # Optionally set range of the sheet to query from. ) table.external_data_configuration = external_config diff --git a/samples/query_external_sheets_temporary_table.py b/samples/query_external_sheets_temporary_table.py index a9d58e388..944d3b826 100644 --- a/samples/query_external_sheets_temporary_table.py +++ b/samples/query_external_sheets_temporary_table.py @@ -13,7 +13,7 @@ # limitations under the License. 
-def query_external_sheets_temporary_table(): +def query_external_sheets_temporary_table() -> None: # [START bigquery_query_external_sheets_temp] # [START bigquery_auth_drive_scope] @@ -53,8 +53,10 @@ def query_external_sheets_temporary_table(): bigquery.SchemaField("name", "STRING"), bigquery.SchemaField("post_abbr", "STRING"), ] - external_config.options.skip_leading_rows = 1 # Optionally skip header row. - external_config.options.range = ( + options = external_config.google_sheets_options + assert options is not None + options.skip_leading_rows = 1 # Optionally skip header row. + options.range = ( "us-states!A20:B49" # Optionally set range of the sheet to query from. ) table_id = "us_states" diff --git a/samples/query_no_cache.py b/samples/query_no_cache.py index e380f0b15..f39c01dbc 100644 --- a/samples/query_no_cache.py +++ b/samples/query_no_cache.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_no_cache(): +def query_no_cache() -> None: # [START bigquery_query_no_cache] from google.cloud import bigquery diff --git a/samples/query_pagination.py b/samples/query_pagination.py index 57a4212cf..2e1654050 100644 --- a/samples/query_pagination.py +++ b/samples/query_pagination.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_pagination(): +def query_pagination() -> None: # [START bigquery_query_pagination] diff --git a/samples/query_script.py b/samples/query_script.py index 9390d352d..89ff55187 100644 --- a/samples/query_script.py +++ b/samples/query_script.py @@ -13,7 +13,7 @@ # limitations under the License. -def query_script(): +def query_script() -> None: # [START bigquery_query_script] from google.cloud import bigquery diff --git a/samples/query_to_arrow.py b/samples/query_to_arrow.py index 4a57992d1..157a93638 100644 --- a/samples/query_to_arrow.py +++ b/samples/query_to_arrow.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def query_to_arrow(): +if typing.TYPE_CHECKING: + import pyarrow + + +def query_to_arrow() -> "pyarrow.Table": # [START bigquery_query_to_arrow] diff --git a/samples/snippets/README.rst b/samples/snippets/README.rst index 7c3e19e68..05af1e812 100644 --- a/samples/snippets/README.rst +++ b/samples/snippets/README.rst @@ -1,4 +1,3 @@ - .. This file is automatically generated. Do not edit this file directly. Google BigQuery Python Samples @@ -16,11 +15,14 @@ This directory contains samples for Google BigQuery. `Google BigQuery`_ is Googl .. _Google BigQuery: https://cloud.google.com/bigquery/docs +To run the sample, you need to have the `BigQuery Admin` role. + + + Setup ------------------------------------------------------------------------------- - Authentication ++++++++++++++ @@ -31,9 +33,6 @@ credentials for applications. .. _Authentication Getting Started Guide: https://cloud.google.com/docs/authentication/getting-started - - - Install Dependencies ++++++++++++++++++++ @@ -64,15 +63,9 @@ Install Dependencies .. _pip: https://pip.pypa.io/ .. 
_virtualenv: https://virtualenv.pypa.io/ - - - - - Samples ------------------------------------------------------------------------------- - Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -89,8 +82,6 @@ To run this sample: $ python quickstart.py - - Simple Application +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -107,8 +98,6 @@ To run this sample: $ python simple_app.py - - User Credentials +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -124,7 +113,6 @@ To run this sample: $ python user_credentials.py - usage: user_credentials.py [-h] [--launch-browser] project Command-line application to run a query using user credentials. @@ -143,10 +131,6 @@ To run this sample: - - - - The client library ------------------------------------------------------------------------------- @@ -162,5 +146,4 @@ to `browse the source`_ and `report issues`_. https://github.com/GoogleCloudPlatform/google-cloud-python/issues - -.. _Google Cloud SDK: https://cloud.google.com/sdk/ +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/samples/snippets/authenticate_service_account.py b/samples/snippets/authenticate_service_account.py index c07848bee..e44766886 100644 --- a/samples/snippets/authenticate_service_account.py +++ b/samples/snippets/authenticate_service_account.py @@ -13,9 +13,13 @@ # limitations under the License. import os +import typing + +if typing.TYPE_CHECKING: + from google.cloud import bigquery -def main(): +def main() -> "bigquery.Client": key_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") # [START bigquery_client_json_credentials] diff --git a/samples/snippets/authenticate_service_account_test.py b/samples/snippets/authenticate_service_account_test.py index 131c69d2c..4b5711f80 100644 --- a/samples/snippets/authenticate_service_account_test.py +++ b/samples/snippets/authenticate_service_account_test.py @@ -12,19 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing +from typing import Any + import google.auth import authenticate_service_account +if typing.TYPE_CHECKING: + import pytest + -def mock_credentials(*args, **kwargs): +def mock_credentials(*args: Any, **kwargs: Any) -> google.auth.credentials.Credentials: credentials, _ = google.auth.default( ["https://www.googleapis.com/auth/cloud-platform"] ) return credentials -def test_main(monkeypatch): +def test_main(monkeypatch: "pytest.MonkeyPatch") -> None: monkeypatch.setattr( "google.oauth2.service_account.Credentials.from_service_account_file", mock_credentials, diff --git a/samples/snippets/authorized_view_tutorial.py b/samples/snippets/authorized_view_tutorial.py index b6a20c6ec..d3aeb1da1 100644 --- a/samples/snippets/authorized_view_tutorial.py +++ b/samples/snippets/authorized_view_tutorial.py @@ -14,12 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Optional -def run_authorized_view_tutorial(override_values={}): + +def run_authorized_view_tutorial( + override_values: Optional[Dict[str, str]] = None +) -> None: # Note to user: This is a group email for testing purposes. Replace with # your own group email address when running this code. 
analyst_group_email = "example-analyst-group@google.com" + if override_values is None: + override_values = {} + # [START bigquery_authorized_view_tutorial] # Create a source dataset # [START bigquery_avt_create_source_dataset] diff --git a/samples/snippets/authorized_view_tutorial_test.py b/samples/snippets/authorized_view_tutorial_test.py index eb247c5eb..cae870486 100644 --- a/samples/snippets/authorized_view_tutorial_test.py +++ b/samples/snippets/authorized_view_tutorial_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator, List import uuid from google.cloud import bigquery @@ -21,19 +22,21 @@ @pytest.fixture(scope="module") -def client(): +def client() -> bigquery.Client: return bigquery.Client() @pytest.fixture -def datasets_to_delete(client): - doomed = [] +def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: + doomed: List[str] = [] yield doomed for item in doomed: client.delete_dataset(item, delete_contents=True, not_found_ok=True) -def test_authorized_view_tutorial(client, datasets_to_delete): +def test_authorized_view_tutorial( + client: bigquery.Client, datasets_to_delete: List[str] +) -> None: override_values = { "source_dataset_id": "github_source_data_{}".format( str(uuid.uuid4()).replace("-", "_") diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 74984f902..3bbfe1c74 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator + from google.cloud import bigquery import pytest import test_utils.prefixer @@ -21,7 +23,7 @@ @pytest.fixture(scope="session", autouse=True) -def cleanup_datasets(bigquery_client: bigquery.Client): +def cleanup_datasets(bigquery_client: bigquery.Client) -> None: for dataset in bigquery_client.list_datasets(): if prefixer.should_cleanup(dataset.dataset_id): bigquery_client.delete_dataset( @@ -30,18 +32,18 @@ def cleanup_datasets(bigquery_client: bigquery.Client): @pytest.fixture(scope="session") -def bigquery_client(): +def bigquery_client() -> bigquery.Client: bigquery_client = bigquery.Client() return bigquery_client @pytest.fixture(scope="session") -def project_id(bigquery_client): +def project_id(bigquery_client: bigquery.Client) -> str: return bigquery_client.project @pytest.fixture(scope="session") -def dataset_id(bigquery_client: bigquery.Client, project_id: str): +def dataset_id(bigquery_client: bigquery.Client, project_id: str) -> Iterator[str]: dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) @@ -51,7 +53,14 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): @pytest.fixture(scope="session") -def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): +def entity_id(bigquery_client: bigquery.Client, dataset_id: str) -> str: + return "cloud-developer-relations@google.com" + + +@pytest.fixture(scope="session") +def dataset_id_us_east1( + bigquery_client: bigquery.Client, project_id: str, +) -> Iterator[str]: dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) @@ -64,7 +73,7 @@ def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): @pytest.fixture(scope="session") def table_id_us_east1( bigquery_client: bigquery.Client, 
project_id: str, dataset_id_us_east1: str -): +) -> Iterator[str]: table_id = prefixer.create_prefix() full_table_id = f"{project_id}.{dataset_id_us_east1}.{table_id}" table = bigquery.Table( @@ -76,7 +85,9 @@ def table_id_us_east1( @pytest.fixture -def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): +def random_table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: """Create a new table ID each time, so random_table_id can be used as target for load jobs. """ @@ -87,5 +98,7 @@ def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_i @pytest.fixture -def bigquery_client_patch(monkeypatch, bigquery_client): +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/snippets/create_table_external_hive_partitioned.py b/samples/snippets/create_table_external_hive_partitioned.py index 2ff8a2220..1170c57da 100644 --- a/samples/snippets/create_table_external_hive_partitioned.py +++ b/samples/snippets/create_table_external_hive_partitioned.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing -def create_table_external_hive_partitioned(table_id: str): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_table_external_hive_partitioned(table_id: str) -> "bigquery.Table": original_table_id = table_id # [START bigquery_create_table_external_hivepartitioned] # Demonstrates creating an external table with hive partitioning. diff --git a/samples/snippets/create_table_external_hive_partitioned_test.py b/samples/snippets/create_table_external_hive_partitioned_test.py index c3cdddb55..3ff39c881 100644 --- a/samples/snippets/create_table_external_hive_partitioned_test.py +++ b/samples/snippets/create_table_external_hive_partitioned_test.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import create_table_external_hive_partitioned +if typing.TYPE_CHECKING: + import pytest + -def test_create_table_external_hive_partitioned(capsys, random_table_id): +def test_create_table_external_hive_partitioned( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: table = create_table_external_hive_partitioned.create_table_external_hive_partitioned( random_table_id ) diff --git a/samples/snippets/dataset_access_test.py b/samples/snippets/dataset_access_test.py new file mode 100644 index 000000000..4d1a70eb1 --- /dev/null +++ b/samples/snippets/dataset_access_test.py @@ -0,0 +1,59 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import typing + +import revoke_dataset_access +import update_dataset_access + +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + + +def test_dataset_access_permissions( + capsys: "pytest.CaptureFixture[str]", + dataset_id: str, + entity_id: str, + bigquery_client: "bigquery.Client", +) -> None: + original_dataset = bigquery_client.get_dataset(dataset_id) + update_dataset_access.update_dataset_access(dataset_id, entity_id) + full_dataset_id = "{}.{}".format( + original_dataset.project, original_dataset.dataset_id + ) + + out, err = capsys.readouterr() + assert ( + "Updated dataset '{}' with modified user permissions.".format(full_dataset_id) + in out + ) + + updated_dataset = bigquery_client.get_dataset(dataset_id) + updated_dataset_entries = list(updated_dataset.access_entries) + updated_dataset_entity_ids = {entry.entity_id for entry in updated_dataset_entries} + assert entity_id in updated_dataset_entity_ids + revoke_dataset_access.revoke_dataset_access(dataset_id, entity_id) + revoked_dataset = bigquery_client.get_dataset(dataset_id) + revoked_dataset_entries = list(revoked_dataset.access_entries) + + full_dataset_id = f"{updated_dataset.project}.{updated_dataset.dataset_id}" + out, err = capsys.readouterr() + assert ( + f"Revoked dataset access for '{entity_id}' to dataset '{full_dataset_id}'." + in out + ) + assert len(revoked_dataset_entries) == len(updated_dataset_entries) - 1 + revoked_dataset_entity_ids = {entry.entity_id for entry in revoked_dataset_entries} + assert entity_id not in revoked_dataset_entity_ids diff --git a/samples/snippets/delete_job.py b/samples/snippets/delete_job.py index abed0c90d..7c8640baf 100644 --- a/samples/snippets/delete_job.py +++ b/samples/snippets/delete_job.py @@ -13,7 +13,7 @@ # limitations under the License. -def delete_job_metadata(job_id: str, location: str): +def delete_job_metadata(job_id: str, location: str) -> None: orig_job_id = job_id orig_location = location # [START bigquery_delete_job] diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py index c9baa817d..0bc83e4a6 100644 --- a/samples/snippets/delete_job_test.py +++ b/samples/snippets/delete_job_test.py @@ -12,14 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery import delete_job +if typing.TYPE_CHECKING: + import pytest + def test_delete_job_metadata( - capsys, bigquery_client: bigquery.Client, table_id_us_east1: str -): + capsys: "pytest.CaptureFixture[str]", + bigquery_client: bigquery.Client, + table_id_us_east1: str, +) -> None: query_job: bigquery.QueryJob = bigquery_client.query( f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1", ) diff --git a/samples/snippets/jupyter_tutorial_test.py b/samples/snippets/jupyter_tutorial_test.py index 7fe1cde85..9d42a4eda 100644 --- a/samples/snippets/jupyter_tutorial_test.py +++ b/samples/snippets/jupyter_tutorial_test.py @@ -11,8 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
+ +import typing +from typing import Iterator + import pytest +if typing.TYPE_CHECKING: + from IPython.terminal.interactiveshell import TerminalInteractiveShell + IPython = pytest.importorskip("IPython") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") tools = pytest.importorskip("IPython.testing.tools") @@ -23,7 +30,7 @@ @pytest.fixture(scope="session") -def ipython(): +def ipython() -> "TerminalInteractiveShell": config = tools.default_config() config.TerminalInteractiveShell.simple_prompt = True shell = interactiveshell.TerminalInteractiveShell.instance(config=config) @@ -31,7 +38,9 @@ def ipython(): @pytest.fixture() -def ipython_interactive(request, ipython): +def ipython_interactive( + request: pytest.FixtureRequest, ipython: "TerminalInteractiveShell" +) -> Iterator["TerminalInteractiveShell"]: """Activate IPython's builtin hooks for the duration of the test scope. @@ -40,7 +49,7 @@ def ipython_interactive(request, ipython): yield ipython -def _strip_region_tags(sample_text): +def _strip_region_tags(sample_text: str) -> str: """Remove blank lines and region tags from sample text""" magic_lines = [ line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line @@ -48,7 +57,7 @@ def _strip_region_tags(sample_text): return "\n".join(magic_lines) -def test_jupyter_tutorial(ipython): +def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None: matplotlib.use("agg") ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") diff --git a/samples/snippets/load_table_uri_firestore.py b/samples/snippets/load_table_uri_firestore.py index bf9d01349..6c33fd0ff 100644 --- a/samples/snippets/load_table_uri_firestore.py +++ b/samples/snippets/load_table_uri_firestore.py @@ -13,7 +13,7 @@ # limitations under the License. -def load_table_uri_firestore(table_id): +def load_table_uri_firestore(table_id: str) -> None: orig_table_id = table_id # [START bigquery_load_table_gcs_firestore] # TODO(developer): Set table_id to the ID of the table to create. diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py index ffa02cdf9..552fa2e35 100644 --- a/samples/snippets/load_table_uri_firestore_test.py +++ b/samples/snippets/load_table_uri_firestore_test.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import load_table_uri_firestore +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_firestore(capsys, random_table_id): +def test_load_table_uri_firestore( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_firestore.load_table_uri_firestore(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/snippets/materialized_view.py b/samples/snippets/materialized_view.py index 429bd98b4..adb3688a4 100644 --- a/samples/snippets/materialized_view.py +++ b/samples/snippets/materialized_view.py @@ -12,8 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import typing +from typing import Dict, Optional + +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def create_materialized_view( + override_values: Optional[Dict[str, str]] = None +) -> "bigquery.Table": + if override_values is None: + override_values = {} -def create_materialized_view(override_values={}): # [START bigquery_create_materialized_view] from google.cloud import bigquery @@ -41,7 +52,12 @@ def create_materialized_view(override_values={}): return view -def update_materialized_view(override_values={}): +def update_materialized_view( + override_values: Optional[Dict[str, str]] = None +) -> "bigquery.Table": + if override_values is None: + override_values = {} + # [START bigquery_update_materialized_view] import datetime from google.cloud import bigquery @@ -69,7 +85,10 @@ def update_materialized_view(override_values={}): return view -def delete_materialized_view(override_values={}): +def delete_materialized_view(override_values: Optional[Dict[str, str]] = None) -> None: + if override_values is None: + override_values = {} + # [START bigquery_delete_materialized_view] from google.cloud import bigquery diff --git a/samples/snippets/materialized_view_test.py b/samples/snippets/materialized_view_test.py index 75c6b2106..70869346f 100644 --- a/samples/snippets/materialized_view_test.py +++ b/samples/snippets/materialized_view_test.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +from typing import Iterator import uuid from google.api_core import exceptions @@ -22,18 +23,20 @@ import materialized_view -def temp_suffix(): +def temp_suffix() -> str: now = datetime.datetime.now() return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(autouse=True) -def bigquery_client_patch(monkeypatch, bigquery_client): +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) @pytest.fixture(scope="module") -def dataset_id(bigquery_client): +def dataset_id(bigquery_client: bigquery.Client) -> Iterator[str]: dataset_id = f"mvdataset_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -41,7 +44,9 @@ def dataset_id(bigquery_client): @pytest.fixture(scope="module") -def base_table_id(bigquery_client, project_id, dataset_id): +def base_table_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: base_table_id = f"{project_id}.{dataset_id}.base_{temp_suffix()}" # Schema from materialized views guide: # https://cloud.google.com/bigquery/docs/materialized-views#create @@ -56,13 +61,20 @@ def base_table_id(bigquery_client, project_id, dataset_id): @pytest.fixture(scope="module") -def view_id(bigquery_client, project_id, dataset_id): +def view_id( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +) -> Iterator[str]: view_id = f"{project_id}.{dataset_id}.mview_{temp_suffix()}" yield view_id bigquery_client.delete_table(view_id, not_found_ok=True) -def test_materialized_view(capsys, bigquery_client, base_table_id, view_id): +def test_materialized_view( + capsys: pytest.CaptureFixture[str], + bigquery_client: bigquery.Client, + base_table_id: str, + view_id: str, +) -> None: override_values = { "base_table_id": base_table_id, "view_id": view_id, diff --git a/samples/snippets/mypy.ini b/samples/snippets/mypy.ini new file mode 100644 index 000000000..3cc4b8965 --- /dev/null +++ b/samples/snippets/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +; We require type 
annotations in all samples. +strict = True +exclude = noxfile\.py +warn_unused_configs = True + +[mypy-google.auth,google.oauth2,google_auth_oauthlib,IPython.*,test_utils.*] +ignore_missing_imports = True diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py index ed08b279a..b330a3c21 100644 --- a/samples/snippets/natality_tutorial.py +++ b/samples/snippets/natality_tutorial.py @@ -14,8 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Optional + + +def run_natality_tutorial(override_values: Optional[Dict[str, str]] = None) -> None: + if override_values is None: + override_values = {} -def run_natality_tutorial(override_values={}): # [START bigquery_query_natality_tutorial] """Create a Google BigQuery linear regression input table. diff --git a/samples/snippets/natality_tutorial_test.py b/samples/snippets/natality_tutorial_test.py index d9c89bef2..f56738528 100644 --- a/samples/snippets/natality_tutorial_test.py +++ b/samples/snippets/natality_tutorial_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Iterator, List import uuid from google.cloud import bigquery @@ -21,19 +22,21 @@ @pytest.fixture(scope="module") -def client(): +def client() -> bigquery.Client: return bigquery.Client() @pytest.fixture -def datasets_to_delete(client): - doomed = [] +def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: + doomed: List[str] = [] yield doomed for item in doomed: client.delete_dataset(item, delete_contents=True) -def test_natality_tutorial(client, datasets_to_delete): +def test_natality_tutorial( + client: bigquery.Client, datasets_to_delete: List[str] +) -> None: override_values = { "dataset_id": "natality_regression_{}".format( str(uuid.uuid4()).replace("-", "_") diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py index 1b0ef5b3a..f9628da7d 100644 --- a/samples/snippets/quickstart.py +++ b/samples/snippets/quickstart.py @@ -14,8 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Dict, Optional + + +def run_quickstart(override_values: Optional[Dict[str, str]] = None) -> None: + + if override_values is None: + override_values = {} -def run_quickstart(override_values={}): # [START bigquery_quickstart] # Imports the Google Cloud client library from google.cloud import bigquery diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index a5e3a13e3..b0bad5ee5 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Iterator, List import uuid from google.cloud import bigquery @@ -26,19 +27,23 @@ @pytest.fixture(scope="module") -def client(): +def client() -> bigquery.Client: return bigquery.Client() @pytest.fixture -def datasets_to_delete(client): - doomed = [] +def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]: + doomed: List[str] = [] yield doomed for item in doomed: client.delete_dataset(item, delete_contents=True) -def test_quickstart(capsys, client, datasets_to_delete): +def test_quickstart( + capsys: "pytest.CaptureFixture[str]", + client: bigquery.Client, + datasets_to_delete: List[str], +) -> None: override_values = { "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")), diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4f04611ba..32ac9fbe9 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -10,3 +10,4 @@ pandas==1.1.5; python_version < '3.7' pandas==1.3.4; python_version >= '3.7' pyarrow==6.0.0 pytz==2021.1 +typing-extensions==3.10.0.2 diff --git a/samples/snippets/revoke_dataset_access.py b/samples/snippets/revoke_dataset_access.py new file mode 100644 index 000000000..c8cb731ac --- /dev/null +++ b/samples/snippets/revoke_dataset_access.py @@ -0,0 +1,52 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def revoke_dataset_access(dataset_id: str, entity_id: str) -> None: + original_dataset_id = dataset_id + original_entity_id = entity_id + + # [START bigquery_revoke_dataset_access] + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + dataset_id = "your-project.your_dataset" + + # TODO(developer): Set entity_id to the ID of the email or group from whom you are revoking access. + entity_id = "user-or-group-to-remove@example.com" + # [END bigquery_revoke_dataset_access] + dataset_id = original_dataset_id + entity_id = original_entity_id + # [START bigquery_revoke_dataset_access] + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + dataset = client.get_dataset(dataset_id) # Make an API request. + + entries = list(dataset.access_entries) + dataset.access_entries = [ + entry for entry in entries if entry.entity_id != entity_id + ] + + dataset = client.update_dataset( + dataset, + # Update just the `access_entries` property of the dataset. + ["access_entries"], + ) # Make an API request. 
+
+    full_dataset_id = f"{dataset.project}.{dataset.dataset_id}"
+    print(f"Revoked dataset access for '{entity_id}' to dataset '{full_dataset_id}'.")
+    # [END bigquery_revoke_dataset_access]
diff --git a/samples/snippets/simple_app.py b/samples/snippets/simple_app.py
index c21ae86f4..3d856d4bb 100644
--- a/samples/snippets/simple_app.py
+++ b/samples/snippets/simple_app.py
@@ -22,7 +22,7 @@
 # [END bigquery_simple_app_deps]


-def query_stackoverflow():
+def query_stackoverflow() -> None:
     # [START bigquery_simple_app_client]
     client = bigquery.Client()
     # [END bigquery_simple_app_client]
diff --git a/samples/snippets/simple_app_test.py b/samples/snippets/simple_app_test.py
index 5c608e1fd..de4e1ce34 100644
--- a/samples/snippets/simple_app_test.py
+++ b/samples/snippets/simple_app_test.py
@@ -12,10 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import typing
+
 import simple_app

+if typing.TYPE_CHECKING:
+    import pytest
+

-def test_query_stackoverflow(capsys):
+def test_query_stackoverflow(capsys: "pytest.CaptureFixture[str]") -> None:
     simple_app.query_stackoverflow()
     out, _ = capsys.readouterr()
     assert "views" in out
diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py
index 912fd76e2..ef5ec196a 100644
--- a/samples/snippets/test_update_with_dml.py
+++ b/samples/snippets/test_update_with_dml.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Iterator
+
 from google.cloud import bigquery
 import pytest

@@ -20,14 +22,18 @@

 @pytest.fixture
-def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
+def table_id(
+    bigquery_client: bigquery.Client, project_id: str, dataset_id: str
+) -> Iterator[str]:
     table_id = f"{prefixer.create_prefix()}_update_with_dml"
     yield table_id
     full_table_id = f"{project_id}.{dataset_id}.{table_id}"
     bigquery_client.delete_table(full_table_id, not_found_ok=True)


-def test_update_with_dml(bigquery_client_patch, dataset_id, table_id):
+def test_update_with_dml(
+    bigquery_client_patch: None, dataset_id: str, table_id: str
+) -> None:
     override_values = {
         "dataset_id": dataset_id,
         "table_id": table_id,
diff --git a/samples/snippets/update_dataset_access.py b/samples/snippets/update_dataset_access.py
new file mode 100644
index 000000000..4d66b8b1f
--- /dev/null
+++ b/samples/snippets/update_dataset_access.py
@@ -0,0 +1,70 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def update_dataset_access(dataset_id: str, entity_id: str) -> None:
+    original_dataset_id = dataset_id
+    original_entity_id = entity_id
+
+    # [START bigquery_update_dataset_access]
+
+    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
+    dataset_id = "your-project.your_dataset"
+
+    # TODO(developer): Set entity_id to the ID of the email or group to whom
+    # you are adding access. Alternatively, to the JSON REST API representation
+    # of the entity, such as a view's table reference.
+    entity_id = "user-or-group-to-add@example.com"
+
+    # TODO(developer): Set entity_type to the type of entity you are granting access to.
+    # Common types include:
+    #
+    # * "userByEmail" -- A single user or service account. For example "fred@example.com"
+    # * "groupByEmail" -- A group of users. For example "example@googlegroups.com"
+    # * "view" -- An authorized view. For example
+    #   {"projectId": "p", "datasetId": "d", "tableId": "v"}
+    #
+    # For a complete reference, see the REST API reference documentation:
+    # https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#Dataset.FIELDS.access
+    entity_type = "groupByEmail"
+
+    # TODO(developer): Set role to one of the "Basic roles for datasets"
+    # described here:
+    # https://cloud.google.com/bigquery/docs/access-control-basic-roles#dataset-basic-roles
+    role = "READER"
+    # [END bigquery_update_dataset_access]
+    dataset_id = original_dataset_id
+    entity_id = original_entity_id
+    # [START bigquery_update_dataset_access]
+
+    from google.cloud import bigquery
+
+    # Construct a BigQuery client object.
+    client = bigquery.Client()
+
+    dataset = client.get_dataset(dataset_id)  # Make an API request.
+
+    entries = list(dataset.access_entries)
+    entries.append(
+        bigquery.AccessEntry(role=role, entity_type=entity_type, entity_id=entity_id,)
+    )
+    dataset.access_entries = entries
+
+    dataset = client.update_dataset(dataset, ["access_entries"])  # Make an API request.
+
+    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
+    print(
+        "Updated dataset '{}' with modified user permissions.".format(full_dataset_id)
+    )
+    # [END bigquery_update_dataset_access]
diff --git a/samples/snippets/update_with_dml.py b/samples/snippets/update_with_dml.py
index 7fd09dd80..2d0294ead 100644
--- a/samples/snippets/update_with_dml.py
+++ b/samples/snippets/update_with_dml.py
@@ -14,6 +14,7 @@
 # [START bigquery_update_with_dml]
 import pathlib
+from typing import Dict, Optional

 from google.cloud import bigquery
 from google.cloud.bigquery import enums
@@ -25,7 +26,7 @@ def load_from_newline_delimited_json(
     project_id: str,
     dataset_id: str,
     table_id: str,
-):
+) -> None:
     full_table_id = f"{project_id}.{dataset_id}.{table_id}"
     job_config = bigquery.LoadJobConfig()
     job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
@@ -48,7 +49,7 @@ def load_from_newline_delimited_json(

 def update_with_dml(
     client: bigquery.Client, project_id: str, dataset_id: str, table_id: str
-):
+) -> int:
     query_text = f"""
     UPDATE `{project_id}.{dataset_id}.{table_id}`
     SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0")
@@ -59,11 +60,16 @@ def update_with_dml(
     # Wait for query job to finish.
query_job.result() + assert query_job.num_dml_affected_rows is not None + print(f"DML query modified {query_job.num_dml_affected_rows} rows.") return query_job.num_dml_affected_rows -def run_sample(override_values={}): +def run_sample(override_values: Optional[Dict[str, str]] = None) -> int: + if override_values is None: + override_values = {} + client = bigquery.Client() filepath = pathlib.Path(__file__).parent / "user_sessions_data.json" project_id = client.project diff --git a/samples/snippets/user_credentials.py b/samples/snippets/user_credentials.py index 6089d9fd9..dcd498c42 100644 --- a/samples/snippets/user_credentials.py +++ b/samples/snippets/user_credentials.py @@ -23,7 +23,7 @@ import argparse -def main(project, launch_browser=True): +def main(project: str, launch_browser: bool = True) -> None: # [START bigquery_auth_user_flow] from google_auth_oauthlib import flow diff --git a/samples/snippets/user_credentials_test.py b/samples/snippets/user_credentials_test.py index 829502d25..79ebb2538 100644 --- a/samples/snippets/user_credentials_test.py +++ b/samples/snippets/user_credentials_test.py @@ -13,6 +13,7 @@ # limitations under the License. import os +from typing import Iterator, Union import google.auth import mock @@ -23,9 +24,11 @@ PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] +MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock] + @pytest.fixture -def mock_flow(): +def mock_flow() -> Iterator[MockType]: flow_patch = mock.patch("google_auth_oauthlib.flow.InstalledAppFlow", autospec=True) with flow_patch as flow_mock: @@ -34,7 +37,9 @@ def mock_flow(): yield flow_mock -def test_auth_query_console(mock_flow, capsys): +def test_auth_query_console( + mock_flow: MockType, capsys: pytest.CaptureFixture[str] +) -> None: main(PROJECT, launch_browser=False) out, _ = capsys.readouterr() # Fun fact: William P. Wood was the 1st director of the US Secret Service. diff --git a/samples/snippets/view.py b/samples/snippets/view.py index ad3f11717..5e976f68a 100644 --- a/samples/snippets/view.py +++ b/samples/snippets/view.py @@ -12,8 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
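The new revoke_dataset_access.py and update_dataset_access.py samples added above share one flow: fetch the dataset, edit dataset.access_entries in memory, then write back only that field with client.update_dataset(dataset, ["access_entries"]). A hedged usage sketch of the two entry points, assuming the sample modules are importable and using placeholder IDs:

import revoke_dataset_access
import update_dataset_access

# Placeholder identifiers -- substitute a dataset you own and a real group email.
dataset_id = "your-project.your_dataset"
entity_id = "data-analysts@example.com"

# Grant the group READER access, then remove that access again.
update_dataset_access.update_dataset_access(dataset_id, entity_id)
revoke_dataset_access.revoke_dataset_access(dataset_id, entity_id)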
+import typing +from typing import Dict, Optional, Tuple + +try: + from typing import TypedDict +except ImportError: + from typing_extensions import TypedDict + +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +class OverridesDict(TypedDict, total=False): + analyst_group_email: str + view_dataset_id: str + view_id: str + view_reference: Dict[str, str] + source_dataset_id: str + source_id: str + + +def create_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.Table": + if override_values is None: + override_values = {} -def create_view(override_values={}): # [START bigquery_create_view] from google.cloud import bigquery @@ -43,7 +66,10 @@ def create_view(override_values={}): return view -def get_view(override_values={}): +def get_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.Table": + if override_values is None: + override_values = {} + # [START bigquery_get_view] from google.cloud import bigquery @@ -65,7 +91,10 @@ def get_view(override_values={}): return view -def update_view(override_values={}): +def update_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.Table": + if override_values is None: + override_values = {} + # [START bigquery_update_view_query] from google.cloud import bigquery @@ -95,7 +124,13 @@ def update_view(override_values={}): return view -def grant_access(override_values={}): +def grant_access( + override_values: Optional[OverridesDict] = None, +) -> Tuple["bigquery.Dataset", "bigquery.Dataset"]: + + if override_values is None: + override_values = {} + # [START bigquery_grant_view_access] from google.cloud import bigquery diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py index 77105b61a..4d0d43b77 100644 --- a/samples/snippets/view_test.py +++ b/samples/snippets/view_test.py @@ -13,6 +13,7 @@ # limitations under the License. 
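OverridesDict above is a typing.TypedDict declared with total=False, so callers of grant_access may pass any subset of the documented keys while mypy still checks key names and value types. A small sketch of the same idea with a hypothetical config type that is not part of the samples:

try:
    from typing import TypedDict  # Python 3.8+
except ImportError:
    from typing_extensions import TypedDict


class QueryConfig(TypedDict, total=False):
    project: str
    location: str


def describe(config: QueryConfig) -> str:
    # total=False makes every key optional, so read with .get() and a default.
    project = config.get("project", "default-project")
    location = config.get("location", "US")
    return f"{project} / {location}"


print(describe({"location": "EU"}))  # any subset of the keys type-checks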
import datetime +from typing import Iterator import uuid from google.cloud import bigquery @@ -21,18 +22,20 @@ import view -def temp_suffix(): +def temp_suffix() -> str: now = datetime.datetime.now() return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(autouse=True) -def bigquery_client_patch(monkeypatch, bigquery_client): +def bigquery_client_patch( + monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client +) -> None: monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) @pytest.fixture(scope="module") -def view_dataset_id(bigquery_client, project_id): +def view_dataset_id(bigquery_client: bigquery.Client, project_id: str) -> Iterator[str]: dataset_id = f"{project_id}.view_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -40,14 +43,16 @@ def view_dataset_id(bigquery_client, project_id): @pytest.fixture(scope="module") -def view_id(bigquery_client, view_dataset_id): +def view_id(bigquery_client: bigquery.Client, view_dataset_id: str) -> Iterator[str]: view_id = f"{view_dataset_id}.my_view" yield view_id bigquery_client.delete_table(view_id, not_found_ok=True) @pytest.fixture(scope="module") -def source_dataset_id(bigquery_client, project_id): +def source_dataset_id( + bigquery_client: bigquery.Client, project_id: str +) -> Iterator[str]: dataset_id = f"{project_id}.view_{temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id @@ -55,7 +60,9 @@ def source_dataset_id(bigquery_client, project_id): @pytest.fixture(scope="module") -def source_table_id(bigquery_client, source_dataset_id): +def source_table_id( + bigquery_client: bigquery.Client, source_dataset_id: str +) -> Iterator[str]: source_table_id = f"{source_dataset_id}.us_states" job_config = bigquery.LoadJobConfig( schema=[ @@ -74,7 +81,13 @@ def source_table_id(bigquery_client, source_dataset_id): bigquery_client.delete_table(source_table_id, not_found_ok=True) -def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_id): +def test_view( + capsys: pytest.CaptureFixture[str], + view_id: str, + view_dataset_id: str, + source_table_id: str, + source_dataset_id: str, +) -> None: override_values = { "view_id": view_id, "source_id": source_table_id, @@ -99,7 +112,7 @@ def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_ assert view_id in out project_id, dataset_id, table_id = view_id.split(".") - override_values = { + overrides: view.OverridesDict = { "analyst_group_email": "cloud-dpes-bigquery@google.com", "view_dataset_id": view_dataset_id, "source_dataset_id": source_dataset_id, @@ -109,7 +122,7 @@ def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_ "tableId": table_id, }, } - view_dataset, source_dataset = view.grant_access(override_values) + view_dataset, source_dataset = view.grant_access(overrides) assert len(view_dataset.access_entries) != 0 assert len(source_dataset.access_entries) != 0 out, _ = capsys.readouterr() diff --git a/samples/table_exists.py b/samples/table_exists.py index 152d95534..6edba9239 100644 --- a/samples/table_exists.py +++ b/samples/table_exists.py @@ -13,7 +13,7 @@ # limitations under the License. 
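The module-scoped fixtures above are annotated as Iterator[str] because a fixture that yields once behaves like a one-item iterator: the yielded value becomes the fixture value and the code after the yield is teardown. A minimal sketch with a hypothetical fixture name:

from typing import Iterator

import pytest


@pytest.fixture
def scratch_name() -> Iterator[str]:
    name = "scratch_resource"
    yield name  # value handed to the test
    # Teardown runs after the test that used this fixture finishes.
    print(f"cleaning up {name}")


def test_scratch_name(scratch_name: str) -> None:
    # The test receives the yielded string, not the generator.
    assert scratch_name.startswith("scratch")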
-def table_exists(table_id): +def table_exists(table_id: str) -> None: # [START bigquery_table_exists] from google.cloud import bigquery diff --git a/samples/table_insert_rows.py b/samples/table_insert_rows.py index 24d739871..897133330 100644 --- a/samples/table_insert_rows.py +++ b/samples/table_insert_rows.py @@ -13,7 +13,7 @@ # limitations under the License. -def table_insert_rows(table_id): +def table_insert_rows(table_id: str) -> None: # [START bigquery_table_insert_rows] from google.cloud import bigquery diff --git a/samples/table_insert_rows_explicit_none_insert_ids.py b/samples/table_insert_rows_explicit_none_insert_ids.py index d91792b82..1ccb1acc4 100644 --- a/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/samples/table_insert_rows_explicit_none_insert_ids.py @@ -13,7 +13,7 @@ # limitations under the License. -def table_insert_rows_explicit_none_insert_ids(table_id): +def table_insert_rows_explicit_none_insert_ids(table_id: str) -> None: # [START bigquery_table_insert_rows_explicit_none_insert_ids] from google.cloud import bigquery diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index 4764a571f..b7a2ad587 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. import datetime +from typing import Iterator import uuid import google.auth @@ -23,7 +24,7 @@ @pytest.fixture(scope="session", autouse=True) -def client(): +def client() -> bigquery.Client: credentials, project = google.auth.default( scopes=[ "https://www.googleapis.com/auth/drive", @@ -33,12 +34,12 @@ def client(): real_client = bigquery.Client(credentials=credentials, project=project) mock_client = mock.create_autospec(bigquery.Client) mock_client.return_value = real_client - bigquery.Client = mock_client + bigquery.Client = mock_client # type: ignore return real_client @pytest.fixture -def random_table_id(dataset_id): +def random_table_id(dataset_id: str) -> str: now = datetime.datetime.now() random_table_id = "example_table_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -47,7 +48,7 @@ def random_table_id(dataset_id): @pytest.fixture -def random_dataset_id(client): +def random_dataset_id(client: bigquery.Client) -> Iterator[str]: now = datetime.datetime.now() random_dataset_id = "example_dataset_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -57,7 +58,7 @@ def random_dataset_id(client): @pytest.fixture -def random_routine_id(dataset_id): +def random_routine_id(dataset_id: str) -> str: now = datetime.datetime.now() random_routine_id = "example_routine_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -66,7 +67,7 @@ def random_routine_id(dataset_id): @pytest.fixture -def dataset_id(client): +def dataset_id(client: bigquery.Client) -> Iterator[str]: now = datetime.datetime.now() dataset_id = "python_dataset_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -77,7 +78,7 @@ def dataset_id(client): @pytest.fixture -def table_id(client, dataset_id): +def table_id(client: bigquery.Client, dataset_id: str) -> Iterator[str]: now = datetime.datetime.now() table_id = "python_table_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -90,7 +91,7 @@ def table_id(client, dataset_id): @pytest.fixture -def table_with_schema_id(client, dataset_id): +def table_with_schema_id(client: bigquery.Client, dataset_id: str) -> Iterator[str]: now = datetime.datetime.now() table_id = "python_table_with_schema_{}_{}".format( 
now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -106,12 +107,12 @@ def table_with_schema_id(client, dataset_id): @pytest.fixture -def table_with_data_id(): +def table_with_data_id() -> str: return "bigquery-public-data.samples.shakespeare" @pytest.fixture -def routine_id(client, dataset_id): +def routine_id(client: bigquery.Client, dataset_id: str) -> Iterator[str]: now = datetime.datetime.now() routine_id = "python_routine_sample_{}_{}".format( now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] @@ -136,7 +137,7 @@ def routine_id(client, dataset_id): @pytest.fixture -def model_id(client, dataset_id): +def model_id(client: bigquery.Client, dataset_id: str) -> str: model_id = "{}.{}".format(dataset_id, uuid.uuid4().hex) # The only way to create a model resource is via SQL. @@ -162,5 +163,5 @@ def model_id(client, dataset_id): @pytest.fixture -def kms_key_name(): +def kms_key_name() -> str: return "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test" diff --git a/samples/tests/test_add_empty_column.py b/samples/tests/test_add_empty_column.py index d89fcb6b7..5c7184766 100644 --- a/samples/tests/test_add_empty_column.py +++ b/samples/tests/test_add_empty_column.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import add_empty_column +if typing.TYPE_CHECKING: + import pytest + -def test_add_empty_column(capsys, table_id): +def test_add_empty_column(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: add_empty_column.add_empty_column(table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_browse_table_data.py b/samples/tests/test_browse_table_data.py index a5f647bdb..368e5cad6 100644 --- a/samples/tests/test_browse_table_data.py +++ b/samples/tests/test_browse_table_data.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import browse_table_data +if typing.TYPE_CHECKING: + import pytest + -def test_browse_table_data(capsys, table_with_data_id): +def test_browse_table_data( + capsys: "pytest.CaptureFixture[str]", table_with_data_id: str +) -> None: browse_table_data.browse_table_data(table_with_data_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_client_list_jobs.py b/samples/tests/test_client_list_jobs.py index 896950a82..a2845b7ad 100644 --- a/samples/tests/test_client_list_jobs.py +++ b/samples/tests/test_client_list_jobs.py @@ -12,11 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_list_jobs from .. import create_job +if typing.TYPE_CHECKING: + from google.cloud import bigquery + import pytest + -def test_client_list_jobs(capsys, client): +def test_client_list_jobs( + capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client" +) -> None: job = create_job.create_job() client.cancel_job(job.job_id) diff --git a/samples/tests/test_client_load_partitioned_table.py b/samples/tests/test_client_load_partitioned_table.py index f1d72a858..24f86c700 100644 --- a/samples/tests/test_client_load_partitioned_table.py +++ b/samples/tests/test_client_load_partitioned_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import client_load_partitioned_table +if typing.TYPE_CHECKING: + import pytest + -def test_client_load_partitioned_table(capsys, random_table_id): +def test_client_load_partitioned_table( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: client_load_partitioned_table.client_load_partitioned_table(random_table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query.py b/samples/tests/test_client_query.py index 810c46a17..a8e3c343e 100644 --- a/samples/tests/test_client_query.py +++ b/samples/tests/test_client_query.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query +if typing.TYPE_CHECKING: + import pytest + -def test_client_query(capsys,): +def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None: client_query.client_query() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_add_column.py b/samples/tests/test_client_query_add_column.py index 254533f78..1eb5a1ed6 100644 --- a/samples/tests/test_client_query_add_column.py +++ b/samples/tests/test_client_query_add_column.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import client_query_add_column +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_add_column(capsys, random_table_id, client): +def test_client_query_add_column( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/samples/tests/test_client_query_batch.py b/samples/tests/test_client_query_batch.py index c5e19985d..548fe3ac3 100644 --- a/samples/tests/test_client_query_batch.py +++ b/samples/tests/test_client_query_batch.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_batch +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_batch(capsys,): +def test_client_query_batch(capsys: "pytest.CaptureFixture[str]") -> None: job = client_query_batch.client_query_batch() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_destination_table.py b/samples/tests/test_client_query_destination_table.py index 6bcdd498a..067bc16ec 100644 --- a/samples/tests/test_client_query_destination_table.py +++ b/samples/tests/test_client_query_destination_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import client_query_destination_table +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table(capsys, table_id): +def test_client_query_destination_table( + capsys: "pytest.CaptureFixture[str]", table_id: str +) -> None: client_query_destination_table.client_query_destination_table(table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py index b4bdd588c..02b131531 100644 --- a/samples/tests/test_client_query_destination_table_clustered.py +++ b/samples/tests/test_client_query_destination_table_clustered.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_destination_table_clustered +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table_clustered(capsys, random_table_id): +def test_client_query_destination_table_clustered( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: client_query_destination_table_clustered.client_query_destination_table_clustered( random_table_id diff --git a/samples/tests/test_client_query_destination_table_cmek.py b/samples/tests/test_client_query_destination_table_cmek.py index 4f9e3bc9a..f2fe3bc39 100644 --- a/samples/tests/test_client_query_destination_table_cmek.py +++ b/samples/tests/test_client_query_destination_table_cmek.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_destination_table_cmek +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table_cmek(capsys, random_table_id, kms_key_name): +def test_client_query_destination_table_cmek( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str +) -> None: client_query_destination_table_cmek.client_query_destination_table_cmek( random_table_id, kms_key_name diff --git a/samples/tests/test_client_query_destination_table_legacy.py b/samples/tests/test_client_query_destination_table_legacy.py index 46077497b..0071ee4a4 100644 --- a/samples/tests/test_client_query_destination_table_legacy.py +++ b/samples/tests/test_client_query_destination_table_legacy.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_destination_table_legacy +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_destination_table_legacy(capsys, random_table_id): +def test_client_query_destination_table_legacy( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: client_query_destination_table_legacy.client_query_destination_table_legacy( random_table_id diff --git a/samples/tests/test_client_query_dry_run.py b/samples/tests/test_client_query_dry_run.py index 5cbf2e3fa..cffb152ef 100644 --- a/samples/tests/test_client_query_dry_run.py +++ b/samples/tests/test_client_query_dry_run.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import client_query_dry_run +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_dry_run(capsys,): +def test_client_query_dry_run(capsys: "pytest.CaptureFixture[str]") -> None: query_job = client_query_dry_run.client_query_dry_run() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_legacy_sql.py b/samples/tests/test_client_query_legacy_sql.py index ab240fad1..b12b5a934 100644 --- a/samples/tests/test_client_query_legacy_sql.py +++ b/samples/tests/test_client_query_legacy_sql.py @@ -13,11 +13,15 @@ # limitations under the License. import re +import typing from .. import client_query_legacy_sql +if typing.TYPE_CHECKING: + import pytest -def test_client_query_legacy_sql(capsys,): + +def test_client_query_legacy_sql(capsys: "pytest.CaptureFixture[str]") -> None: client_query_legacy_sql.client_query_legacy_sql() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_relax_column.py b/samples/tests/test_client_query_relax_column.py index 0c5b7aa6f..f910d61f0 100644 --- a/samples/tests/test_client_query_relax_column.py +++ b/samples/tests/test_client_query_relax_column.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import client_query_relax_column +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_relax_column(capsys, random_table_id, client): +def test_client_query_relax_column( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client, +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/samples/tests/test_client_query_w_array_params.py b/samples/tests/test_client_query_w_array_params.py index 07e0294e9..fcd3f6972 100644 --- a/samples/tests/test_client_query_w_array_params.py +++ b/samples/tests/test_client_query_w_array_params.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_array_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_array_params(capsys,): +def test_client_query_w_array_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_array_params.client_query_w_array_params() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_w_named_params.py b/samples/tests/test_client_query_w_named_params.py index 2970dfdc4..85ef1dc4a 100644 --- a/samples/tests/test_client_query_w_named_params.py +++ b/samples/tests/test_client_query_w_named_params.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_named_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_named_params(capsys,): +def test_client_query_w_named_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_named_params.client_query_w_named_params() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_w_positional_params.py b/samples/tests/test_client_query_w_positional_params.py index e41ffa825..8ade676ab 100644 --- a/samples/tests/test_client_query_w_positional_params.py +++ b/samples/tests/test_client_query_w_positional_params.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import client_query_w_positional_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_positional_params(capsys,): +def test_client_query_w_positional_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_positional_params.client_query_w_positional_params() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_w_struct_params.py b/samples/tests/test_client_query_w_struct_params.py index 03083a3a7..3198dbad5 100644 --- a/samples/tests/test_client_query_w_struct_params.py +++ b/samples/tests/test_client_query_w_struct_params.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_struct_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_struct_params(capsys,): +def test_client_query_w_struct_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_struct_params.client_query_w_struct_params() out, err = capsys.readouterr() diff --git a/samples/tests/test_client_query_w_timestamp_params.py b/samples/tests/test_client_query_w_timestamp_params.py index 9dddcb9a0..a3bbccdd4 100644 --- a/samples/tests/test_client_query_w_timestamp_params.py +++ b/samples/tests/test_client_query_w_timestamp_params.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import client_query_w_timestamp_params +if typing.TYPE_CHECKING: + import pytest + -def test_client_query_w_timestamp_params(capsys,): +def test_client_query_w_timestamp_params(capsys: "pytest.CaptureFixture[str]") -> None: client_query_w_timestamp_params.client_query_w_timestamp_params() out, err = capsys.readouterr() diff --git a/samples/tests/test_copy_table.py b/samples/tests/test_copy_table.py index 0b95c5443..64fbdd778 100644 --- a/samples/tests/test_copy_table.py +++ b/samples/tests/test_copy_table.py @@ -12,10 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import copy_table +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_copy_table(capsys, table_with_data_id, random_table_id, client): +def test_copy_table( + capsys: "pytest.CaptureFixture[str]", + table_with_data_id: str, + random_table_id: str, + client: "bigquery.Client", +) -> None: copy_table.copy_table(table_with_data_id, random_table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_copy_table_cmek.py b/samples/tests/test_copy_table_cmek.py index ac04675c9..061410b99 100644 --- a/samples/tests/test_copy_table_cmek.py +++ b/samples/tests/test_copy_table_cmek.py @@ -12,10 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import copy_table_cmek +if typing.TYPE_CHECKING: + import pytest + -def test_copy_table_cmek(capsys, random_table_id, table_with_data_id, kms_key_name): +def test_copy_table_cmek( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + table_with_data_id: str, + kms_key_name: str, +) -> None: copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name) out, err = capsys.readouterr() diff --git a/samples/tests/test_copy_table_multiple_source.py b/samples/tests/test_copy_table_multiple_source.py index 5bc4668b0..e8b27d2a9 100644 --- a/samples/tests/test_copy_table_multiple_source.py +++ b/samples/tests/test_copy_table_multiple_source.py @@ -13,12 +13,22 @@ # limitations under the License. import io +import typing + from google.cloud import bigquery from .. import copy_table_multiple_source +if typing.TYPE_CHECKING: + import pytest + -def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, client): +def test_copy_table_multiple_source( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + random_dataset_id: str, + client: bigquery.Client, +) -> None: dataset = bigquery.Dataset(random_dataset_id) dataset.location = "US" diff --git a/samples/tests/test_create_dataset.py b/samples/tests/test_create_dataset.py index a00003803..e7a897f8f 100644 --- a/samples/tests/test_create_dataset.py +++ b/samples/tests/test_create_dataset.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_create_dataset(capsys, random_dataset_id): +def test_create_dataset( + capsys: "pytest.CaptureFixture[str]", random_dataset_id: str +) -> None: create_dataset.create_dataset(random_dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_create_job.py b/samples/tests/test_create_job.py index eab4b3e48..9e6621e91 100644 --- a/samples/tests/test_create_job.py +++ b/samples/tests/test_create_job.py @@ -12,10 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_job +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_create_job(capsys, client): +def test_create_job( + capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client" +) -> None: query_job = create_job.create_job() client.cancel_job(query_job.job_id, location=query_job.location) out, err = capsys.readouterr() diff --git a/samples/tests/test_create_table.py b/samples/tests/test_create_table.py index 48e52889a..98a0fa936 100644 --- a/samples/tests/test_create_table.py +++ b/samples/tests/test_create_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import create_table +if typing.TYPE_CHECKING: + import pytest + -def test_create_table(capsys, random_table_id): +def test_create_table( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: create_table.create_table(random_table_id) out, err = capsys.readouterr() assert "Created table {}".format(random_table_id) in out diff --git a/samples/tests/test_create_table_clustered.py b/samples/tests/test_create_table_clustered.py index 8eab5d48b..a3e483441 100644 --- a/samples/tests/test_create_table_clustered.py +++ b/samples/tests/test_create_table_clustered.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_table_clustered +if typing.TYPE_CHECKING: + import pytest + -def test_create_table_clustered(capsys, random_table_id): +def test_create_table_clustered( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: table = create_table_clustered.create_table_clustered(random_table_id) out, _ = capsys.readouterr() assert "Created clustered table {}".format(random_table_id) in out diff --git a/samples/tests/test_create_table_range_partitioned.py b/samples/tests/test_create_table_range_partitioned.py index 9745966bf..1c06b66fe 100644 --- a/samples/tests/test_create_table_range_partitioned.py +++ b/samples/tests/test_create_table_range_partitioned.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import create_table_range_partitioned +if typing.TYPE_CHECKING: + import pytest + -def test_create_table_range_partitioned(capsys, random_table_id): +def test_create_table_range_partitioned( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: table = create_table_range_partitioned.create_table_range_partitioned( random_table_id ) diff --git a/samples/tests/test_dataset_exists.py b/samples/tests/test_dataset_exists.py index 6bc38b4d2..bfef4368f 100644 --- a/samples/tests/test_dataset_exists.py +++ b/samples/tests/test_dataset_exists.py @@ -12,12 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import dataset_exists +if typing.TYPE_CHECKING: + import pytest + -def test_dataset_exists(capsys, random_dataset_id, client): +def test_dataset_exists( + capsys: "pytest.CaptureFixture[str]", + random_dataset_id: str, + client: bigquery.Client, +) -> None: dataset_exists.dataset_exists(random_dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_dataset_label_samples.py b/samples/tests/test_dataset_label_samples.py index 0dbb2a76b..75a024856 100644 --- a/samples/tests/test_dataset_label_samples.py +++ b/samples/tests/test_dataset_label_samples.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_dataset_labels from .. import get_dataset_labels from .. 
import label_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_dataset_label_samples(capsys, dataset_id): +def test_dataset_label_samples( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: label_dataset.label_dataset(dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_delete_dataset.py b/samples/tests/test_delete_dataset.py index 1f9b3c823..9347bf185 100644 --- a/samples/tests/test_delete_dataset.py +++ b/samples/tests/test_delete_dataset.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_delete_dataset(capsys, dataset_id): +def test_delete_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: delete_dataset.delete_dataset(dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_delete_table.py b/samples/tests/test_delete_table.py index 7065743b0..aca2df62f 100644 --- a/samples/tests/test_delete_table.py +++ b/samples/tests/test_delete_table.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_table +if typing.TYPE_CHECKING: + import pytest + -def test_delete_table(capsys, table_id): +def test_delete_table(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None: delete_table.delete_table(table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_download_public_data.py b/samples/tests/test_download_public_data.py index 2412c147f..02c2c6f9c 100644 --- a/samples/tests/test_download_public_data.py +++ b/samples/tests/test_download_public_data.py @@ -21,7 +21,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data(caplog, capsys): +def test_download_public_data( + caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] +) -> None: # Enable debug-level logging to verify the BigQuery Storage API is used. caplog.set_level(logging.DEBUG) diff --git a/samples/tests/test_download_public_data_sandbox.py b/samples/tests/test_download_public_data_sandbox.py index 08e1aab73..e86f604ad 100644 --- a/samples/tests/test_download_public_data_sandbox.py +++ b/samples/tests/test_download_public_data_sandbox.py @@ -21,7 +21,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data_sandbox(caplog, capsys): +def test_download_public_data_sandbox( + caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] +) -> None: # Enable debug-level logging to verify the BigQuery Storage API is used. caplog.set_level(logging.DEBUG) diff --git a/samples/tests/test_get_dataset.py b/samples/tests/test_get_dataset.py index 3afdb00d3..97b30541b 100644 --- a/samples/tests/test_get_dataset.py +++ b/samples/tests/test_get_dataset.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import get_dataset +if typing.TYPE_CHECKING: + import pytest + -def test_get_dataset(capsys, dataset_id): +def test_get_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None: get_dataset.get_dataset(dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_get_table.py b/samples/tests/test_get_table.py index 8bbd0681b..e6383010f 100644 --- a/samples/tests/test_get_table.py +++ b/samples/tests/test_get_table.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import get_table +if typing.TYPE_CHECKING: + import pytest + -def test_get_table(capsys, random_table_id, client): +def test_get_table( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/samples/tests/test_list_datasets.py b/samples/tests/test_list_datasets.py index 1610d0e4a..f51fe18f1 100644 --- a/samples/tests/test_list_datasets.py +++ b/samples/tests/test_list_datasets.py @@ -12,10 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import list_datasets +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_list_datasets(capsys, dataset_id, client): +def test_list_datasets( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, client: "bigquery.Client" +) -> None: list_datasets.list_datasets() out, err = capsys.readouterr() assert "Datasets in project {}:".format(client.project) in out diff --git a/samples/tests/test_list_datasets_by_label.py b/samples/tests/test_list_datasets_by_label.py index 5b375f4f4..ee6b9a999 100644 --- a/samples/tests/test_list_datasets_by_label.py +++ b/samples/tests/test_list_datasets_by_label.py @@ -12,10 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import list_datasets_by_label +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_list_datasets_by_label(capsys, dataset_id, client): +def test_list_datasets_by_label( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, client: "bigquery.Client" +) -> None: dataset = client.get_dataset(dataset_id) dataset.labels = {"color": "green"} dataset = client.update_dataset(dataset, ["labels"]) diff --git a/samples/tests/test_list_tables.py b/samples/tests/test_list_tables.py index f9426aa53..7c726accc 100644 --- a/samples/tests/test_list_tables.py +++ b/samples/tests/test_list_tables.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import list_tables +if typing.TYPE_CHECKING: + import pytest + -def test_list_tables(capsys, dataset_id, table_id): +def test_list_tables( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, table_id: str +) -> None: list_tables.list_tables(dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py index bafdc2051..bbf3c671f 100644 --- a/samples/tests/test_load_table_clustered.py +++ b/samples/tests/test_load_table_clustered.py @@ -12,10 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import load_table_clustered +if typing.TYPE_CHECKING: + import pytest + from google.cloud import bigquery + -def test_load_table_clustered(capsys, random_table_id, client): +def test_load_table_clustered( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + client: "bigquery.Client", +) -> None: table = load_table_clustered.load_table_clustered(random_table_id) diff --git a/samples/tests/test_load_table_dataframe.py b/samples/tests/test_load_table_dataframe.py index 777967959..152c82f8c 100644 --- a/samples/tests/test_load_table_dataframe.py +++ b/samples/tests/test_load_table_dataframe.py @@ -12,16 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + import pytest from .. import load_table_dataframe +if typing.TYPE_CHECKING: + from google.cloud import bigquery + pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") -def test_load_table_dataframe(capsys, client, random_table_id): +def test_load_table_dataframe( + capsys: pytest.CaptureFixture[str], client: "bigquery.Client", random_table_id: str, +) -> None: table = load_table_dataframe.load_table_dataframe(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_file.py b/samples/tests/test_load_table_file.py index a7ebe7682..95b06c7f6 100644 --- a/samples/tests/test_load_table_file.py +++ b/samples/tests/test_load_table_file.py @@ -13,14 +13,19 @@ # limitations under the License. import os +import typing from google.cloud import bigquery from .. import load_table_file +if typing.TYPE_CHECKING: + import pytest -def test_load_table_file(capsys, random_table_id, client): +def test_load_table_file( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: samples_test_dir = os.path.abspath(os.path.dirname(__file__)) file_path = os.path.join( samples_test_dir, "..", "..", "tests", "data", "people.csv" diff --git a/samples/tests/test_load_table_uri_autodetect_csv.py b/samples/tests/test_load_table_uri_autodetect_csv.py index a40719783..c9b410850 100644 --- a/samples/tests/test_load_table_uri_autodetect_csv.py +++ b/samples/tests/test_load_table_uri_autodetect_csv.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_autodetect_csv +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_autodetect_csv(capsys, random_table_id): +def test_load_table_uri_autodetect_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_load_table_uri_autodetect_json.py b/samples/tests/test_load_table_uri_autodetect_json.py index df14d26ed..2c68a13db 100644 --- a/samples/tests/test_load_table_uri_autodetect_json.py +++ b/samples/tests/test_load_table_uri_autodetect_json.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import load_table_uri_autodetect_json +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_autodetect_csv(capsys, random_table_id): +def test_load_table_uri_autodetect_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_load_table_uri_avro.py b/samples/tests/test_load_table_uri_avro.py index 0be29d6b3..d0be44aca 100644 --- a/samples/tests/test_load_table_uri_avro.py +++ b/samples/tests/test_load_table_uri_avro.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_avro +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_avro(capsys, random_table_id): +def test_load_table_uri_avro( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_avro.load_table_uri_avro(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_cmek.py b/samples/tests/test_load_table_uri_cmek.py index c15dad9a7..1eb873843 100644 --- a/samples/tests/test_load_table_uri_cmek.py +++ b/samples/tests/test_load_table_uri_cmek.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_cmek +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_cmek(capsys, random_table_id, kms_key_name): +def test_load_table_uri_cmek( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str +) -> None: load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_uri_csv.py b/samples/tests/test_load_table_uri_csv.py index fbcc69358..a57224c84 100644 --- a/samples/tests/test_load_table_uri_csv.py +++ b/samples/tests/test_load_table_uri_csv.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_csv +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_csv(capsys, random_table_id): +def test_load_table_uri_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_csv.load_table_uri_csv(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_uri_json.py b/samples/tests/test_load_table_uri_json.py index e054cb07a..3ad0ce29b 100644 --- a/samples/tests/test_load_table_uri_json.py +++ b/samples/tests/test_load_table_uri_json.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import load_table_uri_json +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_json(capsys, random_table_id): +def test_load_table_uri_json( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_json.load_table_uri_json(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_uri_orc.py b/samples/tests/test_load_table_uri_orc.py index 96dc72022..f31e8cabb 100644 --- a/samples/tests/test_load_table_uri_orc.py +++ b/samples/tests/test_load_table_uri_orc.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_orc +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_orc(capsys, random_table_id): +def test_load_table_uri_orc( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_orc.load_table_uri_orc(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_uri_parquet.py b/samples/tests/test_load_table_uri_parquet.py index 81ba3fcef..5404e8584 100644 --- a/samples/tests/test_load_table_uri_parquet.py +++ b/samples/tests/test_load_table_uri_parquet.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_parquet +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_json(capsys, random_table_id): +def test_load_table_uri_json( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_parquet.load_table_uri_parquet(random_table_id) out, _ = capsys.readouterr() diff --git a/samples/tests/test_load_table_uri_truncate_avro.py b/samples/tests/test_load_table_uri_truncate_avro.py index ba680cabd..19b62fe7e 100644 --- a/samples/tests/test_load_table_uri_truncate_avro.py +++ b/samples/tests/test_load_table_uri_truncate_avro.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_avro +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_avro(capsys, random_table_id): +def test_load_table_uri_truncate_avro( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_avro.load_table_uri_truncate_avro(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_truncate_csv.py b/samples/tests/test_load_table_uri_truncate_csv.py index 5c1da7dce..9bc467cd0 100644 --- a/samples/tests/test_load_table_uri_truncate_csv.py +++ b/samples/tests/test_load_table_uri_truncate_csv.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_csv +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_csv(capsys, random_table_id): +def test_load_table_uri_truncate_csv( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_csv.load_table_uri_truncate_csv(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." 
in out diff --git a/samples/tests/test_load_table_uri_truncate_json.py b/samples/tests/test_load_table_uri_truncate_json.py index 180ca7f40..cdf96454b 100644 --- a/samples/tests/test_load_table_uri_truncate_json.py +++ b/samples/tests/test_load_table_uri_truncate_json.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_json +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_json(capsys, random_table_id): +def test_load_table_uri_truncate_json( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_json.load_table_uri_truncate_json(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_truncate_orc.py b/samples/tests/test_load_table_uri_truncate_orc.py index 322bf3127..041923da9 100644 --- a/samples/tests/test_load_table_uri_truncate_orc.py +++ b/samples/tests/test_load_table_uri_truncate_orc.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_orc +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_orc(capsys, random_table_id): +def test_load_table_uri_truncate_orc( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_orc.load_table_uri_truncate_orc(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_load_table_uri_truncate_parquet.py b/samples/tests/test_load_table_uri_truncate_parquet.py index ca901defa..2139f316f 100644 --- a/samples/tests/test_load_table_uri_truncate_parquet.py +++ b/samples/tests/test_load_table_uri_truncate_parquet.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import load_table_uri_truncate_parquet +if typing.TYPE_CHECKING: + import pytest + -def test_load_table_uri_truncate_parquet(capsys, random_table_id): +def test_load_table_uri_truncate_parquet( + capsys: "pytest.CaptureFixture[str]", random_table_id: str +) -> None: load_table_uri_truncate_parquet.load_table_uri_truncate_parquet(random_table_id) out, _ = capsys.readouterr() assert "Loaded 50 rows." in out diff --git a/samples/tests/test_model_samples.py b/samples/tests/test_model_samples.py index ebefad846..ed82dd678 100644 --- a/samples/tests/test_model_samples.py +++ b/samples/tests/test_model_samples.py @@ -12,13 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import delete_model from .. import get_model from .. import list_models from .. import update_model +if typing.TYPE_CHECKING: + import pytest + -def test_model_samples(capsys, dataset_id, model_id): +def test_model_samples( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, model_id: str +) -> None: """Since creating a model is a long operation, test all model samples in the same test, following a typical end-to-end flow. 
""" diff --git a/samples/tests/test_query_external_gcs_temporary_table.py b/samples/tests/test_query_external_gcs_temporary_table.py index 022b327be..9590f3d7a 100644 --- a/samples/tests/test_query_external_gcs_temporary_table.py +++ b/samples/tests/test_query_external_gcs_temporary_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_external_gcs_temporary_table +if typing.TYPE_CHECKING: + import pytest + -def test_query_external_gcs_temporary_table(capsys,): +def test_query_external_gcs_temporary_table( + capsys: "pytest.CaptureFixture[str]", +) -> None: query_external_gcs_temporary_table.query_external_gcs_temporary_table() out, err = capsys.readouterr() diff --git a/samples/tests/test_query_external_sheets_permanent_table.py b/samples/tests/test_query_external_sheets_permanent_table.py index a00930cad..851839054 100644 --- a/samples/tests/test_query_external_sheets_permanent_table.py +++ b/samples/tests/test_query_external_sheets_permanent_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_external_sheets_permanent_table +if typing.TYPE_CHECKING: + import pytest + -def test_query_external_sheets_permanent_table(capsys, dataset_id): +def test_query_external_sheets_permanent_table( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: query_external_sheets_permanent_table.query_external_sheets_permanent_table( dataset_id diff --git a/samples/tests/test_query_external_sheets_temporary_table.py b/samples/tests/test_query_external_sheets_temporary_table.py index 8274787cb..58e0cb394 100644 --- a/samples/tests/test_query_external_sheets_temporary_table.py +++ b/samples/tests/test_query_external_sheets_temporary_table.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_external_sheets_temporary_table +if typing.TYPE_CHECKING: + import pytest + -def test_query_external_sheets_temporary_table(capsys): +def test_query_external_sheets_temporary_table( + capsys: "pytest.CaptureFixture[str]", +) -> None: query_external_sheets_temporary_table.query_external_sheets_temporary_table() out, err = capsys.readouterr() diff --git a/samples/tests/test_query_no_cache.py b/samples/tests/test_query_no_cache.py index df17d0d0b..f3fb039c9 100644 --- a/samples/tests/test_query_no_cache.py +++ b/samples/tests/test_query_no_cache.py @@ -13,11 +13,15 @@ # limitations under the License. import re +import typing from .. import query_no_cache +if typing.TYPE_CHECKING: + import pytest -def test_query_no_cache(capsys,): + +def test_query_no_cache(capsys: "pytest.CaptureFixture[str]") -> None: query_no_cache.query_no_cache() out, err = capsys.readouterr() diff --git a/samples/tests/test_query_pagination.py b/samples/tests/test_query_pagination.py index 7ab049c8c..daf711e49 100644 --- a/samples/tests/test_query_pagination.py +++ b/samples/tests/test_query_pagination.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import query_pagination +if typing.TYPE_CHECKING: + import pytest + -def test_query_pagination(capsys,): +def test_query_pagination(capsys: "pytest.CaptureFixture[str]") -> None: query_pagination.query_pagination() out, _ = capsys.readouterr() diff --git a/samples/tests/test_query_script.py b/samples/tests/test_query_script.py index 037664d36..98dd1253b 100644 --- a/samples/tests/test_query_script.py +++ b/samples/tests/test_query_script.py @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import query_script +if typing.TYPE_CHECKING: + import pytest + -def test_query_script(capsys,): +def test_query_script(capsys: "pytest.CaptureFixture[str]") -> None: query_script.query_script() out, _ = capsys.readouterr() diff --git a/samples/tests/test_query_to_arrow.py b/samples/tests/test_query_to_arrow.py index f14ce5561..d9b1aeb73 100644 --- a/samples/tests/test_query_to_arrow.py +++ b/samples/tests/test_query_to_arrow.py @@ -19,7 +19,7 @@ pyarrow = pytest.importorskip("pyarrow") -def test_query_to_arrow(capsys,): +def test_query_to_arrow(capsys: "pytest.CaptureFixture[str]") -> None: arrow_table = query_to_arrow.query_to_arrow() out, err = capsys.readouterr() diff --git a/samples/tests/test_routine_samples.py b/samples/tests/test_routine_samples.py index b457c464a..57bca074a 100644 --- a/samples/tests/test_routine_samples.py +++ b/samples/tests/test_routine_samples.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery +if typing.TYPE_CHECKING: + import pytest + -def test_create_routine(capsys, random_routine_id): +def test_create_routine( + capsys: "pytest.CaptureFixture[str]", random_routine_id: str +) -> None: from .. import create_routine create_routine.create_routine(random_routine_id) @@ -23,7 +30,11 @@ def test_create_routine(capsys, random_routine_id): assert "Created routine {}".format(random_routine_id) in out -def test_create_routine_ddl(capsys, random_routine_id, client): +def test_create_routine_ddl( + capsys: "pytest.CaptureFixture[str]", + random_routine_id: str, + client: bigquery.Client, +) -> None: from .. import create_routine_ddl create_routine_ddl.create_routine_ddl(random_routine_id) @@ -63,7 +74,9 @@ def test_create_routine_ddl(capsys, random_routine_id, client): assert routine.arguments == expected_arguments -def test_list_routines(capsys, dataset_id, routine_id): +def test_list_routines( + capsys: "pytest.CaptureFixture[str]", dataset_id: str, routine_id: str +) -> None: from .. import list_routines list_routines.list_routines(dataset_id) @@ -72,7 +85,7 @@ def test_list_routines(capsys, dataset_id, routine_id): assert routine_id in out -def test_get_routine(capsys, routine_id): +def test_get_routine(capsys: "pytest.CaptureFixture[str]", routine_id: str) -> None: from .. import get_routine get_routine.get_routine(routine_id) @@ -84,7 +97,7 @@ def test_get_routine(capsys, routine_id): assert "type_kind=" in out -def test_delete_routine(capsys, routine_id): +def test_delete_routine(capsys: "pytest.CaptureFixture[str]", routine_id: str) -> None: from .. import delete_routine delete_routine.delete_routine(routine_id) @@ -92,7 +105,7 @@ def test_delete_routine(capsys, routine_id): assert "Deleted routine {}.".format(routine_id) in out -def test_update_routine(routine_id): +def test_update_routine(routine_id: str) -> None: from .. 
import update_routine routine = update_routine.update_routine(routine_id) diff --git a/samples/tests/test_table_exists.py b/samples/tests/test_table_exists.py index d1f579a64..7317ba747 100644 --- a/samples/tests/test_table_exists.py +++ b/samples/tests/test_table_exists.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import table_exists +if typing.TYPE_CHECKING: + import pytest + -def test_table_exists(capsys, random_table_id, client): +def test_table_exists( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: table_exists.table_exists(random_table_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_table_insert_rows.py b/samples/tests/test_table_insert_rows.py index 72b51df9c..410137631 100644 --- a/samples/tests/test_table_insert_rows.py +++ b/samples/tests/test_table_insert_rows.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import table_insert_rows +if typing.TYPE_CHECKING: + import pytest + -def test_table_insert_rows(capsys, random_table_id, client): +def test_table_insert_rows( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client, +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py index c6199894a..00456ce84 100644 --- a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py +++ b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import table_insert_rows_explicit_none_insert_ids as mut +if typing.TYPE_CHECKING: + import pytest + -def test_table_insert_rows_explicit_none_insert_ids(capsys, random_table_id, client): +def test_table_insert_rows_explicit_none_insert_ids( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client +) -> None: schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), diff --git a/samples/tests/test_undelete_table.py b/samples/tests/test_undelete_table.py index a070abdbd..08841ad72 100644 --- a/samples/tests/test_undelete_table.py +++ b/samples/tests/test_undelete_table.py @@ -12,10 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import undelete_table +if typing.TYPE_CHECKING: + import pytest + -def test_undelete_table(capsys, table_with_schema_id, random_table_id): +def test_undelete_table( + capsys: "pytest.CaptureFixture[str]", + table_with_schema_id: str, + random_table_id: str, +) -> None: undelete_table.undelete_table(table_with_schema_id, random_table_id) out, _ = capsys.readouterr() assert ( diff --git a/samples/tests/test_update_dataset_access.py b/samples/tests/test_update_dataset_access.py index 4c0aa835b..186a3b575 100644 --- a/samples/tests/test_update_dataset_access.py +++ b/samples/tests/test_update_dataset_access.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. 
import update_dataset_access +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_access(capsys, dataset_id): +def test_update_dataset_access( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: update_dataset_access.update_dataset_access(dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_update_dataset_default_partition_expiration.py b/samples/tests/test_update_dataset_default_partition_expiration.py index a5a8e6b52..b7787dde3 100644 --- a/samples/tests/test_update_dataset_default_partition_expiration.py +++ b/samples/tests/test_update_dataset_default_partition_expiration.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import update_dataset_default_partition_expiration +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_default_partition_expiration(capsys, dataset_id): +def test_update_dataset_default_partition_expiration( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds diff --git a/samples/tests/test_update_dataset_default_table_expiration.py b/samples/tests/test_update_dataset_default_table_expiration.py index b0f701322..f780827f2 100644 --- a/samples/tests/test_update_dataset_default_table_expiration.py +++ b/samples/tests/test_update_dataset_default_table_expiration.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import update_dataset_default_table_expiration +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_default_table_expiration(capsys, dataset_id): +def test_update_dataset_default_table_expiration( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds diff --git a/samples/tests/test_update_dataset_description.py b/samples/tests/test_update_dataset_description.py index e4ff586c7..5d1209e22 100644 --- a/samples/tests/test_update_dataset_description.py +++ b/samples/tests/test_update_dataset_description.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from .. import update_dataset_description +if typing.TYPE_CHECKING: + import pytest + -def test_update_dataset_description(capsys, dataset_id): +def test_update_dataset_description( + capsys: "pytest.CaptureFixture[str]", dataset_id: str +) -> None: update_dataset_description.update_dataset_description(dataset_id) out, err = capsys.readouterr() diff --git a/samples/tests/test_update_table_require_partition_filter.py b/samples/tests/test_update_table_require_partition_filter.py index 7e9ca6f2b..580796ed3 100644 --- a/samples/tests/test_update_table_require_partition_filter.py +++ b/samples/tests/test_update_table_require_partition_filter.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +import typing + from google.cloud import bigquery from .. import update_table_require_partition_filter +if typing.TYPE_CHECKING: + import pytest + -def test_update_table_require_partition_filter(capsys, random_table_id, client): +def test_update_table_require_partition_filter( + capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client, +) -> None: # Make a partitioned table. 
schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")] diff --git a/samples/undelete_table.py b/samples/undelete_table.py index 18b15801f..c230a9230 100644 --- a/samples/undelete_table.py +++ b/samples/undelete_table.py @@ -15,7 +15,7 @@ from google.api_core import datetime_helpers -def undelete_table(table_id, recovered_table_id): +def undelete_table(table_id: str, recovered_table_id: str) -> None: # [START bigquery_undelete_table] import time @@ -39,7 +39,7 @@ def undelete_table(table_id, recovered_table_id): # Due to very short lifecycle of the table, ensure we're not picking a time # prior to the table creation due to time drift between backend and client. table = client.get_table(table_id) - created_epoch = datetime_helpers.to_milliseconds(table.created) + created_epoch: int = datetime_helpers.to_milliseconds(table.created) # type: ignore if created_epoch > snapshot_epoch: snapshot_epoch = created_epoch # [END_EXCLUDE] diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py index a5c2670e7..fda784da5 100644 --- a/samples/update_dataset_access.py +++ b/samples/update_dataset_access.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_access(dataset_id): +def update_dataset_access(dataset_id: str) -> None: # [START bigquery_update_dataset_access] from google.cloud import bigquery diff --git a/samples/update_dataset_default_partition_expiration.py b/samples/update_dataset_default_partition_expiration.py index 18cfb92db..37456f3a0 100644 --- a/samples/update_dataset_default_partition_expiration.py +++ b/samples/update_dataset_default_partition_expiration.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_default_partition_expiration(dataset_id): +def update_dataset_default_partition_expiration(dataset_id: str) -> None: # [START bigquery_update_dataset_partition_expiration] diff --git a/samples/update_dataset_default_table_expiration.py b/samples/update_dataset_default_table_expiration.py index b7e5cea9b..cf6f50d9f 100644 --- a/samples/update_dataset_default_table_expiration.py +++ b/samples/update_dataset_default_table_expiration.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_default_table_expiration(dataset_id): +def update_dataset_default_table_expiration(dataset_id: str) -> None: # [START bigquery_update_dataset_expiration] diff --git a/samples/update_dataset_description.py b/samples/update_dataset_description.py index 0732b1c61..98c5fed43 100644 --- a/samples/update_dataset_description.py +++ b/samples/update_dataset_description.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_dataset_description(dataset_id): +def update_dataset_description(dataset_id: str) -> None: # [START bigquery_update_dataset_description] diff --git a/samples/update_model.py b/samples/update_model.py index db262d8cc..e11b6d5af 100644 --- a/samples/update_model.py +++ b/samples/update_model.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_model(model_id): +def update_model(model_id: str) -> None: """Sample ID: go/samples-tracker/1533""" # [START bigquery_update_model_description] diff --git a/samples/update_routine.py b/samples/update_routine.py index 61c6855b5..1a975a253 100644 --- a/samples/update_routine.py +++ b/samples/update_routine.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
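As an aside, the annotation changes in this patch repeatedly use the same trick: `pytest` (or `google.cloud.bigquery`) is imported only under a `typing.TYPE_CHECKING` guard and the annotations are written as strings, so type checkers see the precise types while nothing extra is imported at runtime. A minimal standalone sketch of the pattern (the test and fixture names are made up, not part of the patch):

import typing

if typing.TYPE_CHECKING:
    # Seen only by type checkers (mypy, pytype); never imported at runtime.
    import pytest


def test_example(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None:
    # The string annotation gives type checkers the exact fixture type without
    # requiring a runtime import purely for the sake of the annotation.
    print(table_id)
    out, _ = capsys.readouterr()
    assert table_id in out
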
+import typing -def update_routine(routine_id): +if typing.TYPE_CHECKING: + from google.cloud import bigquery + + +def update_routine(routine_id: str) -> "bigquery.Routine": # [START bigquery_update_routine] diff --git a/samples/update_table_require_partition_filter.py b/samples/update_table_require_partition_filter.py index cf1d53277..8221238a7 100644 --- a/samples/update_table_require_partition_filter.py +++ b/samples/update_table_require_partition_filter.py @@ -13,7 +13,7 @@ # limitations under the License. -def update_table_require_partition_filter(table_id): +def update_table_require_partition_filter(table_id: str) -> None: # [START bigquery_update_table_require_partition_filter] diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 6f06c6feb..73b88964f 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -20,6 +20,7 @@ import json import io import operator +import warnings import google.api_core.retry import pkg_resources @@ -363,7 +364,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): See: https://github.com/googleapis/google-cloud-python/issues/7370 """ # Schema with all scalar types. - scalars_schema = ( + table_schema = ( bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), @@ -378,15 +379,6 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) num_rows = 100 nulls = [None] * num_rows df_data = [ @@ -467,7 +459,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id # See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 - scalars_schema = ( + table_schema = ( + bigquery.SchemaField("row_num", "INTEGER"), bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), @@ -482,17 +475,8 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - df_data = [ + ("row_num", [1, 2, 3]), ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), @@ -559,6 +543,22 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id assert tuple(table.schema) == table_schema assert table.num_rows == 3 + result = bigquery_client.list_rows(table).to_dataframe() + result.sort_values("row_num", inplace=True) + + # Check that extreme DATE/DATETIME values are loaded correctly. 
+ # https://github.com/googleapis/python-bigquery/issues/1076 + assert result["date_col"][0] == datetime.date(1, 1, 1) + assert result["date_col"][2] == datetime.date(9999, 12, 31) + assert result["dt_col"][0] == datetime.datetime(1, 1, 1, 0, 0, 0) + assert result["dt_col"][2] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert result["ts_col"][0] == datetime.datetime( + 1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc + ) + assert result["ts_col"][2] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id): """Test that a DataFrame with struct datatype can be uploaded if a @@ -1180,9 +1180,17 @@ def test_to_geodataframe(bigquery_client, dataset_id): assert df["geog"][2] == wkt.loads("point(0 0)") assert isinstance(df, geopandas.GeoDataFrame) assert isinstance(df["geog"], geopandas.GeoSeries) - assert df.area[0] == 0.5 - assert pandas.isna(df.area[1]) - assert df.area[2] == 0.0 + + with warnings.catch_warnings(): + # Computing the area on a GeoDataFrame that uses a geographic Coordinate + # Reference System (CRS) produces a warning that we are not interested in. + # We do not mind if the computed area is incorrect with respect to the + # GeoDataFrame data, as long as it matches the expected "incorrect" value. + warnings.filterwarnings("ignore", category=UserWarning) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" assert df.crs.name == "WGS 84" assert df.geog.crs.srs == "EPSG:4326" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8ebf5137e..073452002 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8218,6 +8218,22 @@ def test__do_resumable_upload_custom_project(self): assert initiation_url is not None assert "projects/custom-project" in initiation_url + def test__do_resumable_upload_custom_timeout(self): + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = self._make_transport( + self._make_resumable_upload_responses(file_obj_len) + ) + client = self._make_client(transport) + + client._do_resumable_upload( + file_obj, self.EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 + ) + + # The timeout should be applied to all underlying calls. 
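The new `test__do_resumable_upload_custom_timeout` above verifies that a caller-supplied timeout is forwarded to every chunked request of a resumable upload. From the caller's side that roughly corresponds to passing `timeout` on a load call; a hedged sketch, with a hypothetical local file and destination table:

from google.cloud import bigquery

client = bigquery.Client()
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV, autodetect=True
)

# Hypothetical file and table ID, purely for illustration.
with open("data.csv", "rb") as file_obj:
    load_job = client.load_table_from_file(
        file_obj,
        "my-project.my_dataset.my_table",
        job_config=job_config,
        timeout=3.14,  # per the test above, applied to each underlying HTTP request
    )

load_job.result()  # block until the load job completes
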
+ for call_args in transport.request.call_args_list: + assert call_args.kwargs.get("timeout") == 3.14 + def test__do_multipart_upload(self): transport = self._make_transport([self._make_response(http.client.OK)]) client = self._make_client(transport) @@ -8425,7 +8441,7 @@ def test_upload_chunksize(client): upload.finished = False - def transmit_next_chunk(transport): + def transmit_next_chunk(transport, *args, **kwargs): upload.finished = True result = mock.MagicMock() result.json.return_value = {} diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 01c213e98..a95992a26 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -518,7 +518,7 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_clears_display_in_verbose_mode(): +def test_bigquery_magic_clears_display_in_non_verbose_mode(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( @@ -1639,6 +1639,143 @@ def test_bigquery_magic_with_improperly_formatted_params(): ip.run_cell_magic("bigquery", "--params {17}", sql) +@pytest.mark.parametrize( + "raw_sql", ("SELECT answer AS 42", " \t SELECT answer AS 42 \t ") +) +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw_sql): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + ipython_ns_cleanup.append((ip, "custom_query")) + ipython_ns_cleanup.append((ip, "query_results_df")) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + mock_result = pandas.DataFrame([42], columns=["answer"]) + query_job_mock.to_dataframe.return_value = mock_result + + ip.user_ns["custom_query"] = raw_sql + cell_body = "$custom_query" # Referring to an existing variable name (custom_query) + assert "query_results_df" not in ip.user_ns + + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + ip.run_cell_magic("bigquery", "query_results_df", cell_body) + + run_query_mock.assert_called_once_with(mock.ANY, raw_sql, mock.ANY) + + assert "query_results_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["query_results_df"] + assert len(df) == len(mock_result) # verify row count + assert list(df) == list(mock_result) # verify column names + assert list(df["answer"]) == [42] + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_nonexisting_query_variable(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + ip.user_ns.pop("custom_query", None) # Make sure the variable does NOT exist. + cell_body = "$custom_query" # Referring to a non-existing variable name. 
+ + with pytest.raises( + NameError, match=r".*custom_query does not exist.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_empty_query_variable_name(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + cell_body = "$" # Not referring to any variable (name omitted). + + with pytest.raises( + NameError, match=r"(?i).*missing query variable name.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + ipython_ns_cleanup.append((ip, "custom_query")) + + ip.user_ns["custom_query"] = object() + cell_body = "$custom_query" # Referring to a non-string variable. + + with pytest.raises( + TypeError, match=r".*must be a string or a bytes-like.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_query_variable_not_identifier(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + cell_body = "$123foo" # 123foo is not valid Python identifier + + with io.capture_output() as captured_io: + ip.run_cell_magic("bigquery", "", cell_body) + + # If "$" prefixes a string that is not a Python identifier, we do not treat such + # cell_body as a variable reference and just treat is as any other cell body input. + # If at the same time the cell body does not contain any whitespace, it is + # considered a table name, thus we expect an error that the table ID is not valid. 
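The block of tests above pins down the new `$variable` syntax for the `%%bigquery` cell magic: a cell body of `$name` pulls the query text from an existing string variable, while missing, empty, or non-string references raise `NameError` or `TypeError`. A rough usage sketch, assuming an IPython session with the extension loaded and credentials configured (variable and destination names are illustrative):

from IPython import get_ipython

ip = get_ipython()
ip.extension_manager.load_extension("google.cloud.bigquery")

# Store the query text in a plain Python variable...
ip.user_ns["custom_query"] = "SELECT 17 AS answer"

# ...and reference it from the magic with a `$` prefix. In a notebook this is
# simply a cell that reads:
#     %%bigquery result_df
#     $custom_query
ip.run_cell_magic("bigquery", "result_df", "$custom_query")

# The resulting DataFrame is pushed into the user namespace.
result_df = ip.user_ns["result_df"]
print(result_df)
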
+ output = captured_io.stderr + assert "ERROR:" in output + assert "must be a fully-qualified ID" in output + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_invalid_multiple_option_values(): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index c9a3d2815..3c74259d4 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1824,6 +1824,18 @@ def test_to_arrow(self): self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) + def test_to_arrow_iterable(self): + row_iterator = self._make_one() + arrow_iter = row_iterator.to_arrow_iterable() + + result = list(arrow_iter) + + self.assertEqual(len(result), 1) + record_batch = result[0] + self.assertIsInstance(record_batch, pyarrow.RecordBatch) + self.assertEqual(record_batch.num_rows, 0) + self.assertEqual(record_batch.num_columns, 0) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_error_if_pandas_is_none(self): row_iterator = self._make_one() @@ -2093,6 +2105,181 @@ def test__validate_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test_to_arrow_iterable(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField( + "child", + "RECORD", + mode="REPEATED", + fields=[ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + ), + ] + rows = [ + { + "f": [ + {"v": "Bharney Rhubble"}, + {"v": "33"}, + { + "v": [ + {"v": {"f": [{"v": "Whamm-Whamm Rhubble"}, {"v": "3"}]}}, + {"v": {"f": [{"v": "Hoppy"}, {"v": "1"}]}}, + ] + }, + ] + }, + { + "f": [ + {"v": "Wylma Phlyntstone"}, + {"v": "29"}, + { + "v": [ + {"v": {"f": [{"v": "Bepples Phlyntstone"}, {"v": "0"}]}}, + {"v": {"f": [{"v": "Dino"}, {"v": "4"}]}}, + ] + }, + ] + }, + ] + path = "/foo" + api_request = mock.Mock( + side_effect=[ + {"rows": [rows[0]], "pageToken": "NEXTPAGE"}, + {"rows": [rows[1]]}, + ] + ) + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, page_size=1, max_results=5 + ) + + record_batches = row_iterator.to_arrow_iterable() + self.assertIsInstance(record_batches, types.GeneratorType) + record_batches = list(record_batches) + self.assertEqual(len(record_batches), 2) + + # Check the schema. + for record_batch in record_batches: + self.assertIsInstance(record_batch, pyarrow.RecordBatch) + self.assertEqual(record_batch.schema[0].name, "name") + self.assertTrue(pyarrow.types.is_string(record_batch.schema[0].type)) + self.assertEqual(record_batch.schema[1].name, "age") + self.assertTrue(pyarrow.types.is_int64(record_batch.schema[1].type)) + child_field = record_batch.schema[2] + self.assertEqual(child_field.name, "child") + self.assertTrue(pyarrow.types.is_list(child_field.type)) + self.assertTrue(pyarrow.types.is_struct(child_field.type.value_type)) + self.assertEqual(child_field.type.value_type[0].name, "name") + self.assertEqual(child_field.type.value_type[1].name, "age") + + # Check the data. 
+ record_batch_1 = record_batches[0].to_pydict() + names = record_batch_1["name"] + ages = record_batch_1["age"] + children = record_batch_1["child"] + self.assertEqual(names, ["Bharney Rhubble"]) + self.assertEqual(ages, [33]) + self.assertEqual( + children, + [ + [ + {"name": "Whamm-Whamm Rhubble", "age": 3}, + {"name": "Hoppy", "age": 1}, + ], + ], + ) + + record_batch_2 = record_batches[1].to_pydict() + names = record_batch_2["name"] + ages = record_batch_2["age"] + children = record_batch_2["child"] + self.assertEqual(names, ["Wylma Phlyntstone"]) + self.assertEqual(ages, [29]) + self.assertEqual( + children, + [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], + ) + + def test_to_arrow_iterable_w_bqstorage(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud.bigquery_storage_v1 import reader + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( + big_query_read_grpc_transport.BigQueryReadGrpcTransport + ) + streams = [ + # Use two streams we want to check frames are read from each stream. + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, + {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, + ] + session = bigquery_storage.types.ReadSession(streams=streams) + arrow_schema = pyarrow.schema( + [ + pyarrow.field("colA", pyarrow.int64()), + # Not alphabetical to test column order. + pyarrow.field("colC", pyarrow.float64()), + pyarrow.field("colB", pyarrow.string()), + ] + ) + session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() + bqstorage_client.create_read_session.return_value = session + + mock_rowstream = mock.create_autospec(reader.ReadRowsStream) + bqstorage_client.read_rows.return_value = mock_rowstream + + mock_rows = mock.create_autospec(reader.ReadRowsIterable) + mock_rowstream.rows.return_value = mock_rows + page_items = [ + pyarrow.array([1, -1]), + pyarrow.array([2.0, 4.0]), + pyarrow.array(["abc", "def"]), + ] + + expected_record_batch = pyarrow.RecordBatch.from_arrays( + page_items, schema=arrow_schema + ) + expected_num_record_batches = 3 + + mock_page = mock.create_autospec(reader.ReadRowsPage) + mock_page.to_arrow.return_value = expected_record_batch + mock_pages = (mock_page,) * expected_num_record_batches + type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) + + schema = [ + schema.SchemaField("colA", "INTEGER"), + schema.SchemaField("colC", "FLOAT"), + schema.SchemaField("colB", "STRING"), + ] + + row_iterator = mut.RowIterator( + _mock_client(), + None, # api_request: ignored + None, # path: ignored + schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=schema, + ) + + record_batches = list( + row_iterator.to_arrow_iterable(bqstorage_client=bqstorage_client) + ) + total_record_batches = len(streams) * len(mock_pages) + self.assertEqual(len(record_batches), total_record_batches) + + for record_batch in record_batches: + # Are the record batches return as expected? + self.assertEqual(record_batch, expected_record_batch) + + # Don't close the client if it was passed in. 
+ bqstorage_client._transport.grpc_channel.close.assert_not_called() + def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField From 950b24e842d9e1fd82c162c9ed6f9cfeab897e3f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 15 Dec 2021 12:33:18 -0600 Subject: [PATCH 28/35] chore: add type annotations for mypy --- google/cloud/bigquery/dataset.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 8400d813a..44968b822 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import copy +from typing import Dict, Any import google.cloud._helpers # type: ignore @@ -144,7 +145,7 @@ class AccessEntry(object): """ def __init__(self, role=None, entity_type=None, entity_id=None) -> None: - self._properties = {} + self._properties: Dict[str, Any] = {} if entity_type in ("view", "routine", "dataset"): if role is not None: raise ValueError( From e888c71461c2e072b3d7ea81939754987584aab8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 15 Dec 2021 12:45:59 -0600 Subject: [PATCH 29/35] chore: revert test for when pyarrow is not installed --- tests/unit/test_table.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f76bbba91..3c74259d4 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2205,25 +2205,6 @@ def test_to_arrow_iterable(self): [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], ) - @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - def test_to_arrow_iterable_error_if_pyarrow_is_none(self): - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - rows = [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - ] - path = "/foo" - api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - - with pytest.raises(ValueError, match="pyarrow"): - row_iterator.to_arrow_iterable() - def test_to_arrow_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2299,7 +2280,6 @@ def test_to_arrow_iterable_w_bqstorage(self): # Don't close the client if it was passed in. 
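Both `to_arrow_iterable` tests above (REST pages and BigQuery Storage streams) exercise the same public surface: the row iterator yields `pyarrow.RecordBatch` objects one chunk at a time instead of materializing the full table. A minimal usage sketch; the public dataset name is only an example:

from google.cloud import bigquery

client = bigquery.Client()
rows = client.list_rows("bigquery-public-data.usa_names.usa_1910_2013")

total_rows = 0
for record_batch in rows.to_arrow_iterable():
    # Each item is a pyarrow.RecordBatch, so large results can be processed
    # incrementally without holding the whole table in memory.
    total_rows += record_batch.num_rows

print(total_rows)
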
bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField From dd40c2415c9ca09c77ce61cbafccd7b0fdf6ccfd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 19 Jan 2022 14:15:12 -0600 Subject: [PATCH 30/35] test: fix pandas tests with new bqstorage client (#1113) * wip: attempt to fix pandas tests with new bqstorage client * mock a bit more * update other test too * make flakey test more robust --- tests/system/test_client.py | 2 +- tests/unit/job/test_query_pandas.py | 50 ++++++++++++++++------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index d52cb9eb9..a00193788 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1552,7 +1552,7 @@ def test_dbapi_connection_does_not_leak_sockets(self): connection.close() conn_count_end = len(current_process.connections()) - self.assertEqual(conn_count_end, conn_count_start) + self.assertLessEqual(conn_count_end, conn_count_start) def _load_table_for_dml(self, rows, dataset_id, table_id): from google.cloud._testing import _NamedTemporaryFile diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 044ca6e9a..e35051c5c 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -41,7 +41,6 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None -from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from ..helpers import make_connection from .helpers import _make_client from .helpers import _make_job_resource @@ -142,18 +141,22 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): session = bigquery_storage.types.ReadSession() session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() session.streams = [bigquery_storage.types.ReadStream(name=stream_id)] - bqstorage_client.create_read_session.return_value = session - bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - page = bigquery_storage.types.ReadRowsResponse() - if BQ_STORAGE_VERSIONS.is_read_session_optional: - page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() - page.arrow_record_batch.serialized_record_batch = ( - record_batch.serialize().to_pybytes() + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + row_iterable = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsIterable, instance=True + ) + page = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsPage, instance=True ) - bqstorage_base_client.read_rows.return_value = [page] - reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream( - [page], bqstorage_base_client, stream_id, 0, {} + page.to_arrow.return_value = record_batch + type(row_iterable).pages = mock.PropertyMock(return_value=[page]) + reader.rows.return_value = row_iterable + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True ) + bqstorage_client.create_read_session.return_value = session bqstorage_client.read_rows.return_value = reader dataframe = job.to_dataframe(bqstorage_client=bqstorage_client) @@ -536,22 +539,25 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): connection = make_connection(query_resource) client = _make_client(connection=connection) job = 
target_class.from_api_repr(resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) session = bigquery_storage.types.ReadSession() session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() session.streams = [bigquery_storage.types.ReadStream(name=stream_id)] - bqstorage_client.create_read_session.return_value = session - bqstorage_base_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - page = bigquery_storage.types.ReadRowsResponse() - if BQ_STORAGE_VERSIONS.is_read_session_optional: - page.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes() - page.arrow_record_batch.serialized_record_batch = ( - record_batch.serialize().to_pybytes() + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + row_iterable = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsIterable, instance=True ) - bqstorage_base_client.read_rows.return_value = [page] - reader = google.cloud.bigquery_storage_v1.reader.ReadRowsStream( - [page], bqstorage_base_client, stream_id, 0, {} + page = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsPage, instance=True ) + page.to_arrow.return_value = record_batch + type(row_iterable).pages = mock.PropertyMock(return_value=[page]) + reader.rows.return_value = row_iterable + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_client.create_read_session.return_value = session bqstorage_client.read_rows.return_value = reader dataframe = job.to_dataframe(bqstorage_client=bqstorage_client) From 5f50242c39391fb112922ca6ab2b59c6bcb73466 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 28 Jan 2022 09:46:06 -0600 Subject: [PATCH 31/35] feat: use `StandardSqlField` class for `Model.feature_columns` and `Model.label_columns` (#1117) --- google/cloud/bigquery/model.py | 15 +++++++++---- tests/unit/model/test_model.py | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 52fe6276e..4d2bc346c 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -23,6 +23,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers +from google.cloud.bigquery import standard_sql from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -171,26 +172,32 @@ def training_runs(self) -> Sequence[Dict[str, Any]]: ) @property - def feature_columns(self) -> Sequence[Dict[str, Any]]: + def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]: """Input feature columns that were used to train this model. Read-only. """ - return typing.cast( + resource: Sequence[Dict[str, Any]] = typing.cast( Sequence[Dict[str, Any]], self._properties.get("featureColumns", []) ) + return [ + standard_sql.StandardSqlField.from_api_repr(column) for column in resource + ] @property - def label_columns(self) -> Sequence[Dict[str, Any]]: + def label_columns(self) -> Sequence[standard_sql.StandardSqlField]: """Label columns that were used to train this model. The output of the model will have a ``predicted_`` prefix to these columns. Read-only. 
""" - return typing.cast( + resource: Sequence[Dict[str, Any]] = typing.cast( Sequence[Dict[str, Any]], self._properties.get("labelColumns", []) ) + return [ + standard_sql.StandardSqlField.from_api_repr(column) for column in resource + ] @property def best_trial_id(self) -> Optional[int]: diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 3cc1dd4c4..1ae988414 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -273,6 +273,46 @@ def test_build_resource(object_under_test, resource, filter_fields, expected): assert got == expected +def test_feature_columns(object_under_test): + from google.cloud.bigquery import standard_sql + + object_under_test._properties["featureColumns"] = [ + {"name": "col_1", "type": {"typeKind": "STRING"}}, + {"name": "col_2", "type": {"typeKind": "FLOAT64"}}, + ] + expected = [ + standard_sql.StandardSqlField( + "col_1", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.STRING), + ), + standard_sql.StandardSqlField( + "col_2", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.FLOAT64), + ), + ] + assert object_under_test.feature_columns == expected + + +def test_label_columns(object_under_test): + from google.cloud.bigquery import standard_sql + + object_under_test._properties["labelColumns"] = [ + {"name": "col_1", "type": {"typeKind": "STRING"}}, + {"name": "col_2", "type": {"typeKind": "FLOAT64"}}, + ] + expected = [ + standard_sql.StandardSqlField( + "col_1", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.STRING), + ), + standard_sql.StandardSqlField( + "col_2", + standard_sql.StandardSqlDataType(standard_sql.StandardSqlTypeNames.FLOAT64), + ), + ] + assert object_under_test.label_columns == expected + + def test_set_description(object_under_test): assert not object_under_test.description object_under_test.description = "A model description." From dedb2eae74cc8c8ef8f84586af636b2c01f0f6f1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 25 Mar 2022 17:31:55 -0500 Subject: [PATCH 32/35] docs: add type annotations to job samples --- samples/snippets/manage_job_cancel.py | 2 +- samples/snippets/manage_job_get.py | 2 +- samples/snippets/manage_job_test.py | 2 +- samples/snippets/user_credentials.py | 9 +-------- 4 files changed, 4 insertions(+), 11 deletions(-) diff --git a/samples/snippets/manage_job_cancel.py b/samples/snippets/manage_job_cancel.py index 3e0fc5218..b0408b837 100644 --- a/samples/snippets/manage_job_cancel.py +++ b/samples/snippets/manage_job_cancel.py @@ -18,7 +18,7 @@ def cancel_job( client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", -): +) -> None: job = client.cancel_job(job_id, location=location) print(f"{job.location}:{job.job_id} cancelled") diff --git a/samples/snippets/manage_job_get.py b/samples/snippets/manage_job_get.py index 256d79e5b..f637edfe1 100644 --- a/samples/snippets/manage_job_get.py +++ b/samples/snippets/manage_job_get.py @@ -18,7 +18,7 @@ def get_job( client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", -): +) -> None: job = client.get_job(job_id, location=location) # All job classes have "location" and "job_id" string properties. 
diff --git a/samples/snippets/manage_job_test.py b/samples/snippets/manage_job_test.py index 745b7bbbe..630be365b 100644 --- a/samples/snippets/manage_job_test.py +++ b/samples/snippets/manage_job_test.py @@ -19,7 +19,7 @@ import manage_job_get -def test_manage_job(capsys: pytest.CaptureFixture): +def test_manage_job(capsys: pytest.CaptureFixture[str]) -> None: client = bigquery.Client() sql = """ SELECT corpus diff --git a/samples/snippets/user_credentials.py b/samples/snippets/user_credentials.py index a7f13a463..487a56c5f 100644 --- a/samples/snippets/user_credentials.py +++ b/samples/snippets/user_credentials.py @@ -73,13 +73,6 @@ def main(project: str) -> None: parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) - parser.add_argument( - "--launch-browser", - help="Use a local server flow to authenticate. ", - action="store_true", - ) parser.add_argument("project", help="Project to use for BigQuery billing.") - args = parser.parse_args() - - main(args.project, launch_browser=args.launch_browser) + main(args.project) From 0279fa9ff21560f5e561af553f8838213a7ee744 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 29 Mar 2022 10:22:11 -0500 Subject: [PATCH 33/35] chore: blacken with black 22.3.0 --- .github/.OwlBot.lock.yaml | 2 +- .pre-commit-config.yaml | 2 +- docs/conf.py | 5 +- google/cloud/bigquery/_pandas_helpers.py | 5 +- google/cloud/bigquery/_tqdm_helpers.py | 4 +- google/cloud/bigquery/client.py | 20 +- google/cloud/bigquery/dataset.py | 2 +- google/cloud/bigquery/dbapi/_helpers.py | 6 +- google/cloud/bigquery/external_config.py | 9 +- google/cloud/bigquery/job/query.py | 3 +- google/cloud/bigquery/magics/magics.py | 7 +- .../cloud/bigquery/opentelemetry_tracing.py | 44 +- google/cloud/bigquery/query.py | 3 +- google/cloud/bigquery/schema.py | 15 +- google/cloud/bigquery/table.py | 11 +- .../bigquery_v2/types/encryption_config.py | 9 +- google/cloud/bigquery_v2/types/model.py | 783 ++++++++++++++---- .../bigquery_v2/types/model_reference.py | 20 +- .../cloud/bigquery_v2/types/standard_sql.py | 39 +- .../bigquery_v2/types/table_reference.py | 35 +- noxfile.py | 7 +- samples/geography/noxfile.py | 4 +- samples/load_table_dataframe.py | 10 +- samples/load_table_file.py | 4 +- samples/load_table_uri_parquet.py | 4 +- samples/magics/noxfile.py | 4 +- .../snippets/authenticate_service_account.py | 8 +- samples/snippets/conftest.py | 3 +- ...te_table_external_hive_partitioned_test.py | 6 +- samples/snippets/delete_job_test.py | 3 +- samples/snippets/manage_job_cancel.py | 4 +- samples/snippets/manage_job_get.py | 4 +- samples/snippets/noxfile.py | 4 +- samples/snippets/update_dataset_access.py | 6 +- samples/table_insert_rows.py | 4 +- ...le_insert_rows_explicit_none_insert_ids.py | 4 +- .../tests/test_client_query_relax_column.py | 4 +- samples/tests/test_load_table_dataframe.py | 14 +- samples/tests/test_table_insert_rows.py | 4 +- ...t_update_table_require_partition_filter.py | 4 +- tests/system/helpers.py | 13 +- tests/system/test_arrow.py | 4 +- tests/system/test_client.py | 9 +- tests/system/test_pandas.py | 25 +- tests/system/test_query.py | 14 +- tests/system/test_structs.py | 3 +- tests/unit/job/test_base.py | 5 +- tests/unit/job/test_query_pandas.py | 30 +- tests/unit/test__helpers.py | 6 +- tests/unit/test__job_helpers.py | 12 +- tests/unit/test__pandas_helpers.py | 6 +- tests/unit/test_client.py | 173 +++- tests/unit/test_dbapi__helpers.py | 3 +- tests/unit/test_dbapi_connection.py | 10 +- 
tests/unit/test_dbapi_cursor.py | 21 +- tests/unit/test_dbapi_types.py | 4 +- tests/unit/test_list_jobs.py | 2 +- tests/unit/test_list_models.py | 8 +- tests/unit/test_magics.py | 15 +- tests/unit/test_query.py | 3 +- tests/unit/test_schema.py | 68 +- tests/unit/test_standard_sql_types.py | 12 +- tests/unit/test_table.py | 50 +- tests/unit/test_table_pandas.py | 6 +- 64 files changed, 1215 insertions(+), 416 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7e08e05a3..87dd00611 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,4 +13,4 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5d8da01438ece4021d135433f2cf3227aa39ef0eaccc941d62aa35e6902832ae + digest: sha256:7cffbc10910c3ab1b852c05114a08d374c195a81cdec1d4a67a1d129331d0bfe diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62eb5a77d..46d237160 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.3.0 hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 diff --git a/docs/conf.py b/docs/conf.py index 512158e19..5c83fd79e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -362,7 +362,10 @@ intersphinx_mapping = { "python": ("https://python.readthedocs.org/en/latest/", None), "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), - "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), + "google.api_core": ( + "https://googleapis.dev/python/google-api-core/latest/", + None, + ), "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7917b989b..17de6830a 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -659,7 +659,10 @@ def dataframe_to_parquet( bq_schema = schema._to_schema_fields(bq_schema) arrow_table = dataframe_to_arrow(dataframe, bq_schema) pyarrow.parquet.write_table( - arrow_table, filepath, compression=parquet_compression, **kwargs, + arrow_table, + filepath, + compression=parquet_compression, + **kwargs, ) diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index 632f70f87..f2355ab3b 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -95,7 +95,9 @@ def wait_for_query( progress_bar.total = len(query_job.query_plan) progress_bar.set_description( "Query executing stage {} and status {} : {:0.2f}s".format( - current_stage.name, current_stage.status, time.time() - start_time, + current_stage.name, + current_stage.status, + time.time() - start_time, ), ) try: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 81a8463f1..b388f1d4c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1868,9 +1868,7 @@ def _get_query_results( def job_from_resource( self, resource: dict - ) -> Union[ - job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob, - ]: + ) -> Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]: """Detect correct job type from resource and instantiate. 
Args: @@ -1952,8 +1950,8 @@ def create_job( timeout=timeout, ) elif "extract" in job_config: - extract_job_config = google.cloud.bigquery.job.ExtractJobConfig.from_api_repr( - job_config + extract_job_config = ( + google.cloud.bigquery.job.ExtractJobConfig.from_api_repr(job_config) ) source = _get_sub_prop(job_config, ["extract", "sourceTable"]) if source: @@ -2124,7 +2122,8 @@ def cancel_job( job_instance = self.job_from_resource(resource["job"]) # never an UnknownJob return typing.cast( - Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], job_instance, + Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], + job_instance, ) def list_jobs( @@ -3319,7 +3318,14 @@ def query( # _default_query_job_config) up to this point. if api_method == enums.QueryApiMethod.QUERY: return _job_helpers.query_jobs_query( - self, query, job_config, location, project, retry, timeout, job_retry, + self, + query, + job_config, + location, + project, + retry, + timeout, + job_retry, ) elif api_method == enums.QueryApiMethod.INSERT: return _job_helpers.query_jobs_insert( diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 44968b822..0fafd5783 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -192,7 +192,7 @@ def __repr__(self): ) def _key(self): - """ A tuple key that uniquely describes this field. + """A tuple key that uniquely describes this field. Used to compute this instance's hashcode and evaluate equality. Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index da4e01d43..117fa8ae7 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -486,8 +486,7 @@ def raise_on_closed( """Make public instance methods raise an error if the instance is closed.""" def _raise_on_closed(method): - """Make a non-static method raise an error if its containing instance is closed. - """ + """Make a non-static method raise an error if its containing instance is closed.""" def with_closed_check(self, *args, **kwargs): if getattr(self, closed_attr_name): @@ -498,8 +497,7 @@ def with_closed_check(self, *args, **kwargs): return with_closed_check def decorate_public_methods(klass): - """Apply ``_raise_on_closed()`` decorator to public instance methods. - """ + """Apply ``_raise_on_closed()`` decorator to public instance methods.""" for name in dir(klass): if name.startswith("_") and name != "__iter__": continue diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index cabf2436b..640b2d16b 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -289,8 +289,7 @@ def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily": class BigtableOptions(object): - """Options that describe how to treat Bigtable tables as BigQuery tables. 
- """ + """Options that describe how to treat Bigtable tables as BigQuery tables.""" _SOURCE_FORMAT = "BIGTABLE" _RESOURCE_NAME = "bigtableOptions" @@ -557,7 +556,11 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": ) OptionsType = Union[ - AvroOptions, BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions, + AvroOptions, + BigtableOptions, + CSVOptions, + GoogleSheetsOptions, + ParquetOptions, ] diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 00013873b..c2d304e30 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -324,7 +324,8 @@ def connection_properties(self) -> List[ConnectionProperty]: @connection_properties.setter def connection_properties(self, value: Iterable[ConnectionProperty]): self._set_sub_prop( - "connectionProperties", [prop.to_api_repr() for prop in value], + "connectionProperties", + [prop.to_api_repr() for prop in value], ) @property diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index dd2904ce4..14819aa59 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -586,7 +586,9 @@ def _cell_magic(line, query): bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint bqstorage_client = _make_bqstorage_client( - client, use_bqstorage_api, bqstorage_client_options, + client, + use_bqstorage_api, + bqstorage_client_options, ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -637,7 +639,8 @@ def _cell_magic(line, query): return result = rows.to_dataframe( - bqstorage_client=bqstorage_client, create_bqstorage_client=False, + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index 748f2136d..adecea121 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -37,28 +37,28 @@ @contextmanager def create_span(name, attributes=None, client=None, job_ref=None): """Creates a ContextManager for a Span to be exported to the configured exporter. - If no configuration exists yields None. - - Args: - name (str): Name that will be set for the span being created - attributes (Optional[dict]): - Additional attributes that pertain to - the specific API call (i.e. not a default attribute) - client (Optional[google.cloud.bigquery.client.Client]): - Pass in a Client object to extract any attributes that may be - relevant to it and add them to the created spans. - job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) - Pass in a _AsyncJob object to extract any attributes that may be - relevant to it and add them to the created spans. - - Yields: - opentelemetry.trace.Span: Yields the newly created Span. - - Raises: - google.api_core.exceptions.GoogleAPICallError: - Raised if a span could not be yielded or issue with call to - OpenTelemetry. - """ + If no configuration exists yields None. + + Args: + name (str): Name that will be set for the span being created + attributes (Optional[dict]): + Additional attributes that pertain to + the specific API call (i.e. not a default attribute) + client (Optional[google.cloud.bigquery.client.Client]): + Pass in a Client object to extract any attributes that may be + relevant to it and add them to the created spans. 
+ job_ref (Optional[google.cloud.bigquery.job._AsyncJob]) + Pass in a _AsyncJob object to extract any attributes that may be + relevant to it and add them to the created spans. + + Yields: + opentelemetry.trace.Span: Yields the newly created Span. + + Raises: + google.api_core.exceptions.GoogleAPICallError: + Raised if a span could not be yielded or issue with call to + OpenTelemetry. + """ global _warned_telemetry final_attributes = _get_final_span_attributes(attributes, client, job_ref) if not HAS_OPENTELEMETRY: diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index ad7c60f7d..0469cb271 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -363,8 +363,7 @@ def __repr__(self): class _AbstractQueryParameter(object): - """Base class for named / positional query parameters. - """ + """Base class for named / positional query parameters.""" @classmethod def from_api_repr(cls, resource: dict) -> "_AbstractQueryParameter": diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 4ccd6e70b..5580a2ae9 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -294,12 +294,14 @@ def to_standard_sql(self) -> standard_sql.StandardSqlField: sql_type.type_kind = StandardSqlTypeNames.ARRAY else: sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + self.field_type, + StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) if sql_type.type_kind == StandardSqlTypeNames.ARRAY: # noqa: E721 array_element_type = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, + self.field_type, + StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) sql_type.array_element_type = standard_sql.StandardSqlDataType( type_kind=array_element_type @@ -308,8 +310,10 @@ def to_standard_sql(self) -> standard_sql.StandardSqlField: # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type if array_element_type == StandardSqlTypeNames.STRUCT: # noqa: E721 - sql_type.array_element_type.struct_type = standard_sql.StandardSqlStructType( - fields=(field.to_standard_sql() for field in self.fields) + sql_type.array_element_type.struct_type = ( + standard_sql.StandardSqlStructType( + fields=(field.to_standard_sql() for field in self.fields) + ) ) elif sql_type.type_kind == StandardSqlTypeNames.STRUCT: # noqa: E721 sql_type.struct_type = standard_sql.StandardSqlStructType( @@ -411,8 +415,7 @@ def __init__(self, names: Iterable[str] = ()): @property def names(self): - """Tuple[str]: Policy tags associated with this definition. 
- """ + """Tuple[str]: Policy tags associated with this definition.""" return self._properties.get("names", ()) def _key(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 57a774c3e..ed4f214ce 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -206,7 +206,9 @@ def __init__(self, dataset_ref: "DatasetReference", table_id: str): dataset_ref.dataset_id, ) _helpers._set_sub_prop( - self._properties, self._PROPERTY_TO_API_FIELD["table_id"], table_id, + self._properties, + self._PROPERTY_TO_API_FIELD["table_id"], + table_id, ) @classmethod @@ -880,7 +882,9 @@ def mview_refresh_interval(self, value): api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] _helpers._set_sub_prop( - self._properties, [api_field, "refreshIntervalMs"], refresh_interval_ms, + self._properties, + [api_field, "refreshIntervalMs"], + refresh_interval_ms, ) @property @@ -1581,7 +1585,8 @@ def total_rows(self): return self._total_rows def _maybe_warn_max_results( - self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], + self, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], ): """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set. diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py index d300a417c..9f57acb7c 100644 --- a/google/cloud/bigquery_v2/types/encryption_config.py +++ b/google/cloud/bigquery_v2/types/encryption_config.py @@ -19,7 +19,10 @@ __protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"EncryptionConfiguration",}, + package="google.cloud.bigquery.v2", + manifest={ + "EncryptionConfiguration", + }, ) @@ -36,7 +39,9 @@ class EncryptionConfiguration(proto.Message): """ kms_key_name = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.StringValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.StringValue, ) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index f2c031715..7786d8ea4 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -252,8 +252,7 @@ class FeedbackType(proto.Enum): EXPLICIT = 2 class SeasonalPeriod(proto.Message): - r""" - """ + r""" """ class SeasonalPeriodType(proto.Enum): r"""""" @@ -266,8 +265,7 @@ class SeasonalPeriodType(proto.Enum): YEARLY = 6 class KmeansEnums(proto.Message): - r""" - """ + r""" """ class KmeansInitializationMethod(proto.Enum): r"""Indicates the method used to initialize the centroids for @@ -296,19 +294,29 @@ class RegressionMetrics(proto.Message): """ mean_absolute_error = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) mean_squared_log_error = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=3, + message=wrappers_pb2.DoubleValue, ) median_absolute_error = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) r_squared = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) class AggregateClassificationMetrics(proto.Message): @@ -352,23 +360,39 @@ class 
AggregateClassificationMetrics(proto.Message): """ precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field( + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) - recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,) accuracy = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=3, + message=wrappers_pb2.DoubleValue, ) threshold = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) f1_score = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) log_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, ) roc_auc = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.DoubleValue, ) class BinaryClassificationMetrics(proto.Message): @@ -417,43 +441,69 @@ class BinaryConfusionMatrix(proto.Message): """ positive_class_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) true_positives = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=2, + message=wrappers_pb2.Int64Value, ) false_positives = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.Int64Value, ) true_negatives = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=4, + message=wrappers_pb2.Int64Value, ) false_negatives = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=5, + message=wrappers_pb2.Int64Value, ) precision = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, ) recall = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.DoubleValue, ) f1_score = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.DoubleValue, ) accuracy = proto.Field( - proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=9, + message=wrappers_pb2.DoubleValue, ) aggregate_classification_metrics = proto.Field( - proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + proto.MESSAGE, + number=1, + message="Model.AggregateClassificationMetrics", ) binary_confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", ) - positive_label = proto.Field(proto.STRING, number=3,) - negative_label = proto.Field(proto.STRING, number=4,) + positive_label = proto.Field( + proto.STRING, + number=3, + ) + negative_label = proto.Field( + proto.STRING, + number=4, + ) class MultiClassClassificationMetrics(proto.Message): r"""Evaluation metrics for multi-class classification/classifier @@ -490,9 +540,14 @@ class Entry(proto.Message): label. 
""" - predicted_label = proto.Field(proto.STRING, number=1,) + predicted_label = proto.Field( + proto.STRING, + number=1, + ) item_count = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=2, + message=wrappers_pb2.Int64Value, ) class Row(proto.Message): @@ -505,7 +560,10 @@ class Row(proto.Message): Info describing predicted label distribution. """ - actual_label = proto.Field(proto.STRING, number=1,) + actual_label = proto.Field( + proto.STRING, + number=1, + ) entries = proto.RepeatedField( proto.MESSAGE, number=2, @@ -513,7 +571,9 @@ class Row(proto.Message): ) confidence_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) rows = proto.RepeatedField( proto.MESSAGE, @@ -522,7 +582,9 @@ class Row(proto.Message): ) aggregate_classification_metrics = proto.Field( - proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + proto.MESSAGE, + number=1, + message="Model.AggregateClassificationMetrics", ) confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, @@ -604,9 +666,14 @@ class CategoryCount(proto.Message): category within the cluster. """ - category = proto.Field(proto.STRING, number=1,) + category = proto.Field( + proto.STRING, + number=1, + ) count = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=2, + message=wrappers_pb2.Int64Value, ) category_counts = proto.RepeatedField( @@ -615,7 +682,10 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", ) - feature_column = proto.Field(proto.STRING, number=1,) + feature_column = proto.Field( + proto.STRING, + number=1, + ) numerical_value = proto.Field( proto.MESSAGE, number=2, @@ -629,24 +699,35 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", ) - centroid_id = proto.Field(proto.INT64, number=1,) + centroid_id = proto.Field( + proto.INT64, + number=1, + ) feature_values = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ClusteringMetrics.Cluster.FeatureValue", ) count = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.Int64Value, ) davies_bouldin_index = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) mean_squared_distance = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) clusters = proto.RepeatedField( - proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", + proto.MESSAGE, + number=3, + message="Model.ClusteringMetrics.Cluster", ) class RankingMetrics(proto.Message): @@ -677,16 +758,24 @@ class RankingMetrics(proto.Message): """ mean_average_precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=1, + message=wrappers_pb2.DoubleValue, ) mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) normalized_discounted_cumulative_gain = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=3, + message=wrappers_pb2.DoubleValue, ) average_rank = proto.Field( - proto.MESSAGE, number=4, 
message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) class ArimaForecastingMetrics(proto.Message): @@ -751,38 +840,71 @@ class ArimaSingleModelForecastingMetrics(proto.Message): """ non_seasonal_order = proto.Field( - proto.MESSAGE, number=1, message="Model.ArimaOrder", + proto.MESSAGE, + number=1, + message="Model.ArimaOrder", ) arima_fitting_metrics = proto.Field( - proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + proto.MESSAGE, + number=2, + message="Model.ArimaFittingMetrics", + ) + has_drift = proto.Field( + proto.BOOL, + number=3, + ) + time_series_id = proto.Field( + proto.STRING, + number=4, + ) + time_series_ids = proto.RepeatedField( + proto.STRING, + number=9, ) - has_drift = proto.Field(proto.BOOL, number=3,) - time_series_id = proto.Field(proto.STRING, number=4,) - time_series_ids = proto.RepeatedField(proto.STRING, number=9,) seasonal_periods = proto.RepeatedField( - proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", + proto.ENUM, + number=5, + enum="Model.SeasonalPeriod.SeasonalPeriodType", ) has_holiday_effect = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.BoolValue, ) has_spikes_and_dips = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.BoolValue, ) has_step_changes = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.BoolValue, ) non_seasonal_order = proto.RepeatedField( - proto.MESSAGE, number=1, message="Model.ArimaOrder", + proto.MESSAGE, + number=1, + message="Model.ArimaOrder", ) arima_fitting_metrics = proto.RepeatedField( - proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + proto.MESSAGE, + number=2, + message="Model.ArimaFittingMetrics", ) seasonal_periods = proto.RepeatedField( - proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", + proto.ENUM, + number=3, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + has_drift = proto.RepeatedField( + proto.BOOL, + number=4, + ) + time_series_id = proto.RepeatedField( + proto.STRING, + number=5, ) - has_drift = proto.RepeatedField(proto.BOOL, number=4,) - time_series_id = proto.RepeatedField(proto.STRING, number=5,) arima_single_model_forecasting_metrics = proto.RepeatedField( proto.MESSAGE, number=6, @@ -834,7 +956,10 @@ class EvaluationMetrics(proto.Message): """ regression_metrics = proto.Field( - proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", + proto.MESSAGE, + number=1, + oneof="metrics", + message="Model.RegressionMetrics", ) binary_classification_metrics = proto.Field( proto.MESSAGE, @@ -849,10 +974,16 @@ class EvaluationMetrics(proto.Message): message="Model.MultiClassClassificationMetrics", ) clustering_metrics = proto.Field( - proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", + proto.MESSAGE, + number=4, + oneof="metrics", + message="Model.ClusteringMetrics", ) ranking_metrics = proto.Field( - proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", + proto.MESSAGE, + number=5, + oneof="metrics", + message="Model.RankingMetrics", ) arima_forecasting_metrics = proto.Field( proto.MESSAGE, @@ -875,10 +1006,14 @@ class DataSplitResult(proto.Message): """ training_table = proto.Field( - proto.MESSAGE, number=1, message=table_reference.TableReference, + proto.MESSAGE, + number=1, + 
message=table_reference.TableReference, ) evaluation_table = proto.Field( - proto.MESSAGE, number=2, message=table_reference.TableReference, + proto.MESSAGE, + number=2, + message=table_reference.TableReference, ) class ArimaOrder(proto.Message): @@ -894,9 +1029,18 @@ class ArimaOrder(proto.Message): Order of the moving-average part. """ - p = proto.Field(proto.INT64, number=1,) - d = proto.Field(proto.INT64, number=2,) - q = proto.Field(proto.INT64, number=3,) + p = proto.Field( + proto.INT64, + number=1, + ) + d = proto.Field( + proto.INT64, + number=2, + ) + q = proto.Field( + proto.INT64, + number=3, + ) class ArimaFittingMetrics(proto.Message): r"""ARIMA model fitting metrics. @@ -910,9 +1054,18 @@ class ArimaFittingMetrics(proto.Message): Variance. """ - log_likelihood = proto.Field(proto.DOUBLE, number=1,) - aic = proto.Field(proto.DOUBLE, number=2,) - variance = proto.Field(proto.DOUBLE, number=3,) + log_likelihood = proto.Field( + proto.DOUBLE, + number=1, + ) + aic = proto.Field( + proto.DOUBLE, + number=2, + ) + variance = proto.Field( + proto.DOUBLE, + number=3, + ) class GlobalExplanation(proto.Message): r"""Global explanations containing the top most important @@ -943,15 +1096,25 @@ class Explanation(proto.Message): Attribution of feature. """ - feature_name = proto.Field(proto.STRING, number=1,) + feature_name = proto.Field( + proto.STRING, + number=1, + ) attribution = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) explanations = proto.RepeatedField( - proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", + proto.MESSAGE, + number=1, + message="Model.GlobalExplanation.Explanation", + ) + class_label = proto.Field( + proto.STRING, + number=2, ) - class_label = proto.Field(proto.STRING, number=2,) class TrainingRun(proto.Message): r"""Information about a single training query run for the model. @@ -1137,94 +1300,215 @@ class TrainingOptions(proto.Message): adjustment in the input time series. 
""" - max_iterations = proto.Field(proto.INT64, number=1,) - loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) - learn_rate = proto.Field(proto.DOUBLE, number=3,) + max_iterations = proto.Field( + proto.INT64, + number=1, + ) + loss_type = proto.Field( + proto.ENUM, + number=2, + enum="Model.LossType", + ) + learn_rate = proto.Field( + proto.DOUBLE, + number=3, + ) l1_regularization = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=4, + message=wrappers_pb2.DoubleValue, ) l2_regularization = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) min_relative_progress = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, ) warm_start = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.BoolValue, ) early_stop = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.BoolValue, + ) + input_label_columns = proto.RepeatedField( + proto.STRING, + number=9, ) - input_label_columns = proto.RepeatedField(proto.STRING, number=9,) data_split_method = proto.Field( - proto.ENUM, number=10, enum="Model.DataSplitMethod", + proto.ENUM, + number=10, + enum="Model.DataSplitMethod", + ) + data_split_eval_fraction = proto.Field( + proto.DOUBLE, + number=11, + ) + data_split_column = proto.Field( + proto.STRING, + number=12, ) - data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,) - data_split_column = proto.Field(proto.STRING, number=12,) learn_rate_strategy = proto.Field( - proto.ENUM, number=13, enum="Model.LearnRateStrategy", + proto.ENUM, + number=13, + enum="Model.LearnRateStrategy", + ) + initial_learn_rate = proto.Field( + proto.DOUBLE, + number=16, + ) + label_class_weights = proto.MapField( + proto.STRING, + proto.DOUBLE, + number=17, + ) + user_column = proto.Field( + proto.STRING, + number=18, + ) + item_column = proto.Field( + proto.STRING, + number=19, ) - initial_learn_rate = proto.Field(proto.DOUBLE, number=16,) - label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17,) - user_column = proto.Field(proto.STRING, number=18,) - item_column = proto.Field(proto.STRING, number=19,) distance_type = proto.Field( - proto.ENUM, number=20, enum="Model.DistanceType", + proto.ENUM, + number=20, + enum="Model.DistanceType", + ) + num_clusters = proto.Field( + proto.INT64, + number=21, + ) + model_uri = proto.Field( + proto.STRING, + number=22, ) - num_clusters = proto.Field(proto.INT64, number=21,) - model_uri = proto.Field(proto.STRING, number=22,) optimization_strategy = proto.Field( - proto.ENUM, number=23, enum="Model.OptimizationStrategy", + proto.ENUM, + number=23, + enum="Model.OptimizationStrategy", + ) + hidden_units = proto.RepeatedField( + proto.INT64, + number=24, + ) + batch_size = proto.Field( + proto.INT64, + number=25, ) - hidden_units = proto.RepeatedField(proto.INT64, number=24,) - batch_size = proto.Field(proto.INT64, number=25,) dropout = proto.Field( - proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=26, + message=wrappers_pb2.DoubleValue, + ) + max_tree_depth = proto.Field( + proto.INT64, + number=27, + ) + subsample = proto.Field( + proto.DOUBLE, + number=28, ) - max_tree_depth = proto.Field(proto.INT64, number=27,) - subsample = 
proto.Field(proto.DOUBLE, number=28,) min_split_loss = proto.Field( - proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=29, + message=wrappers_pb2.DoubleValue, + ) + num_factors = proto.Field( + proto.INT64, + number=30, ) - num_factors = proto.Field(proto.INT64, number=30,) feedback_type = proto.Field( - proto.ENUM, number=31, enum="Model.FeedbackType", + proto.ENUM, + number=31, + enum="Model.FeedbackType", ) wals_alpha = proto.Field( - proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=32, + message=wrappers_pb2.DoubleValue, ) kmeans_initialization_method = proto.Field( proto.ENUM, number=33, enum="Model.KmeansEnums.KmeansInitializationMethod", ) - kmeans_initialization_column = proto.Field(proto.STRING, number=34,) - time_series_timestamp_column = proto.Field(proto.STRING, number=35,) - time_series_data_column = proto.Field(proto.STRING, number=36,) - auto_arima = proto.Field(proto.BOOL, number=37,) + kmeans_initialization_column = proto.Field( + proto.STRING, + number=34, + ) + time_series_timestamp_column = proto.Field( + proto.STRING, + number=35, + ) + time_series_data_column = proto.Field( + proto.STRING, + number=36, + ) + auto_arima = proto.Field( + proto.BOOL, + number=37, + ) non_seasonal_order = proto.Field( - proto.MESSAGE, number=38, message="Model.ArimaOrder", + proto.MESSAGE, + number=38, + message="Model.ArimaOrder", ) data_frequency = proto.Field( - proto.ENUM, number=39, enum="Model.DataFrequency", + proto.ENUM, + number=39, + enum="Model.DataFrequency", + ) + include_drift = proto.Field( + proto.BOOL, + number=41, ) - include_drift = proto.Field(proto.BOOL, number=41,) holiday_region = proto.Field( - proto.ENUM, number=42, enum="Model.HolidayRegion", + proto.ENUM, + number=42, + enum="Model.HolidayRegion", + ) + time_series_id_column = proto.Field( + proto.STRING, + number=43, + ) + time_series_id_columns = proto.RepeatedField( + proto.STRING, + number=51, + ) + horizon = proto.Field( + proto.INT64, + number=44, + ) + preserve_input_structs = proto.Field( + proto.BOOL, + number=45, + ) + auto_arima_max_order = proto.Field( + proto.INT64, + number=46, ) - time_series_id_column = proto.Field(proto.STRING, number=43,) - time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) - horizon = proto.Field(proto.INT64, number=44,) - preserve_input_structs = proto.Field(proto.BOOL, number=45,) - auto_arima_max_order = proto.Field(proto.INT64, number=46,) decompose_time_series = proto.Field( - proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=50, + message=wrappers_pb2.BoolValue, ) clean_spikes_and_dips = proto.Field( - proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=52, + message=wrappers_pb2.BoolValue, ) adjust_step_changes = proto.Field( - proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=53, + message=wrappers_pb2.BoolValue, ) class IterationResult(proto.Message): @@ -1265,12 +1549,19 @@ class ClusterInfo(proto.Message): assigned to the cluster. 
""" - centroid_id = proto.Field(proto.INT64, number=1,) + centroid_id = proto.Field( + proto.INT64, + number=1, + ) cluster_radius = proto.Field( - proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=2, + message=wrappers_pb2.DoubleValue, ) cluster_size = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.Int64Value, ) class ArimaResult(proto.Message): @@ -1304,12 +1595,17 @@ class ArimaCoefficients(proto.Message): """ auto_regressive_coefficients = proto.RepeatedField( - proto.DOUBLE, number=1, + proto.DOUBLE, + number=1, ) moving_average_coefficients = proto.RepeatedField( - proto.DOUBLE, number=2, + proto.DOUBLE, + number=2, + ) + intercept_coefficient = proto.Field( + proto.DOUBLE, + number=3, ) - intercept_coefficient = proto.Field(proto.DOUBLE, number=3,) class ArimaModelInfo(proto.Message): r"""Arima model information. @@ -1351,7 +1647,9 @@ class ArimaModelInfo(proto.Message): """ non_seasonal_order = proto.Field( - proto.MESSAGE, number=1, message="Model.ArimaOrder", + proto.MESSAGE, + number=1, + message="Model.ArimaOrder", ) arima_coefficients = proto.Field( proto.MESSAGE, @@ -1359,24 +1657,41 @@ class ArimaModelInfo(proto.Message): message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", ) arima_fitting_metrics = proto.Field( - proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", + proto.MESSAGE, + number=3, + message="Model.ArimaFittingMetrics", + ) + has_drift = proto.Field( + proto.BOOL, + number=4, + ) + time_series_id = proto.Field( + proto.STRING, + number=5, + ) + time_series_ids = proto.RepeatedField( + proto.STRING, + number=10, ) - has_drift = proto.Field(proto.BOOL, number=4,) - time_series_id = proto.Field(proto.STRING, number=5,) - time_series_ids = proto.RepeatedField(proto.STRING, number=10,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) has_holiday_effect = proto.Field( - proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=7, + message=wrappers_pb2.BoolValue, ) has_spikes_and_dips = proto.Field( - proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=8, + message=wrappers_pb2.BoolValue, ) has_step_changes = proto.Field( - proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + proto.MESSAGE, + number=9, + message=wrappers_pb2.BoolValue, ) arima_model_info = proto.RepeatedField( @@ -1391,18 +1706,29 @@ class ArimaModelInfo(proto.Message): ) index = proto.Field( - proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value, + proto.MESSAGE, + number=1, + message=wrappers_pb2.Int32Value, ) duration_ms = proto.Field( - proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, + proto.MESSAGE, + number=4, + message=wrappers_pb2.Int64Value, ) training_loss = proto.Field( - proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=5, + message=wrappers_pb2.DoubleValue, ) eval_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + proto.MESSAGE, + number=6, + message=wrappers_pb2.DoubleValue, + ) + learn_rate = proto.Field( + proto.DOUBLE, + number=7, ) - learn_rate = proto.Field(proto.DOUBLE, number=7,) cluster_infos = proto.RepeatedField( proto.MESSAGE, number=8, @@ -1415,47 +1741,103 @@ class ArimaModelInfo(proto.Message): ) training_options = proto.Field( - proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", + proto.MESSAGE, + number=1, + 
message="Model.TrainingRun.TrainingOptions", ) start_time = proto.Field( - proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp, + proto.MESSAGE, + number=8, + message=timestamp_pb2.Timestamp, ) results = proto.RepeatedField( - proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", + proto.MESSAGE, + number=6, + message="Model.TrainingRun.IterationResult", ) evaluation_metrics = proto.Field( - proto.MESSAGE, number=7, message="Model.EvaluationMetrics", + proto.MESSAGE, + number=7, + message="Model.EvaluationMetrics", ) data_split_result = proto.Field( - proto.MESSAGE, number=9, message="Model.DataSplitResult", + proto.MESSAGE, + number=9, + message="Model.DataSplitResult", ) global_explanations = proto.RepeatedField( - proto.MESSAGE, number=10, message="Model.GlobalExplanation", + proto.MESSAGE, + number=10, + message="Model.GlobalExplanation", ) - etag = proto.Field(proto.STRING, number=1,) + etag = proto.Field( + proto.STRING, + number=1, + ) model_reference = proto.Field( - proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, + proto.MESSAGE, + number=2, + message=gcb_model_reference.ModelReference, + ) + creation_time = proto.Field( + proto.INT64, + number=5, + ) + last_modified_time = proto.Field( + proto.INT64, + number=6, + ) + description = proto.Field( + proto.STRING, + number=12, + ) + friendly_name = proto.Field( + proto.STRING, + number=14, + ) + labels = proto.MapField( + proto.STRING, + proto.STRING, + number=15, + ) + expiration_time = proto.Field( + proto.INT64, + number=16, + ) + location = proto.Field( + proto.STRING, + number=13, ) - creation_time = proto.Field(proto.INT64, number=5,) - last_modified_time = proto.Field(proto.INT64, number=6,) - description = proto.Field(proto.STRING, number=12,) - friendly_name = proto.Field(proto.STRING, number=14,) - labels = proto.MapField(proto.STRING, proto.STRING, number=15,) - expiration_time = proto.Field(proto.INT64, number=16,) - location = proto.Field(proto.STRING, number=13,) encryption_configuration = proto.Field( - proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, + proto.MESSAGE, + number=17, + message=encryption_config.EncryptionConfiguration, + ) + model_type = proto.Field( + proto.ENUM, + number=7, + enum=ModelType, + ) + training_runs = proto.RepeatedField( + proto.MESSAGE, + number=9, + message=TrainingRun, ) - model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) - training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) feature_columns = proto.RepeatedField( - proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, + proto.MESSAGE, + number=10, + message=standard_sql.StandardSqlField, ) label_columns = proto.RepeatedField( - proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, + proto.MESSAGE, + number=11, + message=standard_sql.StandardSqlField, + ) + best_trial_id = proto.Field( + proto.INT64, + number=19, ) - best_trial_id = proto.Field(proto.INT64, number=19,) class GetModelRequest(proto.Message): @@ -1470,9 +1852,18 @@ class GetModelRequest(proto.Message): Required. Model ID of the requested model. 
""" - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) class PatchModelRequest(proto.Message): @@ -1492,10 +1883,23 @@ class PatchModelRequest(proto.Message): set to default value. """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) - model = proto.Field(proto.MESSAGE, number=4, message="Model",) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) + model = proto.Field( + proto.MESSAGE, + number=4, + message="Model", + ) class DeleteModelRequest(proto.Message): @@ -1510,9 +1914,18 @@ class DeleteModelRequest(proto.Message): Required. Model ID of the model to delete. """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) class ListModelsRequest(proto.Message): @@ -1532,12 +1945,23 @@ class ListModelsRequest(proto.Message): request the next page of results """ - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) max_results = proto.Field( - proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value, + proto.MESSAGE, + number=3, + message=wrappers_pb2.UInt32Value, + ) + page_token = proto.Field( + proto.STRING, + number=4, ) - page_token = proto.Field(proto.STRING, number=4,) class ListModelsResponse(proto.Message): @@ -1556,8 +1980,15 @@ class ListModelsResponse(proto.Message): def raw_page(self): return self - models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) - next_page_token = proto.Field(proto.STRING, number=2,) + models = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model", + ) + next_page_token = proto.Field( + proto.STRING, + number=2, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py index a4aab59f7..cde139ebe 100644 --- a/google/cloud/bigquery_v2/types/model_reference.py +++ b/google/cloud/bigquery_v2/types/model_reference.py @@ -17,7 +17,10 @@ __protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"ModelReference",}, + package="google.cloud.bigquery.v2", + manifest={ + "ModelReference", + }, ) @@ -37,9 +40,18 @@ class ModelReference(proto.Message): maximum length is 1,024 characters. 
""" - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - model_id = proto.Field(proto.STRING, number=3,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + model_id = proto.Field( + proto.STRING, + number=3, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index 579c3b138..3be5304fc 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -78,12 +78,22 @@ class TypeKind(proto.Enum): ARRAY = 16 STRUCT = 17 - type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) + type_kind = proto.Field( + proto.ENUM, + number=1, + enum=TypeKind, + ) array_element_type = proto.Field( - proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", + proto.MESSAGE, + number=2, + oneof="sub_type", + message="StandardSqlDataType", ) struct_type = proto.Field( - proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", + proto.MESSAGE, + number=3, + oneof="sub_type", + message="StandardSqlStructType", ) @@ -102,8 +112,15 @@ class StandardSqlField(proto.Message): this "type" field). """ - name = proto.Field(proto.STRING, number=1,) - type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) + name = proto.Field( + proto.STRING, + number=1, + ) + type = proto.Field( + proto.MESSAGE, + number=2, + message="StandardSqlDataType", + ) class StandardSqlStructType(proto.Message): @@ -114,7 +131,11 @@ class StandardSqlStructType(proto.Message): """ - fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + fields = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="StandardSqlField", + ) class StandardSqlTableType(proto.Message): @@ -125,7 +146,11 @@ class StandardSqlTableType(proto.Message): The columns in this table type """ - columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + columns = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="StandardSqlField", + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py index aaab83e85..c02eb206f 100644 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -17,7 +17,10 @@ __protobuf__ = proto.module( - package="google.cloud.bigquery.v2", manifest={"TableReference",}, + package="google.cloud.bigquery.v2", + manifest={ + "TableReference", + }, ) @@ -48,12 +51,30 @@ class TableReference(proto.Message): to translate the received data to the project_id field. 
""" - project_id = proto.Field(proto.STRING, number=1,) - dataset_id = proto.Field(proto.STRING, number=2,) - table_id = proto.Field(proto.STRING, number=3,) - project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) - dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) - table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) + project_id = proto.Field( + proto.STRING, + number=1, + ) + dataset_id = proto.Field( + proto.STRING, + number=2, + ) + table_id = proto.Field( + proto.STRING, + number=3, + ) + project_id_alternative = proto.RepeatedField( + proto.STRING, + number=4, + ) + dataset_id_alternative = proto.RepeatedField( + proto.STRING, + number=5, + ) + table_id_alternative = proto.RepeatedField( + proto.STRING, + number=6, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/noxfile.py b/noxfile.py index 018751168..f088e10c2 100644 --- a/noxfile.py +++ b/noxfile.py @@ -24,7 +24,7 @@ MYPY_VERSION = "mypy==0.910" PYTYPE_VERSION = "pytype==2021.4.9" -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") DEFAULT_PYTHON_VERSION = "3.8" @@ -123,7 +123,10 @@ def mypy(session): # Just install the dependencies' type info directly, since "mypy --install-types" # might require an additional pass. session.install( - "types-protobuf", "types-python-dateutil", "types-requests", "types-setuptools", + "types-protobuf", + "types-python-dateutil", + "types-requests", + "types-setuptools", ) session.run("mypy", "google/cloud") diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 85f5836db..25f87a215 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -253,7 +253,7 @@ def py(session: nox.sessions.Session) -> None: def _get_repo_root() -> Optional[str]: - """ Returns the root folder of the project. """ + """Returns the root folder of the project.""" # Get root of this repository. Assume we don't have directories nested deeper than 10 items. 
p = Path(os.getcwd()) for i in range(10): diff --git a/samples/load_table_dataframe.py b/samples/load_table_dataframe.py index 6cd06f266..db4c131f2 100644 --- a/samples/load_table_dataframe.py +++ b/samples/load_table_dataframe.py @@ -35,7 +35,7 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": records = [ { - "title": u"The Meaning of Life", + "title": "The Meaning of Life", "release_year": 1983, "length_minutes": 112.5, "release_date": pytz.timezone("Europe/Paris") @@ -45,7 +45,7 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0), }, { - "title": u"Monty Python and the Holy Grail", + "title": "Monty Python and the Holy Grail", "release_year": 1975, "length_minutes": 91.5, "release_date": pytz.timezone("Europe/London") @@ -54,7 +54,7 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0), }, { - "title": u"Life of Brian", + "title": "Life of Brian", "release_year": 1979, "length_minutes": 94.25, "release_date": pytz.timezone("America/New_York") @@ -63,7 +63,7 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0), }, { - "title": u"And Now for Something Completely Different", + "title": "And Now for Something Completely Different", "release_year": 1971, "length_minutes": 88.0, "release_date": pytz.timezone("Europe/London") @@ -86,7 +86,7 @@ def load_table_dataframe(table_id: str) -> "bigquery.Table": # Optionally, set a named index, which can also be written to the # BigQuery table. index=pandas.Index( - [u"Q24980", u"Q25043", u"Q24953", u"Q16403"], name="wikidata_id" + ["Q24980", "Q25043", "Q24953", "Q16403"], name="wikidata_id" ), ) job_config = bigquery.LoadJobConfig( diff --git a/samples/load_table_file.py b/samples/load_table_file.py index 81df368f0..00226eb3c 100644 --- a/samples/load_table_file.py +++ b/samples/load_table_file.py @@ -30,7 +30,9 @@ def load_table_file(file_path: str, table_id: str) -> "bigquery.Table": # table_id = "your-project.your_dataset.your_table_name" job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.CSV, skip_leading_rows=1, autodetect=True, + source_format=bigquery.SourceFormat.CSV, + skip_leading_rows=1, + autodetect=True, ) with open(file_path, "rb") as source_file: diff --git a/samples/load_table_uri_parquet.py b/samples/load_table_uri_parquet.py index 6ea032f71..e0ec59078 100644 --- a/samples/load_table_uri_parquet.py +++ b/samples/load_table_uri_parquet.py @@ -23,7 +23,9 @@ def load_table_uri_parquet(table_id: str) -> None: # TODO(developer): Set table_id to the ID of the table to create. # table_id = "your-project.your_dataset.your_table_name" - job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.PARQUET,) + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.PARQUET, + ) uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" load_job = client.load_table_from_uri( diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index 85f5836db..25f87a215 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" # Copy `noxfile_config.py` to your directory and modify it instead. 
@@ -253,7 +253,7 @@ def py(session: nox.sessions.Session) -> None: def _get_repo_root() -> Optional[str]: - """ Returns the root folder of the project. """ + """Returns the root folder of the project.""" # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) for i in range(10): diff --git a/samples/snippets/authenticate_service_account.py b/samples/snippets/authenticate_service_account.py index e44766886..8a8c9557d 100644 --- a/samples/snippets/authenticate_service_account.py +++ b/samples/snippets/authenticate_service_account.py @@ -31,7 +31,8 @@ def main() -> "bigquery.Client": # key_path = "path/to/service_account.json" credentials = service_account.Credentials.from_service_account_file( - key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"], + key_path, + scopes=["https://www.googleapis.com/auth/cloud-platform"], ) # Alternatively, use service_account.Credentials.from_service_account_info() @@ -39,7 +40,10 @@ def main() -> "bigquery.Client": # TODO(developer): Set key_json to the content of the service account key file. # credentials = service_account.Credentials.from_service_account_info(key_json) - client = bigquery.Client(credentials=credentials, project=credentials.project_id,) + client = bigquery.Client( + credentials=credentials, + project=credentials.project_id, + ) # [END bigquery_client_json_credentials] return client diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 3bbfe1c74..37b52256b 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -59,7 +59,8 @@ def entity_id(bigquery_client: bigquery.Client, dataset_id: str) -> str: @pytest.fixture(scope="session") def dataset_id_us_east1( - bigquery_client: bigquery.Client, project_id: str, + bigquery_client: bigquery.Client, + project_id: str, ) -> Iterator[str]: dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" diff --git a/samples/snippets/create_table_external_hive_partitioned_test.py b/samples/snippets/create_table_external_hive_partitioned_test.py index 3ff39c881..37deb8b12 100644 --- a/samples/snippets/create_table_external_hive_partitioned_test.py +++ b/samples/snippets/create_table_external_hive_partitioned_test.py @@ -23,8 +23,10 @@ def test_create_table_external_hive_partitioned( capsys: "pytest.CaptureFixture[str]", random_table_id: str ) -> None: - table = create_table_external_hive_partitioned.create_table_external_hive_partitioned( - random_table_id + table = ( + create_table_external_hive_partitioned.create_table_external_hive_partitioned( + random_table_id + ) ) out, _ = capsys.readouterr() diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py index 0bc83e4a6..ac9d52dcf 100644 --- a/samples/snippets/delete_job_test.py +++ b/samples/snippets/delete_job_test.py @@ -28,7 +28,8 @@ def test_delete_job_metadata( table_id_us_east1: str, ) -> None: query_job: bigquery.QueryJob = bigquery_client.query( - f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1", + f"SELECT COUNT(*) FROM `{table_id_us_east1}`", + location="us-east1", ) query_job.result() assert query_job.job_id is not None diff --git a/samples/snippets/manage_job_cancel.py b/samples/snippets/manage_job_cancel.py index b0408b837..9cbdef450 100644 --- a/samples/snippets/manage_job_cancel.py +++ b/samples/snippets/manage_job_cancel.py @@ -17,7 +17,9 @@ def cancel_job( - client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", + 
client: bigquery.Client, + location: str = "us", + job_id: str = "abcd-efgh-ijkl-mnop", ) -> None: job = client.cancel_job(job_id, location=location) print(f"{job.location}:{job.job_id} cancelled") diff --git a/samples/snippets/manage_job_get.py b/samples/snippets/manage_job_get.py index f637edfe1..ca7ffc0c9 100644 --- a/samples/snippets/manage_job_get.py +++ b/samples/snippets/manage_job_get.py @@ -17,7 +17,9 @@ def get_job( - client: bigquery.Client, location: str = "us", job_id: str = "abcd-efgh-ijkl-mnop", + client: bigquery.Client, + location: str = "us", + job_id: str = "abcd-efgh-ijkl-mnop", ) -> None: job = client.get_job(job_id, location=location) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 85f5836db..25f87a215 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==19.10b0" +BLACK_VERSION = "black==22.3.0" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -253,7 +253,7 @@ def py(session: nox.sessions.Session) -> None: def _get_repo_root() -> Optional[str]: - """ Returns the root folder of the project. """ + """Returns the root folder of the project.""" # Get root of this repository. Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) for i in range(10): diff --git a/samples/snippets/update_dataset_access.py b/samples/snippets/update_dataset_access.py index 9ce4f02c3..7b3293ea5 100644 --- a/samples/snippets/update_dataset_access.py +++ b/samples/snippets/update_dataset_access.py @@ -59,7 +59,11 @@ def update_dataset_access(dataset_id: str, entity_id: str) -> None: entries = list(dataset.access_entries) entries.append( - bigquery.AccessEntry(role=role, entity_type=entity_type, entity_id=entity_id,) + bigquery.AccessEntry( + role=role, + entity_type=entity_type, + entity_id=entity_id, + ) ) dataset.access_entries = entries diff --git a/samples/table_insert_rows.py b/samples/table_insert_rows.py index 897133330..8aa723fe0 100644 --- a/samples/table_insert_rows.py +++ b/samples/table_insert_rows.py @@ -25,8 +25,8 @@ def table_insert_rows(table_id: str) -> None: # table_id = "your-project.your_dataset.your_table" rows_to_insert = [ - {u"full_name": u"Phred Phlyntstone", u"age": 32}, - {u"full_name": u"Wylma Phlyntstone", u"age": 29}, + {"full_name": "Phred Phlyntstone", "age": 32}, + {"full_name": "Wylma Phlyntstone", "age": 29}, ] errors = client.insert_rows_json(table_id, rows_to_insert) # Make an API request. 
diff --git a/samples/table_insert_rows_explicit_none_insert_ids.py b/samples/table_insert_rows_explicit_none_insert_ids.py index 1ccb1acc4..b2bd06372 100644 --- a/samples/table_insert_rows_explicit_none_insert_ids.py +++ b/samples/table_insert_rows_explicit_none_insert_ids.py @@ -25,8 +25,8 @@ def table_insert_rows_explicit_none_insert_ids(table_id: str) -> None: # table_id = "your-project.your_dataset.your_table" rows_to_insert = [ - {u"full_name": u"Phred Phlyntstone", u"age": 32}, - {u"full_name": u"Wylma Phlyntstone", u"age": 29}, + {"full_name": "Phred Phlyntstone", "age": 32}, + {"full_name": "Wylma Phlyntstone", "age": 29}, ] errors = client.insert_rows_json( diff --git a/samples/tests/test_client_query_relax_column.py b/samples/tests/test_client_query_relax_column.py index f910d61f0..93fa0f3cf 100644 --- a/samples/tests/test_client_query_relax_column.py +++ b/samples/tests/test_client_query_relax_column.py @@ -23,7 +23,9 @@ def test_client_query_relax_column( - capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client, + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + client: bigquery.Client, ) -> None: schema = [ diff --git a/samples/tests/test_load_table_dataframe.py b/samples/tests/test_load_table_dataframe.py index 152c82f8c..9a975493c 100644 --- a/samples/tests/test_load_table_dataframe.py +++ b/samples/tests/test_load_table_dataframe.py @@ -27,7 +27,9 @@ def test_load_table_dataframe( - capsys: pytest.CaptureFixture[str], client: "bigquery.Client", random_table_id: str, + capsys: pytest.CaptureFixture[str], + client: "bigquery.Client", + random_table_id: str, ) -> None: table = load_table_dataframe.load_table_dataframe(random_table_id) @@ -57,10 +59,10 @@ def test_load_table_dataframe( df = client.list_rows(table).to_dataframe() df.sort_values("release_year", inplace=True) assert df["title"].tolist() == [ - u"And Now for Something Completely Different", - u"Monty Python and the Holy Grail", - u"Life of Brian", - u"The Meaning of Life", + "And Now for Something Completely Different", + "Monty Python and the Holy Grail", + "Life of Brian", + "The Meaning of Life", ] assert df["release_year"].tolist() == [1971, 1975, 1979, 1983] assert df["length_minutes"].tolist() == [88.0, 91.5, 94.25, 112.5] @@ -76,4 +78,4 @@ def test_load_table_dataframe( pandas.Timestamp("2008-01-14T08:00:00"), pandas.Timestamp("2002-01-22T07:00:00"), ] - assert df["wikidata_id"].tolist() == [u"Q16403", u"Q25043", u"Q24953", u"Q24980"] + assert df["wikidata_id"].tolist() == ["Q16403", "Q25043", "Q24953", "Q24980"] diff --git a/samples/tests/test_table_insert_rows.py b/samples/tests/test_table_insert_rows.py index 410137631..59024fa95 100644 --- a/samples/tests/test_table_insert_rows.py +++ b/samples/tests/test_table_insert_rows.py @@ -23,7 +23,9 @@ def test_table_insert_rows( - capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client, + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, + client: bigquery.Client, ) -> None: schema = [ diff --git a/samples/tests/test_update_table_require_partition_filter.py b/samples/tests/test_update_table_require_partition_filter.py index 580796ed3..68e1c1e2b 100644 --- a/samples/tests/test_update_table_require_partition_filter.py +++ b/samples/tests/test_update_table_require_partition_filter.py @@ -23,7 +23,9 @@ def test_update_table_require_partition_filter( - capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client, + capsys: "pytest.CaptureFixture[str]", + 
random_table_id: str, + client: bigquery.Client, ) -> None: # Make a partitioned table. diff --git a/tests/system/helpers.py b/tests/system/helpers.py index 76e609345..721f55040 100644 --- a/tests/system/helpers.py +++ b/tests/system/helpers.py @@ -39,9 +39,15 @@ ('SELECT "ABC"', "ABC"), ('SELECT CAST("foo" AS BYTES)', b"foo"), ('SELECT TIMESTAMP "%s"' % (_stamp,), _zoned), - ('SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), _zoned_microseconds,), + ( + 'SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), + _zoned_microseconds, + ), ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp,), _naive), - ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), _naive_microseconds,), + ( + 'SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), + _naive_microseconds, + ), ('SELECT DATE(TIMESTAMP "%s")' % (_stamp,), _naive.date()), ('SELECT TIME(TIMESTAMP "%s")' % (_stamp,), _naive.time()), ('SELECT NUMERIC "%s"' % (_numeric,), _numeric), @@ -90,5 +96,6 @@ def _rate_limit_exceeded(forbidden): # they return instead of the more appropriate 429. # See https://cloud.google.com/bigquery/quota-policy retry_403 = test_utils.retry.RetryErrors( - google.api_core.exceptions.Forbidden, error_predicate=_rate_limit_exceeded, + google.api_core.exceptions.Forbidden, + error_predicate=_rate_limit_exceeded, ) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index 092562b3c..8b88b6844 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -61,7 +61,9 @@ def test_list_rows_nullable_scalars_dtypes( ] arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, selected_fields=schema, + table_id, + max_results=max_results, + selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/tests/system/test_client.py b/tests/system/test_client.py index a00193788..773ef3c90 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1888,17 +1888,20 @@ def test_create_tvf_routine(self): return_table_type = StandardSqlTableType( columns=[ StandardSqlField( - name="int_col", type=StandardSqlDataType(type_kind=INT64), + name="int_col", + type=StandardSqlDataType(type_kind=INT64), ), StandardSqlField( - name="str_col", type=StandardSqlDataType(type_kind=STRING), + name="str_col", + type=StandardSqlDataType(type_kind=STRING), ), ] ) routine_args = [ RoutineArgument( - name="threshold", data_type=StandardSqlDataType(type_kind=INT64), + name="threshold", + data_type=StandardSqlDataType(type_kind=INT64), ) ] diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 73b88964f..34e4243c4 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -618,7 +618,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( [ ("bool_col", [True, None, False]), ("bytes_col", ["abc", None, "def"]), - ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],), + ( + "date_col", + [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], + ), ( "dt_col", [ @@ -1000,7 +1003,11 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): @pytest.mark.parametrize( - ("max_results",), ((None,), (10,),) # Use BQ Storage API. # Use REST API. + ("max_results",), + ( + (None,), + (10,), + ), # Use BQ Storage API. # Use REST API. 
) def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_results): # TODO(GH#836): Avoid INTERVAL columns until they are supported by the @@ -1021,7 +1028,9 @@ def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_r ] df = bigquery_client.list_rows( - scalars_table, max_results=max_results, selected_fields=schema, + scalars_table, + max_results=max_results, + selected_fields=schema, ).to_dataframe() assert df.dtypes["bool_col"].name == "boolean" @@ -1042,7 +1051,11 @@ def test_list_rows_nullable_scalars_dtypes(bigquery_client, scalars_table, max_r @pytest.mark.parametrize( - ("max_results",), ((None,), (10,),) # Use BQ Storage API. # Use REST API. + ("max_results",), + ( + (None,), + (10,), + ), # Use BQ Storage API. # Use REST API. ) def test_list_rows_nullable_scalars_extreme_dtypes( bigquery_client, scalars_extreme_table, max_results @@ -1065,7 +1078,9 @@ def test_list_rows_nullable_scalars_extreme_dtypes( ] df = bigquery_client.list_rows( - scalars_extreme_table, max_results=max_results, selected_fields=schema, + scalars_extreme_table, + max_results=max_results, + selected_fields=schema, ).to_dataframe() # Extreme values are out-of-bounds for pandas datetime64 values, which use diff --git a/tests/system/test_query.py b/tests/system/test_query.py index f76b1e6ca..723f927d7 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -296,9 +296,15 @@ def test_query_statistics(bigquery_client, query_api_method): StructQueryParameter( "hitchhiker", ScalarQueryParameter( - name="question", type_="STRING", value="What is the answer?", + name="question", + type_="STRING", + value="What is the answer?", + ), + ScalarQueryParameter( + name="answer", + type_="INT64", + value=42, ), - ScalarQueryParameter(name="answer", type_="INT64", value=42,), ), ], ), @@ -412,7 +418,9 @@ def test_query_parameters( jconfig = bigquery.QueryJobConfig() jconfig.query_parameters = query_parameters query_job = bigquery_client.query( - sql, job_config=jconfig, api_method=query_api_method, + sql, + job_config=jconfig, + api_method=query_api_method, ) rows = list(query_job.result()) assert len(rows) == 1 diff --git a/tests/system/test_structs.py b/tests/system/test_structs.py index 20740f614..1812b6fde 100644 --- a/tests/system/test_structs.py +++ b/tests/system/test_structs.py @@ -23,7 +23,8 @@ def test_structs(bigquery_client, dataset_id, person_type_decl, table_id): ], ) cursor.execute( - f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data), + f"insert into {table_id} (person) values (%(v:{person_type})s)", + dict(v=data), ) cursor.execute(f"select * from {table_id}") diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 250be83bb..f0525c22a 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -991,7 +991,10 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.1, + predicate=custom_predicate, + initial=0.001, + maximum=0.001, + deadline=0.1, ) self.assertIs(job.result(retry=custom_retry), job) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index e35051c5c..84aab3aca 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -162,8 +162,10 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): dataframe = 
job.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe) == 4 - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **job_resource["configuration"]["query"]["destinationTable"] + destination_table = ( + "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **job_resource["configuration"]["query"]["destinationTable"] + ) ) expected_session = bigquery_storage.ReadSession( table=destination_table, @@ -286,7 +288,8 @@ def test_to_arrow_max_results_no_progress_bar(): row_iterator = table.RowIterator(client, api_request, path, schema) result_patch = mock.patch( - "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator, + "google.cloud.bigquery.job.QueryJob.result", + return_value=row_iterator, ) with result_patch as result_patch_tqdm: tbl = job.to_arrow(create_bqstorage_client=False, max_results=123) @@ -563,8 +566,10 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): dataframe = job.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe) == 4 - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **resource["configuration"]["query"]["destinationTable"] + destination_table = ( + "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) ) expected_session = bigquery_storage.ReadSession( table=destination_table, @@ -608,11 +613,14 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ): job.to_dataframe(bqstorage_client=bqstorage_client) - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **resource["configuration"]["query"]["destinationTable"] + destination_table = ( + "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) ) expected_session = bigquery_storage.ReadSession( - table=destination_table, data_format=bigquery_storage.DataFormat.ARROW, + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, ) bqstorage_client.create_read_session.assert_called_once_with( parent=f"projects/{client.project}", @@ -726,7 +734,11 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = make_connection( - begun_resource, query_resource, done_resource, query_resource, query_resource, + begun_resource, + query_resource, + done_resource, + query_resource, + query_resource, ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 2377be79c..885e773d3 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1079,7 +1079,8 @@ def test_w_dict_unknown_fields(self): # Unknown fields should be included (if not None), but converted as strings. self.assertEqual( - converted, {"whoami": "2020-07-20", "one": "111", "two": "222"}, + converted, + {"whoami": "2020-07-20", "one": "111", "two": "222"}, ) @@ -1256,8 +1257,7 @@ def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()): def _field_isinstance_patcher(): - """A patcher thank makes _Field instances seem like SchemaField instances. 
- """ + """A patcher thank makes _Field instances seem like SchemaField instances.""" from google.cloud.bigquery.schema import SchemaField def fake_isinstance(instance, target_class): diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 63dde75e7..012352f4e 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -184,7 +184,11 @@ def test__to_query_job_dry_run(): @pytest.mark.parametrize( - ("completed", "expected_state"), ((True, "DONE"), (False, "PENDING"),), + ("completed", "expected_state"), + ( + (True, "DONE"), + (False, "PENDING"), + ), ) def test__to_query_job_sets_state(completed, expected_state): mock_client = mock.create_autospec(Client) @@ -278,7 +282,11 @@ def test_query_jobs_query_sets_format_options(): @pytest.mark.parametrize( ("timeout", "expected_timeout"), - ((-1, 0), (0, 0), (1, 1000 - _job_helpers._TIMEOUT_BUFFER_MILLIS),), + ( + (-1, 0), + (0, 0), + (1, 1000 - _job_helpers._TIMEOUT_BUFFER_MILLIS), + ), ) def test_query_jobs_query_sets_timeout(timeout, expected_timeout): mock_client = mock.create_autospec(Client) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index e8d9562e6..5b2fadaf1 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1546,7 +1546,8 @@ def test__download_table_bqstorage( bqstorage_client.create_read_session.return_value = fake_session table_ref = table.TableReference( - dataset.DatasetReference("project-x", "dataset-y"), "table-z", + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", ) def fake_download_stream( @@ -1723,7 +1724,8 @@ def test_bq_to_arrow_field_type_override(module_under_test): assert ( module_under_test.bq_to_arrow_field( - schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + schema.SchemaField("g", "GEOGRAPHY"), + pyarrow.binary(), ).type == pyarrow.binary() ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 073452002..30bab8fa9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -236,7 +236,9 @@ def test__call_api_applying_custom_retry_on_timeout(self): client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( - client._connection, "api_request", side_effect=[TimeoutError, "result"], + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, TimeoutError) @@ -259,7 +261,9 @@ def test__call_api_span_creator_not_called(self): client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( - client._connection, "api_request", side_effect=[TimeoutError, "result"], + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, TimeoutError) @@ -281,7 +285,9 @@ def test__call_api_span_creator_called(self): client = self._make_one(project=self.PROJECT, credentials=creds) api_request_patcher = mock.patch.object( - client._connection, "api_request", side_effect=[TimeoutError, "result"], + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, TimeoutError) @@ -446,7 +452,9 @@ def test_get_service_account_email_w_custom_retry(self): "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", } api_request_patcher = mock.patch.object( - 
client._connection, "api_request", side_effect=[ValueError, resource], + client._connection, + "api_request", + side_effect=[ValueError, resource], ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( @@ -664,7 +672,10 @@ def test_create_routine_w_minimal_resource(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=7.5, + method="POST", + path=path, + data=resource, + timeout=7.5, ) self.assertEqual( actual_routine.reference, RoutineReference.from_string(full_routine_id) @@ -698,7 +709,10 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + method="POST", + path=path, + data=resource, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -737,7 +751,10 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + method="POST", + path=path, + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -774,7 +791,10 @@ def test_create_routine_w_conflict_exists_ok(self): conn.api_request.assert_has_calls( [ mock.call( - method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + method="POST", + path=path, + data=resource, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -1174,7 +1194,9 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) final_attributes.assert_called_with( - {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, + {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) conn.api_request.assert_called_once_with( @@ -1344,7 +1366,9 @@ def test_get_routine(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path=path, timeout=7.5, + method="GET", + path=path, + timeout=7.5, ) self.assertEqual( actual_routine.reference, @@ -1426,7 +1450,9 @@ def test_get_iam_policy(self): from google.api_core.iam import Policy PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) BODY = {"options": {"requestedPolicyVersion": 1}} ETAG = "CARDI" @@ -1477,7 +1503,9 @@ def test_get_iam_policy_w_invalid_table(self): client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) table_resource_string = "projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) with self.assertRaises(ValueError): @@ -1697,7 +1725,11 @@ def test_update_dataset(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) + ds2 = client.update_dataset( + ds, + fields=fields, + timeout=7.5, + ) final_attributes.assert_called_once_with( {"path": "/%s" % PATH, "fields": fields}, client, None @@ -1874,7 +1906,11 @@ def test_update_routine(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.update_routine(routine, fields, timeout=7.5,) + actual_routine = client.update_routine( + routine, + fields, + 
timeout=7.5, + ) final_attributes.assert_called_once_with( {"path": routine.path, "fields": fields}, client, None @@ -2044,7 +2080,9 @@ def test_update_table_w_custom_property(self): updated_table = client.update_table(table, ["newAlphaProperty"]) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, + client, + None, ) conn.api_request.assert_called_once_with( @@ -2078,7 +2116,9 @@ def test_update_table_only_use_legacy_sql(self): updated_table = client.update_table(table, ["view_use_legacy_sql"]) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, + {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, + client, + None, ) conn.api_request.assert_called_once_with( @@ -2158,7 +2198,9 @@ def test_update_table_w_query(self): updated_table = client.update_table(table, updated_properties) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": updated_properties}, client, None, + {"path": "/%s" % path, "fields": updated_properties}, + client, + None, ) self.assertEqual(updated_table.schema, table.schema) @@ -2450,7 +2492,9 @@ def test_delete_routine(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=7.5, + method="DELETE", + path=path, + timeout=7.5, ) def test_delete_routine_w_wrong_type(self): @@ -2477,7 +2521,9 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, + method="DELETE", + path=path, + timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2499,7 +2545,9 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, + method="DELETE", + path=path, + timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -3638,7 +3686,10 @@ def test_extract_table(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) # Check the job resource. @@ -3880,7 +3931,10 @@ def test_extract_table_for_source_type_model(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) # Check the job resource. @@ -3923,7 +3977,10 @@ def test_extract_table_for_source_type_model_w_string_model_id(self): # Check that extract_table actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) def test_extract_table_for_source_type_model_w_model_object(self): @@ -3962,7 +4019,10 @@ def test_extract_table_for_source_type_model_w_model_object(self): # Check that extract_table actually starts the job. 
conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=RESOURCE, timeout=7.5, + method="POST", + path="/projects/PROJECT/jobs", + data=RESOURCE, + timeout=7.5, ) def test_extract_table_for_invalid_source_type_model(self): @@ -4305,7 +4365,11 @@ def test_query_preserving_explicit_job_config(self): from google.cloud.bigquery import QueryJobConfig - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http,) + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + ) conn = client._connection = make_connection(resource) job_config = QueryJobConfig() @@ -5164,7 +5228,10 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, + method="POST", + path="/%s" % PATH, + data=SENT, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5238,7 +5305,9 @@ def test_insert_rows_w_explicit_none_insert_ids(self): from google.cloud.bigquery.table import Table PATH = "projects/{}/datasets/{}/tables/{}/insertAll".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) creds = _make_credentials() http = object() @@ -5264,7 +5333,10 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, + method="POST", + path="/{}".format(PATH), + data=SENT, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5655,7 +5727,10 @@ def test_insert_rows_json_default_behavior(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=7.5, + method="POST", + path="/%s" % PATH, + data=SENT, + timeout=7.5, ) def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): @@ -5703,7 +5778,9 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): conn = client._connection = make_connection({}) errors = client.insert_rows_json( - "proj.dset.tbl", rows, row_ids=AutoRowIDs.DISABLED, + "proj.dset.tbl", + rows, + row_ids=AutoRowIDs.DISABLED, ) self.assertEqual(len(errors), 0) @@ -5789,7 +5866,9 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): conn = client._connection = make_connection({}) errors = client.insert_rows_json( - "proj.dset.tbl", rows, row_ids=[None] * len(rows), + "proj.dset.tbl", + rows, + row_ids=[None] * len(rows), ) self.assertEqual(len(errors), 0) @@ -6465,7 +6544,10 @@ def test_load_table_from_file_resumable(self): ) with do_upload_patch as do_upload: client.load_table_from_file( - file_obj, self.TABLE_REF, job_id="job_id", job_config=job_config, + file_obj, + self.TABLE_REF, + job_id="job_id", + job_config=job_config, ) do_upload.assert_called_once_with( @@ -6782,7 +6864,10 @@ def test_load_table_from_dataframe(self): policy_tags=PolicyTagList(names=("baz",)), ), "accounts": SchemaField( - "accounts", "INTEGER", mode="REPEATED", description="array column", + "accounts", + "INTEGER", + mode="REPEATED", + description="array column", ), } get_table_schema = [ @@ -7611,7 +7696,11 @@ def test_load_table_from_dataframe_array_fields(self): schema = [ SchemaField("float_column", "FLOAT"), - SchemaField("array_column", "INTEGER", mode="REPEATED",), + SchemaField( + "array_column", + "INTEGER", + mode="REPEATED", + ), ] job_config = job.LoadJobConfig(schema=schema) @@ -7671,7 +7760,11 @@ 
def test_load_table_from_dataframe_array_fields_w_auto_schema(self): expected_schema = [ SchemaField("float_column", "FLOAT"), - SchemaField("array_column", "INT64", mode="REPEATED",), + SchemaField( + "array_column", + "INT64", + mode="REPEATED", + ), ] load_patch = mock.patch( @@ -7686,7 +7779,9 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION, + dataframe, + self.TABLE_REF, + location=self.LOCATION, ) load_table_from_file.assert_called_once_with( @@ -8189,7 +8284,11 @@ def test__do_resumable_upload_custom_project(self): client = self._make_client(transport) result = client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, None, None, project="custom-project", + file_obj, + self.EXPECTED_CONFIGURATION, + None, + None, + project="custom-project", ) content = result.content.decode("utf-8") diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index b4bb9365f..7cc1f11c3 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -597,7 +597,8 @@ def test_complex_query_parameter_type_errors(type_, value, expect): from google.cloud.bigquery.dbapi import exceptions with pytest.raises( - exceptions.ProgrammingError, match=_expected_error_match(expect), + exceptions.ProgrammingError, + match=_expected_error_match(expect), ): complex_query_parameter("test", value, type_) diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 770154377..e96ab55d7 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -61,7 +61,8 @@ def test_ctor_w_bqstorage_client(self): mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = self._make_one( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) mock_client._ensure_bqstorage_client.assert_called_once_with( @@ -105,7 +106,8 @@ def test_connect_w_both_clients(self): mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) mock_client._ensure_bqstorage_client.assert_called_once_with( @@ -137,7 +139,9 @@ def test_close_closes_all_created_bigquery_clients(self): return_value=client, ) bqstorage_client_patcher = mock.patch.object( - client, "_ensure_bqstorage_client", return_value=bqstorage_client, + client, + "_ensure_bqstorage_client", + return_value=bqstorage_client, ) with client_patcher, bqstorage_client_patcher: diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 07bce986f..d672c0f6c 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -295,12 +295,14 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): mock_client = self._mock_client(rows=row_data) mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=bqstorage_streamed_rows, + stream_count=1, + rows=bqstorage_streamed_rows, ) mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") @@ -328,7 
+330,8 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") @@ -353,13 +356,15 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): mock_client = self._mock_client(rows=row_data) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=row_data, + stream_count=1, + rows=row_data, ) no_access_error = exceptions.Forbidden("invalid credentials") mock_bqstorage_client.create_read_session.side_effect = no_access_error connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") @@ -384,11 +389,13 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): mock_client = self._mock_client(rows=row_data) mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=bqstorage_streamed_rows, + stream_count=1, + rows=bqstorage_streamed_rows, ) connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, + client=mock_client, + bqstorage_client=mock_bqstorage_client, ) cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") diff --git a/tests/unit/test_dbapi_types.py b/tests/unit/test_dbapi_types.py index cf282c68b..7319aa016 100644 --- a/tests/unit/test_dbapi_types.py +++ b/tests/unit/test_dbapi_types.py @@ -48,8 +48,8 @@ def __bytes__(self): @pytest.mark.parametrize( "raw,expected", [ - (u"hello", b"hello"), - (u"\u1f60", u"\u1f60".encode("utf-8")), + ("hello", b"hello"), + ("\u1f60", "\u1f60".encode("utf-8")), (b"hello", b"hello"), (bytearray(b"hello"), b"hello"), (memoryview(b"hello"), b"hello"), diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py index 1fb40d446..1db6b5668 100644 --- a/tests/unit/test_list_jobs.py +++ b/tests/unit/test_list_jobs.py @@ -258,7 +258,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) # One millisecond after the the 2038 31-bit signed int rollover end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) - end_time_millis = (((2 ** 31) - 1) * 1000) + 1 + end_time_millis = (((2**31) - 1) * 1000) + 1 list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index b14852338..04932d357 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -40,7 +40,13 @@ def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): ) @dataset_polymorphic def test_list_models_defaults( - make_dataset, get_reference, client, PROJECT, DS_ID, extra, query, + make_dataset, + get_reference, + client, + PROJECT, + DS_ID, + extra, + query, ): from google.cloud.bigquery.model import Model diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index b7db20b3f..ea8fe568f 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -505,7 +505,8 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): ) 
clear_patch = mock.patch( - "google.cloud.bigquery.magics.magics.display.clear_output", autospec=True, + "google.cloud.bigquery.magics.magics.display.clear_output", + autospec=True, ) run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -525,7 +526,8 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): ) clear_patch = mock.patch( - "google.cloud.bigquery.magics.magics.display.clear_output", autospec=True, + "google.cloud.bigquery.magics.magics.display.clear_output", + autospec=True, ) run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -717,7 +719,8 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(): "google.cloud.bigquery.client.Client.query", autospec=True ) close_transports_patch = mock.patch( - "google.cloud.bigquery.magics.magics._close_transports", autospec=True, + "google.cloud.bigquery.magics.magics._close_transports", + autospec=True, ) sql = "SELECT 17 AS num" @@ -867,7 +870,8 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, ) @@ -1952,7 +1956,8 @@ def test_bigquery_magic_create_dataset_fails(): side_effect=OSError, ) close_transports_patch = mock.patch( - "google.cloud.bigquery.magics.magics._close_transports", autospec=True, + "google.cloud.bigquery.magics.magics._close_transports", + autospec=True, ) with pytest.raises( diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 71ca67616..4b687152f 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -825,7 +825,8 @@ def test_to_api_repr_array_type_as_type_instance(self): } klass = self._get_target_class() param = klass.positional( - array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False], + array_type=ScalarQueryParameterType("BOOLEAN"), + values=[True, False], ) self.assertEqual(param.to_api_repr(), EXPECTED) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 863ef1e5f..6a547cb13 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -98,7 +98,8 @@ def test_to_api_repr(self): policy = PolicyTagList(names=("foo", "bar")) self.assertEqual( - policy.to_api_repr(), {"names": ["foo", "bar"]}, + policy.to_api_repr(), + {"names": ["foo", "bar"]}, ) field = self._make_one( @@ -546,7 +547,11 @@ def test___repr__evaluable_no_policy_tags(self): def test___repr__evaluable_with_policy_tags(self): policy_tags = PolicyTagList(names=["foo", "bar"]) field = self._make_one( - "field1", "STRING", "REQUIRED", "Description", policy_tags=policy_tags, + "field1", + "STRING", + "REQUIRED", + "Description", + policy_tags=policy_tags, ) field_repr = repr(field) SchemaField = self._get_target_class() # needed for eval # noqa @@ -630,10 +635,12 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + resource[1], + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ) def test_w_description(self): @@ -683,7 +690,8 @@ def test_w_subfields(self): resource = 
self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -796,11 +804,13 @@ def test_from_api_repr(self): def test_to_api_repr(self): taglist = self._make_one(names=["foo", "bar"]) self.assertEqual( - taglist.to_api_repr(), {"names": ["foo", "bar"]}, + taglist.to_api_repr(), + {"names": ["foo", "bar"]}, ) taglist2 = self._make_one(names=("foo", "bar")) self.assertEqual( - taglist2.to_api_repr(), {"names": ["foo", "bar"]}, + taglist2.to_api_repr(), + {"names": ["foo", "bar"]}, ) def test___eq___wrong_type(self): @@ -937,11 +947,22 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9,), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + ), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2,), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + scale=2, + ), ), ( dict(name="n", field_type="BIGNUMERIC"), @@ -949,11 +970,22 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40,), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + ), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2,), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + scale=2, + ), ), ( dict(name="n", field_type="STRING"), @@ -961,7 +993,12 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="STRING", max_length=9), - dict(name="n", type="STRING", mode="NULLABLE", maxLength=9,), + dict( + name="n", + type="STRING", + mode="NULLABLE", + maxLength=9, + ), ), ( dict(name="n", field_type="BYTES"), @@ -969,7 +1006,12 @@ def test_from_api_repr_parameterized(api, expect, key2): ), ( dict(name="n", field_type="BYTES", max_length=9), - dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9,), + dict( + name="n", + type="BYTES", + mode="NULLABLE", + maxLength=9, + ), ), ], ) diff --git a/tests/unit/test_standard_sql_types.py b/tests/unit/test_standard_sql_types.py index b91f877cc..0ba0e0cfd 100644 --- a/tests/unit/test_standard_sql_types.py +++ b/tests/unit/test_standard_sql_types.py @@ -96,7 +96,8 @@ def test_to_api_repr_struct_type_w_field_types(self): StandardSqlField( "employee_info", StandardSqlDataType( - type_kind=TypeNames.STRUCT, struct_type=person_type, + type_kind=TypeNames.STRUCT, + struct_type=person_type, ), ), ] @@ -402,7 +403,11 @@ def test__eq__similar_instance(self): @pytest.mark.parametrize( ("attr_name", "value", "value2"), ( - ("name", "foo", "bar",), + ( + "name", + "foo", + "bar", + ), ( "type", bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.INTERVAL), @@ -542,7 +547,8 @@ def test_from_api_repr_with_incomplete_columns(self): assert len(result.columns) == 2 expected = StandardSqlField( - name=None, type=StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), + name=None, + type=StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.BOOL), ) assert result.columns[0] == expected diff --git a/tests/unit/test_table.py 
b/tests/unit/test_table.py index 3c74259d4..5241230a4 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2744,7 +2744,8 @@ def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_moc for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) tbl = row_iterator.to_arrow( - progress_bar_type=progress_bar_type, create_bqstorage_client=False, + progress_bar_type=progress_bar_type, + create_bqstorage_client=False, ) progress_bar_mock.assert_called() @@ -3093,7 +3094,8 @@ def test_to_dataframe_progress_bar( for progress_bar_type, progress_bar_mock in progress_bars: row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe( - progress_bar_type=progress_bar_type, create_bqstorage_client=False, + progress_bar_type=progress_bar_type, + create_bqstorage_client=False, ) progress_bar_mock.assert_called() @@ -3150,7 +3152,8 @@ def test_to_dataframe_no_tqdm(self): with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe( - progress_bar_type="tqdm", create_bqstorage_client=False, + progress_bar_type="tqdm", + create_bqstorage_client=False, ) user_warnings = [ @@ -3188,7 +3191,8 @@ def test_to_dataframe_tqdm_error(self): with warnings.catch_warnings(record=True) as warned: df = row_iterator.to_dataframe( - progress_bar_type=progress_bar_type, create_bqstorage_client=False, + progress_bar_type=progress_bar_type, + create_bqstorage_client=False, ) self.assertEqual(len(df), 4) # all should be well @@ -3230,9 +3234,9 @@ def test_to_dataframe_w_various_types_nullable(self): ] row_data = [ [None, None, None, None, None, None], - ["1433836800000000", "420", "1.1", u"Cash", "true", "1999-12-01"], - ["1387811700000000", "2580", "17.7", u"Cash", "false", "1953-06-14"], - ["1385565300000000", "2280", "4.4", u"Credit", "true", "1981-11-04"], + ["1433836800000000", "420", "1.1", "Cash", "true", "1999-12-01"], + ["1387811700000000", "2580", "17.7", "Cash", "false", "1953-06-14"], + ["1385565300000000", "2280", "4.4", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3270,17 +3274,17 @@ def test_to_dataframe_column_dtypes(self): SchemaField("date", "DATE"), ] row_data = [ - ["1433836800000000", "420", "1.1", "1.77", u"Cash", "true", "1999-12-01"], + ["1433836800000000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"], [ "1387811700000000", "2580", "17.7", "28.5", - u"Cash", + "Cash", "false", "1953-06-14", ], - ["1385565300000000", "2280", "4.4", "7.1", u"Credit", "true", "1981-11-04"], + ["1385565300000000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -3288,7 +3292,8 @@ def test_to_dataframe_column_dtypes(self): row_iterator = self._make_one(_mock_client(), api_request, path, schema) df = row_iterator.to_dataframe( - dtypes={"km": "float16"}, create_bqstorage_client=False, + dtypes={"km": "float16"}, + create_bqstorage_client=False, ) self.assertIsInstance(df, pandas.DataFrame) @@ -3912,7 +3917,8 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): ) df = row_iterator.to_dataframe( - bqstorage_client=None, create_bqstorage_client=False, + bqstorage_client=None, + create_bqstorage_client=False, ) self.assertIsInstance(df, pandas.DataFrame) @@ -3926,8 +3932,10 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): from 
google.cloud.bigquery import table as mut bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.Forbidden( - "TEST BigQuery Storage API not enabled. TEST" + bqstorage_client.create_read_session.side_effect = ( + google.api_core.exceptions.Forbidden( + "TEST BigQuery Storage API not enabled. TEST" + ) ) path = "/foo" api_request = mock.Mock(return_value={"rows": []}) @@ -4047,7 +4055,8 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): bqstorage_client=bqstorage_client, dtypes={ "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, + categories=["low", "medium", "high"], + ordered=False, ), }, ) @@ -4065,7 +4074,8 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): expected_dtypes = [ pandas.core.dtypes.dtypes.np.dtype("O"), # the default for string data pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, + categories=["low", "medium", "high"], + ordered=False, ), ] self.assertEqual(list(got.dtypes), expected_dtypes) @@ -4090,7 +4100,8 @@ def test_to_dataframe_geography_as_object(self): ), ) df = row_iterator.to_dataframe( - create_bqstorage_client=False, geography_as_object=True, + create_bqstorage_client=False, + geography_as_object=True, ) self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 3) # verify the number of rows @@ -4256,7 +4267,10 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): geography_column = "g" to_dataframe.return_value = pandas.DataFrame( - dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + dict( + name=["foo"], + g=[wkt.loads("point(0 0)")], + ) ) df = row_iterator.to_geodataframe( diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 8e37ed504..943baa326 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -145,7 +145,8 @@ def test_to_dataframe_nullable_scalars_with_custom_dtypes( ] ) arrow_table = pyarrow.Table.from_pydict( - {"int64_col": [1000], "other_int_col": [-7]}, schema=arrow_schema, + {"int64_col": [1000], "other_int_col": [-7]}, + schema=arrow_schema, ) nullable_schema = [ @@ -173,7 +174,8 @@ def test_to_dataframe_arrays(monkeypatch, class_under_test): [pyarrow.field("int64_repeated", pyarrow.list_(pyarrow.int64()))] ) arrow_table = pyarrow.Table.from_pydict( - {"int64_repeated": [[-1, 0, 2]]}, schema=arrow_schema, + {"int64_repeated": [[-1, 0, 2]]}, + schema=arrow_schema, ) nullable_schema = [ From af0ecb021b3718314a1093e891f845512fedd9b7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 29 Mar 2022 18:45:12 +0300 Subject: [PATCH 34/35] docs: Add migration guide from version 2.x to 3.x (#1027) * docs: add migration guide from version 2.x. to 3.x * Add a section on typee annotations * Explain additional requirement of pandas extra * Mention new default type for TZ-aware datetimes * rearrange and add a section * start documenting model properties that have changed * add table of changes for pandas and Model Co-authored-by: Tim Swast --- UPGRADING.md | 182 ++++++++++++++++++++++++++++++++++++++++++++++++- docs/index.rst | 3 +- 2 files changed, 183 insertions(+), 2 deletions(-) diff --git a/UPGRADING.md b/UPGRADING.md index c75c4fddb..95f87f7ee 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -13,8 +13,188 @@ limitations under the License. 
# 3.0.0 Migration Guide
-TODO
+## New Required Dependencies
+Some of the previously optional dependencies are now *required* in `3.x` versions of the
+library, namely
+[google-cloud-bigquery-storage](https://pypi.org/project/google-cloud-bigquery-storage/)
+(minimum version `2.0.0`) and [pyarrow](https://pypi.org/project/pyarrow/) (minimum
+version `3.0.0`).
+
+The behavior of some of the package "extras" has thus also changed:
+ * The `pandas` extra now requires the [db-dtypes](https://pypi.org/project/db-dtypes/)
+ package.
+ * The `bqstorage` extra has been preserved for compatibility reasons, but it is now a
+ no-op and should be omitted when installing the BigQuery client library.
+
+ **Before:**
+ ```
+ $ pip install google-cloud-bigquery[bqstorage]
+ ```
+
+ **After:**
+ ```
+ $ pip install google-cloud-bigquery
+ ```
+
+ * The `bignumeric_type` extra has been removed, as the `BIGNUMERIC` type is now
+ automatically supported. That extra should thus not be used.
+
+ **Before:**
+ ```
+ $ pip install google-cloud-bigquery[bignumeric_type]
+ ```
+
+ **After:**
+ ```
+ $ pip install google-cloud-bigquery
+ ```
+
+
+## Type Annotations
+
+The library is now type-annotated and declares itself as such. If you use a static
+type checker such as `mypy`, you might start getting errors in places where the
+`google-cloud-bigquery` package is used.
+
+It is recommended to update your code and/or type annotations to fix these errors, but
+if this is not feasible in the short term, you can temporarily ignore type annotations
+in `google-cloud-bigquery`, for example by using a special `# type: ignore` comment:
+
+```py
+from google.cloud import bigquery # type: ignore
+```
+
+But again, this is only recommended as a possible short-term workaround if immediately
+fixing the type check errors in your project is not feasible.
+
+## Re-organized Types
+
+The auto-generated parts of the library have been removed, and proto-based types formerly
+found in `google.cloud.bigquery_v2` have been replaced by the new implementation (but
+see the [section](#legacy-protocol-buffers-types) below).
+
+For example, the standard SQL data types should now be imported from a new location:
+
+**Before:**
+```py
+from google.cloud.bigquery_v2 import StandardSqlDataType
+from google.cloud.bigquery_v2.types import StandardSqlField
+from google.cloud.bigquery_v2.types.standard_sql import StandardSqlStructType
+```
+
+**After:**
+```py
+from google.cloud.bigquery import StandardSqlDataType
+from google.cloud.bigquery.standard_sql import StandardSqlField
+from google.cloud.bigquery.standard_sql import StandardSqlStructType
+```
+
+The `TypeKind` enum defining all possible SQL types for schema fields has been renamed
+and is no longer nested under `StandardSqlDataType`:
+
+
+**Before:**
+```py
+from google.cloud.bigquery_v2 import StandardSqlDataType
+
+if field_type == StandardSqlDataType.TypeKind.STRING:
+    ...
+```
+
+**After:**
+```py
+
+from google.cloud.bigquery import StandardSqlTypeNames
+
+if field_type == StandardSqlTypeNames.STRING:
+    ...
+```
+
+
+## Issuing queries with `Client.create_job` preserves destination table
+
+The `Client.create_job` method no longer removes the destination table from a
+query job's configuration. The destination table for the query can thus be
+explicitly defined by the user.
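+
+For illustration, here is a minimal sketch of relying on the new behavior. The project,
+dataset, and table IDs below are placeholders and the query is an arbitrary example;
+`Client.create_job` accepts the job configuration as its REST API representation:
+
+```py
+from google.cloud import bigquery
+
+client = bigquery.Client()
+
+# In 3.x the destination table below is passed through to the query job as-is,
+# instead of being stripped from the configuration before the job is created.
+job = client.create_job(
+    {
+        "query": {
+            "query": "SELECT 17 AS answer",
+            "useLegacySql": False,
+            "destinationTable": {
+                "projectId": "your-project",
+                "datasetId": "your_dataset",
+                "tableId": "your_table",
+            },
+        }
+    }
+)
+job.result()  # Wait for the query; results land in the destination table.
+```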
+
+
+## Changes to data types when reading a pandas DataFrame
+
+The default dtypes returned by the `to_dataframe` method have changed.
+
+* Now, the BigQuery `BOOLEAN` data type maps to the pandas `boolean` dtype.
+ Previously, this mapped to the pandas `bool` dtype when the column did not
+ contain `NULL` values and the pandas `object` dtype when `NULL` values are
+ present.
+* Now, the BigQuery `INT64` data type maps to the pandas `Int64` dtype.
+ Previously, this mapped to the pandas `int64` dtype when the column did not
+ contain `NULL` values and the pandas `float64` dtype when `NULL` values are
+ present.
+* Now, the BigQuery `DATE` data type maps to the pandas `dbdate` dtype, which
+ is provided by the
+ [db-dtypes](https://googleapis.dev/python/db-dtypes/latest/index.html)
+ package. If any date value is outside of the range of
+ [pandas.Timestamp.min](https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.min.html)
+ (1677-09-22) and
+ [pandas.Timestamp.max](https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.max.html)
+ (2262-04-11), the data type maps to the pandas `object` dtype. The
+ `date_as_object` parameter has been removed.
+* Now, the BigQuery `TIME` data type maps to the pandas `dbtime` dtype, which
+ is provided by the
+ [db-dtypes](https://googleapis.dev/python/db-dtypes/latest/index.html)
+ package.
+
+
+## Changes to data types when loading a pandas DataFrame
+
+In the absence of schema information, pandas columns with naive
+`datetime64[ns]` values, i.e. without timezone information, are recognized and
+loaded using the `DATETIME` type. On the other hand, for columns with
+timezone-aware `datetime64[ns, UTC]` values, the `TIMESTAMP` type continues
+to be used.
+
+## Changes to `Model`, `Client.get_model`, `Client.update_model`, and `Client.list_models`
+
+The types of several `Model` properties have been changed.
+
+- `Model.feature_columns` now returns a sequence of `google.cloud.bigquery.standard_sql.StandardSqlField`.
+- `Model.label_columns` now returns a sequence of `google.cloud.bigquery.standard_sql.StandardSqlField`.
+- `Model.model_type` now returns a string.
+- `Model.training_runs` now returns a sequence of dictionaries, as received from the [BigQuery REST API](https://cloud.google.com/bigquery/docs/reference/rest/v2/models#Model.FIELDS.training_runs).
+
+
+## Legacy Protocol Buffers Types
+
+For compatibility reasons, the legacy proto-based types still exist as static code
+and can be imported:
+
+```py
+from google.cloud.bigquery_v2 import Model # a subclass of proto.Message
+```
+
+Note, however, that importing them will issue a warning, because aside from
+being importable, these types **are not maintained anymore**. They may differ
+both from the types in `google.cloud.bigquery` and from the types supported on
+the backend.
+
+### Maintaining compatibility with `google-cloud-bigquery` version 2.0
+
+If you maintain a library or system that needs to support both
+`google-cloud-bigquery` version 2.x and 3.x, it is recommended that you detect
+when version 2.x is in use and convert properties that use the legacy protocol
+buffer types, such as `Model.training_runs`, into the types used in 3.x.
+
+Call the [`to_dict`
+method](https://proto-plus-python.readthedocs.io/en/latest/reference/message.html#proto.message.Message.to_dict)
+on the protocol buffer objects to get a JSON-compatible dictionary.
+
+```py
+from google.cloud.bigquery_v2 import Model
+
+training_run: Model.TrainingRun = ...
+training_run_dict = training_run.to_dict() +``` # 2.0.0 Migration Guide diff --git a/docs/index.rst b/docs/index.rst index 3f8ba2304..4ab0a298d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,7 +30,8 @@ API Reference Migration Guide --------------- -See the guide below for instructions on migrating to the 2.x release of this library. +See the guides below for instructions on migrating from older to newer *major* releases +of this library (from ``1.x`` to ``2.x``, or from ``2.x`` to ``3.x``). .. toctree:: :maxdepth: 2 From f69bae7f3060dfb852ab6373a0e658a792690088 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 29 Mar 2022 15:45:14 +0000 Subject: [PATCH 35/35] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 46d237160..62eb5a77d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: end-of-file-fixer - id: check-yaml - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 19.10b0 hooks: - id: black - repo: https://gitlab.com/pycqa/flake8