Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.enums import DefaultPandasDTypes
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery import schema as _schema
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
Expand Down Expand Up @@ -411,6 +412,7 @@ class Table(_TableBase):
"max_staleness": "maxStaleness",
"resource_tags": "resourceTags",
"external_catalog_table_options": "externalCatalogTableOptions",
"foreign_type_info": "foreignTypeInfo",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should adjust this to the format used for _helpers._get_sub_prop and _helpers._set_sub_prop. Likewise, let's replace schema with something compatible with that.

Suggested change
"foreign_type_info": "foreignTypeInfo",
"foreign_type_info": ["schema", "foreignTypeInfo"],
# TODO: remove "schema" from above (between "time_partitioning" and "snapshot_definition")
"schema": ["schema", "fields"],

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Partially complete. Added a new value for schema, but not yet done with foreign_type_info.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Complete.

}

def __init__(self, table_ref, schema=None) -> None:
Expand Down Expand Up @@ -451,6 +453,15 @@ def schema(self):
If ``schema`` is not a sequence, or if any item in the sequence
is not a :class:`~google.cloud.bigquery.schema.SchemaField`
instance or a compatible mapping representation of the field.

NOTE: If you are referencing a schema for an external catalog table such
as a Hive table, it will also be necessary to populate the foreign_type_info
attribute. This is not necessary if defining the schema for a BigQuery table.

For details, see:
https://cloud.google.com/bigquery/docs/external-tables
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we'll need some logic in the setter for schema to avoid overwriting the schema property entirely. Instead, it'll need to be responsible for just schema.fields.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I'm not sure if this format will render well in the docs. We might just move all the contents under NOTE: to after Table's schema.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not yet addressed Tim's comment here.
I spoke with Linchin about the note and with the revision I added, Sphinx should be able to handle the note with no problem.

@chalmerlowe chalmerlowe Feb 20, 2025

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tswast

fields and schema are two separate items:

  • two different attributes (with setters and getters)
  • two separate nodes on the .properties dict

It is possible for a user to supply an api_resource (a dict) that will overwrite both at the same time, such as: ._properties["schema"] = {fields: [], foreign_type_info: {type_system: "hello world"}}. At that point, the end user should expect both to be overwritten.

Due to the nested separation of fields and schema in the ._properties dict and how we write content to it (either with setters OR directly into ._properties), I am not aware of any means whereby setting one or the other will cause its counterpart to be accidentally overwritten.

I don't think any additional checks are required.

Also, the test test_table.py::test_to_api_repr_w_schema_and_foreign_type_info is broken into several steps. Two of those steps are specifically focused on ensuring that if either item is set, the other does not change.

"""
prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
if not prop:
Expand Down Expand Up @@ -1075,6 +1086,41 @@ def external_catalog_table_options(
self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
] = value

@property
def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
"""Optional. Specifies metadata of the foreign data type definition in
field schema (TableFieldSchema.foreign_type_definition).

Returns:
Optional[schema.ForeignTypeInfo]:
Foreign type information, or :data:`None` if not set.

NOTE: foreign_type_info is only required if you are referencing an
external catalog such as a Hive table.
For details, see:
https://cloud.google.com/bigquery/docs/external-tables
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
"""

prop = self._properties.get(self._PROPERTY_TO_API_FIELD["foreign_type_info"])

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if we are exposing this at the table level, it needs to be fetched from schema, still, right?

This is exactly the sort of thing the _get_sub_prop(container, keys, default=None) and _set_sub_prop(container, keys, value) helpers are intended to be used for.

We even use it in other Table properties, such as project:

return _helpers._get_sub_prop(
self._properties, self._PROPERTY_TO_API_FIELD["project"]
)

Suggested change
prop = self._properties.get(self._PROPERTY_TO_API_FIELD["foreign_type_info"])
prop = _helpers._get_sub_prop(self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"])

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Complete.

if prop is not None:
return _schema.ForeignTypeInfo.from_api_repr(prop)
return None

@foreign_type_info.setter
def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
    """Store the foreign type info on the table's API properties.

    Args:
        value (Union[schema.ForeignTypeInfo, dict, None]):
            A ``ForeignTypeInfo`` object is stored as its API
            representation; a ``dict`` or :data:`None` is stored unchanged.

    NOTE(review): type validation is delegated to
    ``_helpers._isinstance_or_raise``; it presumably raises ``TypeError``
    for any other input type -- confirm against the helper.
    """
    checked = _helpers._isinstance_or_raise(
        value,
        (_schema.ForeignTypeInfo, dict),
        none_allowed=True,
    )
    # Only ForeignTypeInfo objects need converting; dicts and None are
    # already in the shape kept in ``_properties``.
    payload = (
        checked.to_api_repr()
        if isinstance(checked, _schema.ForeignTypeInfo)
        else checked
    )
    self._properties[self._PROPERTY_TO_API_FIELD["foreign_type_info"]] = payload

@classmethod
def from_string(cls, full_table_id: str) -> "Table":
"""Construct a table from fully-qualified table ID.
Expand Down
66 changes: 66 additions & 0 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import exceptions
from google.cloud.bigquery import external_config
from google.cloud.bigquery import schema
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.dataset import DatasetReference

Expand Down Expand Up @@ -5993,6 +5994,71 @@ def test_external_catalog_table_options_from_api_repr(self):
assert result == expected


class TestForeignTypeInfo:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd also like to see a test where we do Table.from_api_repr and Table.to_api_repr so that we can visually compare that the correct schema.foreignTypeInfo field of the REST API object is set.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not yet addressed.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

# Shared identifiers used to build the table reference under test.
PROJECT = "test-project"
DATASET_ID = "test_dataset"
TABLE_ID = "coffee_table"
DATASET = DatasetReference(PROJECT, DATASET_ID)
TABLEREF = DATASET.table(TABLE_ID)
# API-resource (camelCase) form of the foreign type info that the tests
# below set on the table and compare results against.
FOREIGNTYPEINFO = {
    "typeSystem": "TYPE_SYSTEM_UNSPECIFIED",
}

# Imported in the class body so tests can reach it as ``self.ForeignTypeInfo``.
from google.cloud.bigquery.schema import ForeignTypeInfo

@staticmethod
def _get_target_class():
    """Return the class under test.

    The import is done lazily inside the helper rather than at module
    import time.
    """
    from google.cloud.bigquery.table import Table

    return Table

def _make_one(self, *args, **kw):
    """Instantiate the class under test, forwarding all arguments."""
    # A staticmethod receives no implicit instance, so nothing is passed
    # here; the previous version declared a dummy ``self`` parameter and
    # passed ``self`` explicitly to satisfy it.
    return self._get_target_class()(*args, **kw)

def test_foreign_type_info_default_initialization(self):
    """A newly constructed table reports no foreign type info."""
    fresh_table = self._make_one(self.TABLEREF)
    observed = fresh_table.foreign_type_info
    assert observed is None

def test_foreign_type_info_valid_inputs(self):

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we also add test cases for the setter for the other supported types, i.e., dict and None?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not yet addressed.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Complete.
This test is now parametrized so that it tests under three input conditions:

  • dict
  • None
  • using a ForeignTypeInfo object

table = self._make_one(self.TABLEREF)

table.foreign_type_info = self.ForeignTypeInfo(
type_system="TYPE_SYSTEM_UNSPECIFIED",
)

result = table.foreign_type_info.type_system
expected = self.FOREIGNTYPEINFO["typeSystem"]
assert result == expected

def test_foreign_type_info_invalid_inputs(self):
    """Assigning an unsupported type to ``foreign_type_info`` raises TypeError."""
    target = self._make_one(self.TABLEREF)
    unsupported_value = 123  # neither ForeignTypeInfo, dict, nor None

    # invalid on the whole
    with pytest.raises(TypeError, match="Pass .*"):
        target.foreign_type_info = unsupported_value

def test_foreign_type_info_to_api_repr(self):
    """A ForeignTypeInfo set on the table appears in the table's API resource."""
    target = self._make_one(self.TABLEREF)
    info = self.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED")
    target.foreign_type_info = info

    resource = target.to_api_repr()
    assert resource["foreignTypeInfo"] == self.FOREIGNTYPEINFO

def test_foreign_type_info_from_api_repr(self):
    """Foreign type info survives a round trip through its API representation.

    Checks both the stand-alone ``ForeignTypeInfo`` round trip and the value
    read back through the table property after assigning the raw dict.
    """
    table = self._make_one(self.TABLEREF)
    table.foreign_type_info = self.FOREIGNTYPEINFO

    # Stand-alone round trip: dict -> ForeignTypeInfo -> dict.
    fti = schema.ForeignTypeInfo.from_api_repr(self.FOREIGNTYPEINFO)
    assert fti.to_api_repr() == self.FOREIGNTYPEINFO

    # The table was previously created but never inspected; verify the
    # getter rebuilds an equivalent object from the stored dict.
    from_table = table.foreign_type_info
    assert from_table.type_system == self.FOREIGNTYPEINFO["typeSystem"]
    assert from_table.to_api_repr() == self.FOREIGNTYPEINFO


@pytest.mark.parametrize(
"table_path",
(
Expand Down