Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ def dataframe_to_json_generator(dataframe):
output = {}
for column, value in zip(dataframe.columns, row):
# Omit NaN values.
if value != value:
if pandas.isna(value):
continue
output[column] = value
yield output
41 changes: 41 additions & 0 deletions tests/unit/test__pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import operator
import queue
import warnings
import pkg_resources

import mock

Expand Down Expand Up @@ -47,6 +48,14 @@
except ImportError: # pragma: NO COVER
bigquery_storage = None

# Oldest pandas release accepted by the version-gated tests in this module
# (the skip reason used with it cites pandas 1.0.0 as introducing pandas.NA).
PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0")

# Version of the installed pandas distribution. When pandas could not be
# imported, use a placeholder that sorts below the minimum so that
# "installed < minimum" comparisons evaluate to True.
PANDAS_INSTALLED_VERSION = (
    pkg_resources.parse_version("0.0.0")
    if pandas is None
    else pkg_resources.get_distribution("pandas").parsed_version
)


skip_if_no_bignumeric = pytest.mark.skipif(
not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0",
Expand Down Expand Up @@ -734,6 +743,38 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name(
assert columns_and_indexes == expected


# NOTE: the mark must be spelled ``skipif`` (all lowercase). ``skipIf`` is not
# a built-in pytest mark; pytest would treat it as an unrelated custom mark and
# run the test anyway, failing on ``pandas.NA`` with older/missing pandas.
# The reason string also records *why* a minimum version is needed.
@pytest.mark.skipif(
    pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
    reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA",
)
def test_dataframe_to_json_generator(module_under_test):
    """Check that ``dataframe_to_json_generator`` omits missing values.

    Builds a dataframe whose columns each contain one missing entry of a
    different flavour (``pandas.NA``, float ``NaN``, ``pandas.NaT`` and
    ``None``) and verifies that every generated row dict leaves out exactly
    the column holding the missing value.

    Args:
        module_under_test: Fixture providing the module that exposes
            ``dataframe_to_json_generator``.
    """
    utcnow = datetime.datetime.utcnow()
    df_data = collections.OrderedDict(
        [
            ("a_series", [pandas.NA, 2, 3, 4]),
            ("b_series", [0.1, float("NaN"), 0.3, 0.4]),
            ("c_series", ["a", "b", pandas.NA, "d"]),
            ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]),
            ("e_series", [True, False, True, None]),
        ]
    )
    dataframe = pandas.DataFrame(
        df_data, index=pandas.Index([4, 5, 6, 7], name="a_index")
    )

    # Use the nullable integer dtype so the column can hold pandas.NA
    # alongside integer values.
    dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()})

    rows = module_under_test.dataframe_to_json_generator(dataframe)
    expected = [
        {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True},
        {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False},
        {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True},
        {"a_series": 4, "b_series": 0.4, "c_series": "d"},
    ]
    # Compare the fully materialized output: zip() would stop at the shorter
    # sequence and let a generator that yields too few rows pass unnoticed.
    assert list(rows) == expected


@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_list_columns_and_indexes_with_named_index(module_under_test):
df_data = collections.OrderedDict(
Expand Down