Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9b57878
test: upload DATE column with various dtypes
tswast Nov 10, 2021
907a1d5
add dbdate tests
tswast Nov 10, 2021
cf2a5bf
test with db-dtypes only with newer pandas
tswast Nov 11, 2021
1ea8fda
🦉 Updates from OwlBot
gcf-owl-bot[bot] Nov 11, 2021
b869a9f
sort by row number
tswast Nov 11, 2021
4d304f5
Merge branch 'issue362-to_gbq-date' of github.com:googleapis/python-b…
tswast Nov 11, 2021
b876cc7
Merge branch 'main' into issue362-to_gbq-date
tswast Nov 11, 2021
625df5a
fix: allow strings when writing to DATE and floats when writing to NU…
tswast Nov 11, 2021
bd3604e
🦉 Updates from OwlBot
gcf-owl-bot[bot] Nov 11, 2021
19df618
require db-dtypes
tswast Nov 11, 2021
e0c0a1f
Merge remote-tracking branch 'upstream/issue421-numeric' into issue42…
tswast Nov 11, 2021
ab85d8a
add unit tests for dataframe conversion
tswast Nov 11, 2021
a08f90d
fix unit tests
tswast Nov 11, 2021
4426957
🦉 Updates from OwlBot
gcf-owl-bot[bot] Nov 11, 2021
15d7b51
remove 'default' from system tests. redundant with load_parquet
tswast Nov 11, 2021
5ded1e8
Merge branch 'main' into issue421-numeric
tswast Nov 16, 2021
4ddcf9d
🦉 Updates from OwlBot
gcf-owl-bot[bot] Nov 16, 2021
95f051f
correct repeated_col name
tswast Nov 17, 2021
f8dddb2
used namedtuple in tests
tswast Nov 17, 2021
7563017
protect against explicit None
tswast Nov 17, 2021
92837e6
update tests since pandas 0.24+ is required
tswast Nov 17, 2021
f981705
update conda test pandas version
tswast Nov 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 111 additions & 1 deletion tests/unit/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@

# -*- coding: utf-8 -*-

import textwrap
import datetime
import decimal
from io import StringIO
import textwrap
from unittest import mock

import db_dtypes
import numpy
import pandas
import pandas.testing
import pytest

from pandas_gbq.features import FEATURES
Expand Down Expand Up @@ -137,3 +141,109 @@ def test_load_chunks_omits_policy_tags(
def test_load_chunks_with_invalid_api_method():
    """``load_chunks`` raises ``ValueError`` for an unknown ``api_method``."""
    expected_message = "Got unexpected api_method:"
    with pytest.raises(ValueError, match=expected_message):
        # The positional arguments are all None; only ``api_method`` is set.
        load.load_chunks(None, None, None, api_method="not_a_thing")


@pytest.mark.parametrize(
    "numeric_type",
    [
        "NUMERIC",
        "DECIMAL",
        "BIGNUMERIC",
        "BIGDECIMAL",
        "numeric",
        "decimal",
        "bignumeric",
        "bigdecimal",
    ],
)
def test_cast_dataframe_for_parquet_w_float_numeric(numeric_type):
    """A float64 column with a NUMERIC-like schema type is cast to Decimal.

    The type name is exercised in both upper and lower case. The expected
    frame keeps the other columns and the column order unchanged.
    """
    # Use multiple columns to ensure column order is maintained.
    column_order = ["row_num", "num_col", "row_num_2"]
    # Very much not recommended, as the whole point of NUMERIC is to be more
    # accurate than a floating point number, but tested to keep
    # compatibility with CSV-based uploads. See:
    # https://github.com/googleapis/python-bigquery-pandas/issues/421
    float_values = [1.25, -1.25, 42.5]

    source = pandas.DataFrame(
        {
            "row_num": [0, 1, 2],
            "num_col": pandas.Series(float_values, dtype="float64"),
            "row_num_2": [0, 1, 2],
        },
        columns=column_order,
    )
    bq_schema = {
        "fields": [
            {"name": "num_col", "type": numeric_type},
            # This schema entry has no matching dataframe column.
            {"name": "not_in_df", "type": "IGNORED"},
        ]
    }

    actual = load.cast_dataframe_for_parquet(source, bq_schema)

    decimal_values = [decimal.Decimal(value) for value in float_values]
    wanted = pandas.DataFrame(
        {
            "row_num": [0, 1, 2],
            "num_col": pandas.Series(decimal_values, dtype="object"),
            "row_num_2": [0, 1, 2],
        },
        columns=column_order,
    )
    pandas.testing.assert_frame_equal(actual, wanted)


def test_cast_dataframe_for_parquet_w_string_date():
    """An object column of date strings with a DATE schema type becomes dbdate.

    The expected frame keeps the other columns and the column order unchanged.
    """
    # Use multiple columns to ensure column order is maintained.
    column_order = ["row_num", "date_col", "row_num_2"]
    date_strings = ["2021-04-17", "1999-12-31", "2038-01-19"]

    source = pandas.DataFrame(
        {
            "row_num": [0, 1, 2],
            "date_col": pandas.Series(date_strings, dtype="object"),
            "row_num_2": [0, 1, 2],
        },
        columns=column_order,
    )
    bq_schema = {
        "fields": [
            {"name": "date_col", "type": "DATE"},
            # This schema entry has no matching dataframe column.
            {"name": "not_in_df", "type": "IGNORED"},
        ]
    }

    actual = load.cast_dataframe_for_parquet(source, bq_schema)

    wanted = pandas.DataFrame(
        {
            "row_num": [0, 1, 2],
            "date_col": pandas.Series(
                date_strings, dtype=db_dtypes.DateDtype()
            ),
            "row_num_2": [0, 1, 2],
        },
        columns=column_order,
    )
    pandas.testing.assert_frame_equal(actual, wanted)


def test_cast_dataframe_for_parquet_ignores_repeated_fields():
    """A column whose schema field has mode REPEATED is returned unchanged.

    The schema field must carry the same name as the dataframe column
    (``repeated_col``). With a non-matching name the schema entry would not
    refer to any column in the frame, and the test would pass without ever
    exercising the ``mode: REPEATED`` handling it is named after.
    """
    dataframe = pandas.DataFrame(
        {
            "row_num": [0, 1, 2],
            "repeated_col": pandas.Series(
                [
                    [datetime.date(2021, 4, 17)],
                    [datetime.date(1999, 12, 31)],
                    [datetime.date(2038, 1, 19)],
                ],
                dtype="object",
            ),
            "row_num_2": [0, 1, 2],
        },
        # Use multiple columns to ensure column order is maintained.
        columns=["row_num", "repeated_col", "row_num_2"],
    )
    # Snapshot taken before the call so the result is compared against the
    # input as it was originally built.
    expected = dataframe.copy()
    schema = {
        "fields": [
            {"name": "repeated_col", "type": "DATE", "mode": "REPEATED"}
        ]
    }
    result = load.cast_dataframe_for_parquet(dataframe, schema)
    pandas.testing.assert_frame_equal(result, expected)