Skip to content

Commit

Permalink
Correctly model JSON repr of complex nested records.
Browse files Browse the repository at this point in the history
  • Loading branch information
tseaver committed Nov 30, 2016
1 parent 2ad06a8 commit 65bf6ea
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 12 deletions.
9 changes: 5 additions & 4 deletions bigquery/google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ def _record_from_json(value, field):
"""Coerce 'value' to a mapping, if set or not nullable."""
if _not_null(value, field):
record = {}
for subfield, cell in zip(field.fields, value['f']):
record_iter = zip(field.fields, value['f'])
for subfield, cell in record_iter:
converter = _CELLDATA_FROM_JSON[subfield.field_type]
if field.mode == 'REPEATED':
value = [converter(item, subfield) for item in cell['v']]
if subfield.mode == 'REPEATED':
value = [converter(item['v'], subfield) for item in cell['v']]
else:
value = converter(cell['v'], subfield)
record[subfield.name] = value
Expand Down Expand Up @@ -103,7 +104,7 @@ def _row_from_json(row, schema):
for field, cell in zip(schema, row['f']):
converter = _CELLDATA_FROM_JSON[field.field_type]
if field.mode == 'REPEATED':
row_data.append([converter(item, field)
row_data.append([converter(item['v'], field)
for item in cell['v']])
else:
row_data.append(converter(cell['v'], field))
Expand Down
97 changes: 94 additions & 3 deletions bigquery/unit_tests/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_w_scalar_subfield(self):
def test_w_repeated_subfield(self):
subfield = _Field('REPEATED', 'color', 'STRING')
field = _Field('REQUIRED', fields=[subfield])
value = {'f': [{'v': ['red', 'yellow', 'blue']}]}
value = {'f': [{'v': [{'v': 'red'}, {'v': 'yellow'}, {'v': 'blue'}]}]}
coerced = self._call_fut(value, field)
self.assertEqual(coerced, {'color': ['red', 'yellow', 'blue']})

Expand Down Expand Up @@ -234,6 +234,97 @@ def test_w_string_value(self):
self.assertEqual(coerced, 'Wonderful!')


class Test_row_from_json(unittest.TestCase):
    """Checks for ``_row_from_json`` with scalar, struct and array columns.

    Every case builds a row payload shaped like ``{'f': [{'v': ...}, ...]}``
    together with a schema made of ``_Field`` stand-ins, and compares the
    converted row against the expected tuple.
    """

    def _call_fut(self, row, schema):
        # The function under test is imported here, inside the helper,
        # rather than at module scope.
        from google.cloud.bigquery._helpers import _row_from_json
        return _row_from_json(row, schema)

    def test_w_single_scalar_column(self):
        # SELECT 1 AS col
        column = _Field('REQUIRED', 'col', 'INTEGER')
        payload = {u'f': [{u'v': u'1'}]}
        expected = (1,)
        self.assertEqual(self._call_fut(payload, schema=[column]), expected)

    def test_w_single_struct_column(self):
        # SELECT (1, 2) AS col
        sub_1 = _Field('REQUIRED', 'sub_1', 'INTEGER')
        sub_2 = _Field('REQUIRED', 'sub_2', 'INTEGER')
        column = _Field('REQUIRED', 'col', 'RECORD', fields=[sub_1, sub_2])
        # A struct cell wraps its own list of cells under 'f'.
        struct_cell = {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}]}}
        payload = {u'f': [struct_cell]}
        expected = ({'sub_1': 1, 'sub_2': 2},)
        self.assertEqual(self._call_fut(payload, schema=[column]), expected)

    def test_w_single_array_column(self):
        # SELECT [1, 2, 3] AS col
        column = _Field('REPEATED', 'col', 'INTEGER')
        # Each element of a repeated cell is itself wrapped as {'v': item}.
        items = [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]
        payload = {u'f': [{u'v': items}]}
        expected = ([1, 2, 3],)
        self.assertEqual(self._call_fut(payload, schema=[column]), expected)

    def test_w_struct_w_nested_array_column(self):
        # SELECT ([1, 2], 3, [4, 5]) AS col
        first = _Field('REPEATED', 'first', 'INTEGER')
        second = _Field('REQUIRED', 'second', 'INTEGER')
        third = _Field('REPEATED', 'third', 'INTEGER')
        column = _Field('REQUIRED', 'col', 'RECORD',
                        fields=[first, second, third])
        first_cell = {u'v': [{u'v': u'1'}, {u'v': u'2'}]}
        second_cell = {u'v': u'3'}
        third_cell = {u'v': [{u'v': u'4'}, {u'v': u'5'}]}
        payload = {
            u'f': [
                {u'v': {u'f': [first_cell, second_cell, third_cell]}},
            ],
        }
        expected = ({u'first': [1, 2], u'second': 3, u'third': [4, 5]},)
        self.assertEqual(self._call_fut(payload, schema=[column]), expected)

    def test_w_array_of_struct(self):
        # SELECT [(1, 2, 3), (4, 5, 6)] AS col
        first = _Field('REQUIRED', 'first', 'INTEGER')
        second = _Field('REQUIRED', 'second', 'INTEGER')
        third = _Field('REQUIRED', 'third', 'INTEGER')
        column = _Field('REPEATED', 'col', 'RECORD',
                        fields=[first, second, third])
        struct_a = {u'v': {u'f': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]}}
        struct_b = {u'v': {u'f': [{u'v': u'4'}, {u'v': u'5'}, {u'v': u'6'}]}}
        payload = {u'f': [{u'v': [struct_a, struct_b]}]}
        expected = (
            [
                {u'first': 1, u'second': 2, u'third': 3},
                {u'first': 4, u'second': 5, u'third': 6},
            ],
        )
        self.assertEqual(self._call_fut(payload, schema=[column]), expected)

    def test_w_array_of_struct_w_array(self):
        # SELECT [([1, 2, 3], 4), ([5, 6], 7)] AS col
        first = _Field('REPEATED', 'first', 'INTEGER')
        second = _Field('REQUIRED', 'second', 'INTEGER')
        column = _Field('REPEATED', 'col', 'RECORD', fields=[first, second])
        struct_a = {u'v': {u'f': [
            {u'v': [{u'v': u'1'}, {u'v': u'2'}, {u'v': u'3'}]},
            {u'v': u'4'},
        ]}}
        struct_b = {u'v': {u'f': [
            {u'v': [{u'v': u'5'}, {u'v': u'6'}]},
            {u'v': u'7'},
        ]}}
        payload = {u'f': [{u'v': [struct_a, struct_b]}]}
        expected = (
            [
                {u'first': [1, 2, 3], u'second': 4},
                {u'first': [5, 6], u'second': 7},
            ],
        )
        self.assertEqual(self._call_fut(payload, schema=[column]), expected)


class Test_rows_from_json(unittest.TestCase):

def _call_fut(self, value, field):
Expand All @@ -253,12 +344,12 @@ def test_w_record_subfield(self):
{'f': [
{'v': 'Phred Phlyntstone'},
{'v': {'f': [{'v': '800'}, {'v': '555-1212'}, {'v': 1}]}},
{'v': ['orange', 'black']},
{'v': [{'v': 'orange'}, {'v': 'black'}]},
]},
{'f': [
{'v': 'Bharney Rhubble'},
{'v': {'f': [{'v': '877'}, {'v': '768-5309'}, {'v': 2}]}},
{'v': ['brown']},
{'v': [{'v': 'brown'}]},
]},
{'f': [
{'v': 'Wylma Phlyntstone'},
Expand Down
15 changes: 10 additions & 5 deletions bigquery/unit_tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,22 +1173,27 @@ def test_fetch_data_w_repeated_fields(self):
'pageToken': TOKEN,
'rows': [
{'f': [
{'v': ['red', 'green']},
{'v': [{'f': [{'v': ['1', '2']},
{'v': ['3.1415', '1.414']}]}]},
{'v': [{'v': 'red'}, {'v': 'green'}]},
{'v': [{
'v': {
'f': [
{'v': [{'v': '1'}, {'v': '2'}]},
{'v': [{'v': '3.1415'}, {'v': '1.414'}]},
]}
}]},
]},
]
}
conn = _Connection(DATA)
client = _Client(project=self.PROJECT, connection=conn)
dataset = _Dataset(client)
full_name = SchemaField('color', 'STRING', mode='REPEATED')
color = SchemaField('color', 'STRING', mode='REPEATED')
index = SchemaField('index', 'INTEGER', 'REPEATED')
score = SchemaField('score', 'FLOAT', 'REPEATED')
struct = SchemaField('struct', 'RECORD', mode='REPEATED',
fields=[index, score])
table = self._make_one(self.TABLE_NAME, dataset=dataset,
schema=[full_name, struct])
schema=[color, struct])

iterator = table.fetch_data()
page = six.next(iterator.pages)
Expand Down

0 comments on commit 65bf6ea

Please sign in to comment.