Skip to content

Commit

Permalink
Allow merge to operate on tables with different columns. Closes #465. Closes #467.
Browse files Browse the repository at this point in the history
  • Loading branch information
onyxfish committed Feb 5, 2016
1 parent be80a10 commit 039c728
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 18 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
1.2.2
-----

* :meth:`.Table.merge` can now merge tables with different columns or columns in a different order. (#465)
* :meth:`.MappedSequence.get` will no longer raise ``KeyError`` if a default is not provided. (#467)
* :class:`.Number` can now test/cast the ``long`` type on Python 2.

1.2.1 - February 5, 2016
Expand Down
4 changes: 2 additions & 2 deletions agate/mapped_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ def get(self, key, default=None):
except KeyError:
if default:
return default

raise
else:
return None

@memoize
def dict(self):
Expand Down
30 changes: 23 additions & 7 deletions agate/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from agate.aggregations import Min, Max
from agate.columns import Column
from agate.data_types import TypeTester, DataType, Text, Number
from agate.exceptions import DataTypeError
from agate.mapped_sequence import MappedSequence
from agate.preview import print_table, print_html, print_bars, print_structure
from agate.rows import Row
Expand Down Expand Up @@ -943,21 +944,36 @@ def merge(cls, tables, row_names=None):
:returns:
A new :class:`Table`.
"""
column_names = tables[0].column_names
column_types = tables[0].column_types
new_columns = OrderedDict()

for table in tables[1:]:
if any(not isinstance(a, type(b)) for a, b in zip_longest(table.column_types, column_types)):
raise ValueError('Only tables with identical column types may be merged.')
for table in tables:
for i in range(0, len(table.columns)):
column_name = table.column_names[i]
column_type = table.column_types[i]

if column_name in new_columns:
if type(column_type) != type(new_columns[column_name]):
raise DataTypeError('Tables contain columns with the same names, but different types.')
else:
new_columns[column_name] = column_type

column_names = new_columns.keys()
column_types = new_columns.values()

rows = []

for table in tables:
if table.column_names == column_names:
# Performance optimization for identical table structures
if table.column_names == column_names and table.column_types == column_types:
rows.extend(table.rows)
else:
for row in table.rows:
rows.append(Row(row.values(), column_names))
data = []

for column_name in column_names:
data.append(row.get(column_name, None))

rows.append(Row(data, column_names))

return Table(rows, column_names, column_types, row_names=row_names, _is_fork=True)

Expand Down
4 changes: 1 addition & 3 deletions tests/test_mapped_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,8 @@ def test_items(self):
def test_get(self):
self.assertEqual(self.row.get('one'), 'a')

with self.assertRaises(KeyError):
self.row.get('four')

def test_get_default(self):
self.assertEqual(self.row.get('four'), None)
self.assertEqual(self.row.get('four', 'foo'), 'foo')

def test_dict(self):
Expand Down
31 changes: 25 additions & 6 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1662,12 +1662,31 @@ def test_merge_different_names(self):

self.assertIsNot(table_c, table_a)
self.assertIsNot(table_c, table_b)
self.assertColumnNames(table_c, self.column_names)
self.assertColumnTypes(table_c, [Number, Number, Text])
self.assertRows(table_c, self.rows + self.rows)
self.assertColumnNames(table_c, self.column_names + column_names)
self.assertColumnTypes(table_c, [Number, Number, Text, Number, Number, Text])
self.assertSequenceEqual(table_c.rows[0], [1, 4, 'a', None, None, None])
self.assertSequenceEqual(table_c.rows[3], [None, None, None, 1, 4, 'a'])

for row in table_c.rows:
self.assertSequenceEqual(row.keys(), self.column_names + column_names)

def test_merge_mixed_names(self):
table_a = Table(self.rows, self.column_names, self.column_types)

column_names = ['two', 'one', 'four']

table_b = Table(self.rows, column_names, self.column_types)
table_c = Table.merge([table_a, table_b])

self.assertIsNot(table_c, table_a)
self.assertIsNot(table_c, table_b)
self.assertColumnNames(table_c, ['one', 'two', 'three', 'four'])
self.assertColumnTypes(table_c, [Number, Number, Text, Text])
self.assertSequenceEqual(table_c.rows[0], [1, 4, 'a', None])
self.assertSequenceEqual(table_c.rows[3], [4, 1, None, 'a'])

for row in table_c.rows:
self.assertSequenceEqual(row.keys(), self.column_names)
self.assertSequenceEqual(row.keys(), ['one', 'two', 'three', 'four'])

def test_merge_different_types(self):
table_a = Table(self.rows, self.column_names, self.column_types)
Expand All @@ -1676,8 +1695,8 @@ def test_merge_different_types(self):

table_b = Table(self.rows, self.column_names, column_types)

with self.assertRaises(ValueError):
table_c = Table.merge([table_a, table_b]) # noqa
with self.assertRaises(DataTypeError):
table_c = Table.merge([table_a, table_b])

def test_merge_with_row_names(self):
table_a = Table(self.rows, self.column_names, self.column_types, row_names='three')
Expand Down

0 comments on commit 039c728

Please sign in to comment.