|
| 1 | +""" |
| 2 | +Collection of Container classes for interacting with aligned and hierarchical dynamic tables |
| 3 | +""" |
| 4 | +from collections import OrderedDict |
| 5 | + |
| 6 | +import numpy as np |
| 7 | +import pandas as pd |
| 8 | + |
| 9 | +from . import register_class |
| 10 | +from .table import DynamicTable |
| 11 | +from ..utils import docval, getargs, call_docval_func, popargs, get_docval |
| 12 | + |
| 13 | + |
@register_class('AlignedDynamicTable')
class AlignedDynamicTable(DynamicTable):
    """
    DynamicTable container that supports storing a collection of subtables. Each sub-table is a
    DynamicTable itself that is aligned with the main table by row index. I.e., all
    DynamicTables stored in this group MUST have the same number of rows. This type effectively
    defines a 2-level table in which the main data is stored in the main table implemented by this type
    and additional columns of the table are grouped into categories, with each category being
    represented by a separate DynamicTable stored within the group.
    """
    __fields__ = ({'name': 'category_tables', 'child': True}, )

    @docval(*get_docval(DynamicTable.__init__),
            {'name': 'category_tables', 'type': list,
             'doc': 'List of DynamicTables to be added to the container', 'default': None},
            {'name': 'categories', 'type': 'array_data',
             'doc': 'List of names with the ordering of category tables', 'default': None})
    def __init__(self, **kwargs):
        """
        Initialize the main table and attach the given category tables.

        The category tables are stored in the order given by ``categories``. If ``categories`` is
        omitted, the order of ``category_tables`` is used. If category tables are given and the
        main table has no ids yet, one id per row of the first category table is appended (0, 1, ...).

        :raises: ValueError if categories are given without category_tables, if the number of
                 categories and category_tables differs, if a table name is not listed in categories,
                 if two tables share a name, or if a category table does not have the same number
                 of rows as the main table
        """
        in_category_tables = popargs('category_tables', kwargs)
        in_categories = popargs('categories', kwargs)
        if in_categories is None and in_category_tables is not None:
            # No explicit ordering given; use the order of the tables as provided
            in_categories = [tab.name for tab in in_category_tables]
        if in_categories is not None and in_category_tables is None:
            raise ValueError("Categories provided but no category_tables given")
        # at this point both in_categories and in_category_tables should either both be None or both be a list
        if in_categories is not None:
            if len(in_categories) != len(in_category_tables):
                raise ValueError("%s category_tables given but %s categories specified" %
                                 (len(in_category_tables), len(in_categories)))
        # Initialize the main dynamic table
        call_docval_func(super().__init__, kwargs)
        # Create and set all sub-categories
        dts = OrderedDict()
        # Add the custom categories given as inputs
        if in_category_tables is not None:
            # We may need to resize our main table when adding categories as the user may not have set ids
            if len(in_category_tables) > 0:
                # We have categories to process
                if len(self.id) == 0:
                    # The user did not initialize our main table id's nor set columns for our main table
                    for row_id in range(len(in_category_tables[0])):
                        self.id.append(row_id)
            # Add the user-provided categories in the correct order as described by the categories.
            # This is necessary, because we do not store the categories explicitly but we maintain them
            # as the order of our self.category_tables. This makes sure look-ups are consistent.
            # lookup_index maps category name --> position of the table in in_category_tables (-1 = not seen)
            lookup_index = OrderedDict([(k, -1) for k in in_categories])
            for position, table in enumerate(in_category_tables):
                # Error check that the name of the table is in our categories list
                if table.name not in lookup_index:
                    raise ValueError("DynamicTable %s does not appear in categories %s" %
                                     (table.name, str(in_categories)))
                # Error check to make sure no two tables with the same name are given
                if lookup_index[table.name] >= 0:
                    raise ValueError("Duplicate table name %s found in input dynamic_tables" % table.name)
                lookup_index[table.name] = position
            for table_name, table_index in lookup_index.items():
                # This error case should not be able to occur since the length of the in_categories and
                # in_category_tables must match and we made sure that each DynamicTable we added had its
                # name in the in_categories list. We, therefore, exclude this check from coverage testing
                # but we leave it in just as a backup trigger in case something unexpected happens
                if table_index < 0:  # pragma: no cover
                    raise ValueError("DynamicTable %s listed in categories but does not appear in category_tables" %
                                     table_name)  # pragma: no cover
                # Test that all category tables have the correct number of rows
                category = in_category_tables[table_index]
                if len(category) != len(self):
                    raise ValueError('Category DynamicTable %s does not align, it has %i rows expected %i' %
                                     (category.name, len(category), len(self)))
                # Add the category table to our category_tables.
                dts[category.name] = category
        # Set the self.category_tables attribute, which will set the parent/child relationships for the category_tables
        self.category_tables = dts

    def __contains__(self, val):
        """
        Check if the given value (i.e., column) exists in this table

        :param val: If val is a string then check if a category or a column of the main table with
                    that name exists. If val is a tuple of two strings (category, colname) then check
                    for the given category if the given colname exists. Any other type yields False.

        :raises: ValueError if val is a tuple that does not have exactly two elements
        """
        if isinstance(val, str):
            return val in self.category_tables or val in self.colnames
        elif isinstance(val, tuple):
            if len(val) != 2:
                raise ValueError("Expected tuple of strings of length 2 got tuple of length %i" % len(val))
            # The category lookup is delegated to get_category
            return val[1] in self.get_category(val[0])
        else:
            return False

    @property
    def categories(self):
        """
        Get the list of names of the categories

        Short-hand for list(self.category_tables.keys())
        """
        return list(self.category_tables.keys())

    @docval({'name': 'category', 'type': DynamicTable, 'doc': 'Add a new DynamicTable category'},)
    def add_category(self, **kwargs):
        """
        Add a new DynamicTable to the AlignedDynamicTable to create a new category in the table.

        NOTE: The table must align with (i.e, have the same number of rows as) the main data table (and
        other category tables). I.e., if the AlignedDynamicTable is already populated with data
        then we have to populate the new category with the corresponding data before adding it.

        :raises: ValueError is raised if the input table does not have the same number of rows as the main table
                 or if a category with the same name already exists
        """
        category = getargs('category', kwargs)
        if len(category) != len(self):
            raise ValueError('New category DynamicTable does not align, it has %i rows expected %i' %
                             (len(category), len(self)))
        if category.name in self.category_tables:
            raise ValueError("Category %s already in the table" % category.name)
        self.category_tables[category.name] = category
        category.parent = self

    @docval({'name': 'name', 'type': str, 'doc': 'Name of the category we want to retrieve', 'default': None})
    def get_category(self, **kwargs):
        """
        Get the table for the given category name.

        :returns: This table itself if name is None, or if name is the name of this table and no
                  category with that name exists. Otherwise the entry of self.category_tables
                  stored under name.
        """
        name = popargs('name', kwargs)
        if name is None or (name not in self.category_tables and name == self.name):
            return self
        else:
            return self.category_tables[name]

    @docval(*get_docval(DynamicTable.add_column),
            {'name': 'category', 'type': str, 'doc': 'The category the column should be added to',
             'default': None})
    def add_column(self, **kwargs):
        """
        Add a column to the main table or, if a category is given, to that category table

        :raises: KeyError if the category does not exist

        """
        category_name = popargs('category', kwargs)
        if category_name is None:
            # Add the column to our main table
            call_docval_func(super().add_column, kwargs)
        else:
            # Add the column to a sub-category table
            try:
                category = self.get_category(category_name)
            except KeyError as err:
                raise KeyError("Category %s not in table" % category_name) from err
            category.add_column(**kwargs)

    @docval({'name': 'data', 'type': dict, 'doc': 'the data to put in this row', 'default': None},
            {'name': 'id', 'type': int, 'doc': 'the ID for the row', 'default': None},
            {'name': 'enforce_unique_id', 'type': bool, 'doc': 'enforce that the id in the table must be unique',
             'default': False},
            allow_extra=True)
    def add_row(self, **kwargs):
        """
        We can either provide the row data as a single dict or by specifying a dict for each category

        The dict passed as ``data`` is not modified.

        :raises: KeyError if the row data does not contain an entry for every category
        """
        data, row_id, enforce_unique_id = popargs('data', 'id', 'enforce_unique_id', kwargs)
        # Work on a shallow copy so that popping the category entries and adding the id settings
        # below does not alter the dict owned by the caller
        row = dict(data) if data is not None else kwargs

        # extract the category data
        categories = self.categories
        category_data = {k: row.pop(k) for k in categories if k in row}

        # Check that we have the appropriate categories provided
        missing_categories = set(categories) - set(category_data)
        if missing_categories:
            raise KeyError(
                '\n'.join([
                    'row data keys don\'t match available categories',
                    'missing {} category keys: {}'.format(len(missing_categories), missing_categories)
                ])
            )
        # Add the data to our main dynamic table
        row['id'] = row_id
        row['enforce_unique_id'] = enforce_unique_id
        call_docval_func(super().add_row, row)

        # Add the data to all our dynamic table categories
        for category, values in category_data.items():
            self.category_tables[category].add_row(**values)

    @docval({'name': 'ignore_category_ids', 'type': bool,
             'doc': "Ignore id columns of sub-category tables", 'default': False})
    def to_dataframe(self, **kwargs):
        """
        Convert the collection of tables to a single pandas DataFrame

        The columns of the result are grouped by table name (the main table first, then the categories
        in order) and the ids of the main table are used as the index of the result.
        """
        ignore_category_ids = getargs('ignore_category_ids', kwargs)
        dfs = [super().to_dataframe().reset_index(), ]
        # All frames get a positional index before concatenating, because pd.concat(axis=1) aligns
        # rows on the index. With drop=True the id column of the category is discarded, otherwise
        # it is kept as a regular column of the category.
        dfs += [category.to_dataframe().reset_index(drop=ignore_category_ids)
                for category in self.category_tables.values()]
        names = [self.name, ] + list(self.category_tables.keys())
        res = pd.concat(dfs, axis=1, keys=names)
        res.set_index((self.name, 'id'), drop=True, inplace=True)
        return res

    def __getitem__(self, item):
        """
        If item is:
        * int, list, numpy array, or slice : Return the selected row(s) from all tables
        * string : Return a column of the main table if item is one of its column names,
                   otherwise return a single category of the table
        * None : Return the table as a DataFrame
        * tuple: Get a column, row, or cell from a particular category. The tuple is expected to consist
                 of (category, selection) where category may be a string with the name of the sub-category
                 or None (or the name of this AlignedDynamicTable) if we want to slice into the main table.

        :returns: DataFrame when retrieving a row or category. Returns scalar when selecting a cell.
                  Returns a VectorData/VectorIndex when retrieving a single column.
                  Items of any other type are not handled and yield None.

        :raises: ValueError if item is a tuple that does not have 2 or 3 elements
        """
        # np.integer is included so that numpy integer scalars select a row just like a Python int
        if isinstance(item, (int, np.integer, list, np.ndarray, slice)):
            # get the selected row(s) from all tables
            dfs = ([super().__getitem__(item).reset_index(), ] +
                   [category[item].reset_index() for category in self.category_tables.values()])
            names = [self.name, ] + list(self.category_tables.keys())
            res = pd.concat(dfs, axis=1, keys=names)
            res.set_index((self.name, 'id'), drop=True, inplace=True)
            return res
        elif isinstance(item, str) or item is None:
            if item in self.colnames:
                # get a specific column
                return super().__getitem__(item)
            else:
                # get a single category
                return self.get_category(item).to_dataframe()
        elif isinstance(item, tuple):
            if len(item) == 2:
                return self.get_category(item[0])[item[1]]
            elif len(item) == 3:
                return self.get_category(item[0])[item[1]][item[2]]
            else:
                raise ValueError("Expected tuple of length 2 or 3 with (category, column, row) as value.")
0 commit comments