Skip to content

Commit

Permalink
Merge pull request #225 from data-8/fewerlists
Browse files Browse the repository at this point in the history
Reduce use of lists
  • Loading branch information
papajohn authored Jun 28, 2016
2 parents d5ccaf6 + b0e92c7 commit be6de5b
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 11 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ This project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased]
None yet.

## v0.6.0
### Changed
- Added `make_array` to make arrays without lists. (#224)
- `Table.select`, `drop`, and `with_columns` now accept variable arguments in addition to lists. (#224)

## v0.5.3
### Changed
- Allow charting methods to select particular columns and default to
Expand Down
91 changes: 83 additions & 8 deletions datascience/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,13 +527,15 @@ def copy(self, *, shallow=False):
self._add_column_and_format(table, label, column)
return table

def select(self, column_label_or_labels):
def select(self, *column_label_or_labels):
"""Return a Table with selected column or columns by label or index.
Args:
``column_label_or_labels`` (string or list of strings): The header
names or indices of the columns to be selected. ``column_label_or_labels`` must
be an existing header name, or a valid column index.
``column_label_or_labels`` (string, list of strings, or several
separate argument strings): The header names or indices of the
columns to be selected. ``column_label_or_labels`` must
be an existing header name, or a valid column index, or a list
thereof.
Returns:
An instance of ``Table`` containing only selected columns.
Expand All @@ -557,6 +559,11 @@ def select(self, column_label_or_labels):
6
5
5
>>> t.select('burgers', 'calories')
burgers | calories
cheeseburger | 743
hamburger | 651
veggie burger | 582
>>> t.select(1)
prices
6
Expand All @@ -567,8 +574,13 @@ def select(self, column_label_or_labels):
743 | cheeseburger
651 | hamburger
582 | veggie burger
>>> t.select(2, 0)
calories | burgers
743 | cheeseburger
651 | hamburger
582 | veggie burger
"""
labels = self._as_labels(column_label_or_labels)
labels = self._varargs_as_labels(column_label_or_labels)
table = Table()
for label in labels:
self._add_column_and_format(table, label, np.copy(self[label]))
Expand All @@ -583,7 +595,7 @@ def take(self):
def exclude(self):
raise NotImplementedError()

def drop(self, column_label_or_labels):
def drop(self, *column_label_or_labels):
"""Return a Table with only columns other than selected label or labels.
Args:
Expand Down Expand Up @@ -613,18 +625,28 @@ def drop(self, column_label_or_labels):
6
5
5
>>> t.drop('burgers', 'calories')
prices
6
5
5
>>> t.drop([0, 2])
prices
6
5
5
>>> t.drop(0, 2)
prices
6
5
5
>>> t.drop(1)
burgers | calories
cheeseburger | 743
hamburger | 651
veggie burger | 582
"""
exclude = _as_labels(column_label_or_labels)
exclude = _varargs_labels_as_list(column_label_or_labels)
return self.select([c for (i, c) in enumerate(self.labels) if i not in exclude and c not in exclude])

def where(self, column_or_label, value_or_predicate=None, other=None):
Expand Down Expand Up @@ -1068,6 +1090,11 @@ def _as_labels(self, label_or_labels):
"""Convert single label to list and convert indices to labels."""
return [self._as_label(s) for s in _as_labels(label_or_labels)]

def _varargs_as_labels(self, label_list):
    """Resolve labels received through ``*args`` into a list of labels.

    ``label_list`` is the tuple of positional arguments: either the labels
    themselves, or a single list holding them. It is first flattened by
    ``_varargs_labels_as_list`` and the result is then handed to
    ``self._as_labels``.
    """
    flattened = _varargs_labels_as_list(label_list)
    return self._as_labels(flattened)

def _unused_label(self, label):
"""Generate an unused label."""
original = label
Expand Down Expand Up @@ -1327,7 +1354,7 @@ def with_column(self, label, values):
new_table.append_column(label, values)
return new_table

def with_columns(self, labels_and_values):
def with_columns(self, *labels_and_values):
"""Return a table with additional or replaced columns.
Args:
Expand All @@ -1341,18 +1368,52 @@ def with_columns(self, labels_and_values):
letter | count
c | 2
d | 4
>>> Table().with_columns(
... 'letter', ['c', 'd'],
... 'count', [2, 4],
... )
letter | count
c | 2
d | 4
>>> Table().with_columns([
... ['letter', ['c', 'd']],
... ['count', [2, 4]],
... ])
letter | count
c | 2
d | 4
>>> Table().with_columns(
... ['letter', ['c', 'd']],
... ['count', [2, 4]],
... )
letter | count
c | 2
d | 4
>>> Table().with_columns([
... ['letter', ['c', 'd']],
... ])
letter
c
d
>>> Table().with_columns(
... 'letter', ['c', 'd'],
... )
letter
c
d
>>> Table().with_columns(
... ['letter', ['c', 'd']],
... )
letter
c
d
>>> Table().with_columns({'letter': ['c', 'd']})
letter
c
d
"""
if len(labels_and_values) == 1:
labels_and_values = labels_and_values[0]
if isinstance(labels_and_values, collections.abc.Mapping):
labels_and_values = list(labels_and_values.items())
if not isinstance(labels_and_values, collections.abc.Sequence):
Expand Down Expand Up @@ -2176,6 +2237,20 @@ def _as_labels(column_label_or_labels):
else:
return column_label_or_labels

def _varargs_labels_as_list(label_list):
    """Return the labels passed through ``*args`` as one flat sequence.

    Callers that accept variable arguments may receive either several
    separate labels or a single collection of labels. This helper
    normalizes both spellings.

    Args:
        ``label_list``: The sequence of positional arguments received by
            the caller.

    Returns:
        ``[]`` when ``label_list`` is empty; ``label_list`` itself when its
        first element is not a non-string iterable (as judged by
        ``_is_non_string_iterable``); otherwise the single iterable that
        ``label_list`` contains.

    Raises:
        ValueError: If the first element is a non-string iterable and more
            than one argument was passed.
    """
    if len(label_list) == 0:
        return []
    elif not _is_non_string_iterable(label_list[0]):
        # Assume everything is a label. If not, it'll be caught later.
        return label_list
    elif len(label_list) == 1:
        return label_list[0]
    else:
        # Build ONE message string: passing two positional arguments to
        # ValueError would make the error display as a tuple.
        raise ValueError(
            "Labels {} contain more than one list. "
            "Pass just one list of labels.".format(label_list))

def _assert_same(values):
"""Assert that all values are identical and return the unique value."""
assert len(values) > 0
Expand Down
23 changes: 21 additions & 2 deletions datascience/util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Utility functions"""

__all__ = ['percentile', 'plot_cdf_area', 'plot_normal_cdf', 'table_apply',
'minimize']
__all__ = ['make_array', 'percentile', 'plot_cdf_area', 'plot_normal_cdf',
'table_apply', 'minimize']

import numpy as np
import pandas as pd
Expand All @@ -14,6 +14,25 @@
import math


def make_array(*elements):
    """Build an array out of the positional arguments.

    A convenient way to create a small array without writing a list first.
    All arguments should share a single type, as with any array.

    >>> make_array(0)
    array([0])
    >>> make_array(2, 3, 4)
    array([2, 3, 4])
    >>> make_array("foo", "bar")
    array(['foo', 'bar'],
          dtype='<U3')
    >>> make_array()
    array([], dtype=float64)
    """
    result = np.array(elements)
    return result


def percentile(p, arr=None):
"""Returns the pth percentile of the input array (the value that is at
least as great as p% of the values in the array).
Expand Down
2 changes: 1 addition & 1 deletion datascience/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.5.20'
__version__ = '0.6.0'

0 comments on commit be6de5b

Please sign in to comment.