Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions q2_diversity/_alpha/_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,12 +237,15 @@ def alpha_correlation(output_dir: str,


def _reindex_with_metadata(column, columns, merged):
merged.set_index(column, inplace=True)
merged.sort_index(axis=0, ascending=True, inplace=True)
merged = merged.groupby(level=[column])
counts = merged.count()
counts.drop(columns, axis=1, inplace=True, level=0)
median_ = merged.median()
reindexed = merged.set_index(column)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid mutating `merged` in place: assign the result of `set_index` to a new DataFrame, `reindexed`, instead.

reindexed.sort_index(axis=0, ascending=True, inplace=True)
grouped = reindexed.groupby(level=[column])
counts = grouped.count()
# Removes the column name used to set the index of `merged` above
col_diff = set(columns) - set([column])
if col_diff:
counts.drop(col_diff, axis=1, inplace=True, level=0)
Copy link
Contributor Author

@andrewsanchez andrewsanchez Feb 13, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first parameter of `DataFrame.drop` is `labels`, which accepts a single label or a list-like. Passing the set directly seems to work fine here, but we can convert it to a `pd.Index` or a list if that is preferable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nah, this looks good to me

median_ = grouped.median()
return median_, counts


Expand Down
38 changes: 19 additions & 19 deletions q2_diversity/tests/test_alpha_rarefaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,21 +418,21 @@ def test_unique_metadata_groups(self):
[9, 10, 11, 12, 'peanut']],
columns=columns, index=['S1', 'S2', 'S3'])

obs = _reindex_with_metadata('pet', ['pet'], data)
median, counts = _reindex_with_metadata('pet', ['pet'], data)

exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['milo', 'peanut', 'russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[0])
pdt.assert_frame_equal(exp, median)

exp = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[1])
pdt.assert_frame_equal(exp, counts)

def test_some_duplicates_in_column(self):
columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
Expand All @@ -442,21 +442,21 @@ def test_some_duplicates_in_column(self):
[9, 10, 11, 12, 'russ']],
columns=columns, index=['S1', 'S2', 'S3'])

obs = _reindex_with_metadata('pet', ['pet'], data)
median, counts = _reindex_with_metadata('pet', ['pet'], data)

exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['milo', 'russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[0])
pdt.assert_frame_equal(exp, median)

exp = pd.DataFrame(data=[[1, 1, 1, 1], [2, 2, 2, 2]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[1])
pdt.assert_frame_equal(exp, counts)

def test_all_identical(self):
columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
Expand All @@ -466,21 +466,21 @@ def test_all_identical(self):
[9, 10, 11, 12, 'russ']],
columns=columns, index=['S1', 'S2', 'S3'])

obs = _reindex_with_metadata('pet', ['pet'], data)
median, counts = _reindex_with_metadata('pet', ['pet'], data)

exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[0])
pdt.assert_frame_equal(exp, median)

exp = pd.DataFrame(data=[[3, 3, 3, 3]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[1])
pdt.assert_frame_equal(exp, counts)

def test_multiple_columns(self):
columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
Expand All @@ -492,34 +492,34 @@ def test_multiple_columns(self):
[9, 10, 11, 12, 'peanut', 'stick']],
columns=columns, index=['S1', 'S2', 'S3'])

obs = _reindex_with_metadata('pet', ['pet', 'toy'], data)
median, counts = _reindex_with_metadata('pet', ['pet', 'toy'], data)

exp_col = pd.MultiIndex(levels=[[1, 200, 'pet', 'toy'], [1, 2, '']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=['depth', 'iter'])
exp_ind = pd.Index(['milo', 'peanut', 'russ'], name='pet')
exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[0])
pdt.assert_frame_equal(exp, median)

exp = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[1])
pdt.assert_frame_equal(exp, counts)

obs = _reindex_with_metadata('toy', ['pet', 'toy'], data)
median, counts = _reindex_with_metadata('toy', ['pet', 'toy'], data)

exp_ind = pd.Index(['stick', 'yeti'], name='toy')
exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[0])
pdt.assert_frame_equal(exp, median)

exp = pd.DataFrame(data=[[2, 2, 2, 2], [1, 1, 1, 1]],
columns=exp_col, index=exp_ind)

pdt.assert_frame_equal(exp, obs[1])
pdt.assert_frame_equal(exp, counts)


class AlphaRarefactionJSONPTests(unittest.TestCase):
Expand Down