From 0d3333f5dba438dc8cfd5d40bdd5dd45f7ad2bc0 Mon Sep 17 00:00:00 2001
From: Andrew Sanchez <inbox.asanchez@gmail.com>
Date: Wed, 12 Feb 2020 16:34:46 -0700
Subject: [PATCH 1/4] Use MultiIndex.codes instead of labels (no longer
 supported)

Please see these links for the rationale if interested:

https://github.com/pandas-dev/pandas/issues/13443

https://github.com/pandas-dev/pandas/pull/23752
---
 q2_diversity/tests/test_alpha_rarefaction.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/q2_diversity/tests/test_alpha_rarefaction.py b/q2_diversity/tests/test_alpha_rarefaction.py
index 22603115..1531ecdc 100644
--- a/q2_diversity/tests/test_alpha_rarefaction.py
+++ b/q2_diversity/tests/test_alpha_rarefaction.py
@@ -421,7 +421,7 @@ def test_unique_metadata_groups(self):
         obs = _reindex_with_metadata('pet', ['pet'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
-                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
+                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                 names=['depth', 'iter'])
         exp_ind = pd.Index(['milo', 'peanut', 'russ'], name='pet')
         exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
@@ -445,7 +445,7 @@ def test_some_duplicates_in_column(self):
         obs = _reindex_with_metadata('pet', ['pet'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
-                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
+                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                 names=['depth', 'iter'])
         exp_ind = pd.Index(['milo', 'russ'], name='pet')
         exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
@@ -469,7 +469,7 @@ def test_all_identical(self):
         obs = _reindex_with_metadata('pet', ['pet'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
-                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
+                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                 names=['depth', 'iter'])
         exp_ind = pd.Index(['russ'], name='pet')
         exp = pd.DataFrame(data=[[5, 6, 7, 8]],
@@ -495,7 +495,7 @@ def test_multiple_columns(self):
         obs = _reindex_with_metadata('pet', ['pet', 'toy'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet', 'toy'], [1, 2, '']],
-                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
+                                codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                 names=['depth', 'iter'])
         exp_ind = pd.Index(['milo', 'peanut', 'russ'], name='pet')
         exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],

From b0cc0df0679e3b220549f1c26e5cd48581394254 Mon Sep 17 00:00:00 2001
From: Andrew Sanchez <inbox.asanchez@gmail.com>
Date: Wed, 12 Feb 2020 16:41:46 -0700
Subject: [PATCH 2/4] Unpack and name values returned by _reindex_with_metadata

This makes it easier to see what's going on below with the values
returned by this function.
---
 q2_diversity/tests/test_alpha_rarefaction.py | 30 ++++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/q2_diversity/tests/test_alpha_rarefaction.py b/q2_diversity/tests/test_alpha_rarefaction.py
index 1531ecdc..cef2c10f 100644
--- a/q2_diversity/tests/test_alpha_rarefaction.py
+++ b/q2_diversity/tests/test_alpha_rarefaction.py
@@ -418,7 +418,7 @@ def test_unique_metadata_groups(self):
                                   [9, 10, 11, 12, 'peanut']],
                             columns=columns, index=['S1', 'S2', 'S3'])
 
-        obs = _reindex_with_metadata('pet', ['pet'], data)
+        median, counts = _reindex_with_metadata('pet', ['pet'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                 codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
@@ -427,12 +427,12 @@ def test_unique_metadata_groups(self):
         exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[0])
+        pdt.assert_frame_equal(exp, median)
 
         exp = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[1])
+        pdt.assert_frame_equal(exp, counts)
 
     def test_some_duplicates_in_column(self):
         columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
@@ -442,7 +442,7 @@ def test_some_duplicates_in_column(self):
                                   [9, 10, 11, 12, 'russ']],
                             columns=columns, index=['S1', 'S2', 'S3'])
 
-        obs = _reindex_with_metadata('pet', ['pet'], data)
+        median, counts = _reindex_with_metadata('pet', ['pet'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                 codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
@@ -451,12 +451,12 @@ def test_some_duplicates_in_column(self):
         exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[0])
+        pdt.assert_frame_equal(exp, median)
 
         exp = pd.DataFrame(data=[[1, 1, 1, 1], [2, 2, 2, 2]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[1])
+        pdt.assert_frame_equal(exp, counts)
 
     def test_all_identical(self):
         columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
@@ -466,7 +466,7 @@ def test_all_identical(self):
                                   [9, 10, 11, 12, 'russ']],
                             columns=columns, index=['S1', 'S2', 'S3'])
 
-        obs = _reindex_with_metadata('pet', ['pet'], data)
+        median, counts = _reindex_with_metadata('pet', ['pet'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                 codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
@@ -475,12 +475,12 @@ def test_all_identical(self):
         exp = pd.DataFrame(data=[[5, 6, 7, 8]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[0])
+        pdt.assert_frame_equal(exp, median)
 
         exp = pd.DataFrame(data=[[3, 3, 3, 3]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[1])
+        pdt.assert_frame_equal(exp, counts)
 
     def test_multiple_columns(self):
         columns = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (200, 1),
@@ -492,7 +492,7 @@ def test_multiple_columns(self):
                                   [9, 10, 11, 12, 'peanut', 'stick']],
                             columns=columns, index=['S1', 'S2', 'S3'])
 
-        obs = _reindex_with_metadata('pet', ['pet', 'toy'], data)
+        median, counts = _reindex_with_metadata('pet', ['pet', 'toy'], data)
 
         exp_col = pd.MultiIndex(levels=[[1, 200, 'pet', 'toy'], [1, 2, '']],
                                 codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
@@ -501,25 +501,25 @@ def test_multiple_columns(self):
         exp = pd.DataFrame(data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[0])
+        pdt.assert_frame_equal(exp, median)
 
         exp = pd.DataFrame(data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[1])
+        pdt.assert_frame_equal(exp, counts)
 
-        obs = _reindex_with_metadata('toy', ['pet', 'toy'], data)
+        median, counts = _reindex_with_metadata('toy', ['pet', 'toy'], data)
 
         exp_ind = pd.Index(['stick', 'yeti'], name='toy')
         exp = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[0])
+        pdt.assert_frame_equal(exp, median)
 
         exp = pd.DataFrame(data=[[2, 2, 2, 2], [1, 1, 1, 1]],
                            columns=exp_col, index=exp_ind)
 
-        pdt.assert_frame_equal(exp, obs[1])
+        pdt.assert_frame_equal(exp, counts)
 
 
 class AlphaRarefactionJSONPTests(unittest.TestCase):

From 552152f0948effc6bcd9e5e4939917ee6062dc0e Mon Sep 17 00:00:00 2001
From: Andrew Sanchez <inbox.asanchez@gmail.com>
Date: Wed, 12 Feb 2020 16:43:08 -0700
Subject: [PATCH 3/4] Initial patch to handle new pandas error

This avoids attempting to drop columns that no longer exist in merged.columns
after setting the index, while still dropping the columns that are present in
merged.columns.  Dropping a missing column raises an exception in pandas >= 1.

Please see https://github.com/pandas-dev/pandas/issues/8594
for details.
---
 q2_diversity/_alpha/_visualizer.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/q2_diversity/_alpha/_visualizer.py b/q2_diversity/_alpha/_visualizer.py
index 25387553..fbc0a3e2 100644
--- a/q2_diversity/_alpha/_visualizer.py
+++ b/q2_diversity/_alpha/_visualizer.py
@@ -241,7 +241,10 @@ def _reindex_with_metadata(column, columns, merged):
     merged.sort_index(axis=0, ascending=True, inplace=True)
     merged = merged.groupby(level=[column])
     counts = merged.count()
-    counts.drop(columns, axis=1, inplace=True, level=0)
+    # Removes the column name used to set the index of `merged` above
+    col_diff = set(columns) - set([column])
+    if col_diff:
+        counts.drop(col_diff, axis=1, inplace=True, level=0)
     median_ = merged.median()
     return median_, counts
 

From b9ddead466654806adfcebeae3c824847dda5114 Mon Sep 17 00:00:00 2001
From: Andrew Sanchez <inbox.asanchez@gmail.com>
Date: Thu, 13 Feb 2020 12:20:24 -0700
Subject: [PATCH 4/4] Avoid mutating `merged` in place by assigning to new
 variable

This avoids attempting to drop columns that had already been dropped in previous
calls to _reindex_with_metadata in the for loop in `alpha_rarefaction`.

Co-authored-by: Matthew Dillon <matthewrdillon@gmail.com>
---
 q2_diversity/_alpha/_visualizer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/q2_diversity/_alpha/_visualizer.py b/q2_diversity/_alpha/_visualizer.py
index fbc0a3e2..19446ad3 100644
--- a/q2_diversity/_alpha/_visualizer.py
+++ b/q2_diversity/_alpha/_visualizer.py
@@ -237,15 +237,15 @@ def alpha_correlation(output_dir: str,
 
 
 def _reindex_with_metadata(column, columns, merged):
-    merged.set_index(column, inplace=True)
-    merged.sort_index(axis=0, ascending=True, inplace=True)
-    merged = merged.groupby(level=[column])
-    counts = merged.count()
+    reindexed = merged.set_index(column)
+    reindexed.sort_index(axis=0, ascending=True, inplace=True)
+    grouped = reindexed.groupby(level=[column])
+    counts = grouped.count()
     # Removes the column name used to set the index of `merged` above
     col_diff = set(columns) - set([column])
     if col_diff:
         counts.drop(col_diff, axis=1, inplace=True, level=0)
-    median_ = merged.median()
+    median_ = grouped.median()
     return median_, counts