1010
1111from pandas .core .categorical import Categorical
1212from pandas .core .common import (notnull , _ensure_platform_int , _maybe_promote ,
13- _maybe_upcast )
13+ _maybe_upcast , isnull )
1414from pandas .core .groupby import (get_group_index , _compress_group_index ,
1515 decons_group_index )
1616import pandas .core .common as com
1717import pandas .algos as algos
18-
18+ from pandas import lib
1919
2020from pandas .core .index import MultiIndex , Index
2121
@@ -67,7 +67,14 @@ def __init__(self, values, index, level=-1, value_columns=None):
6767 self .index = index
6868 self .level = self .index ._get_level_number (level )
6969
70- self .new_index_levels = list (index .levels )
70+ levels = index .levels
71+ labels = index .labels
72+ def _make_index (lev ,lab ):
73+ i = lev .__class__ (_make_index_array_level (lev .values ,lab ))
74+ i .name = lev .name
75+ return i
76+
77+ self .new_index_levels = list ([ _make_index (lev ,lab ) for lev ,lab in zip (levels ,labels ) ])
7178 self .new_index_names = list (index .names )
7279
7380 self .removed_name = self .new_index_names .pop (self .level )
@@ -140,6 +147,19 @@ def get_result(self):
140147 values = com .take_nd (values , inds , axis = 1 )
141148 columns = columns [inds ]
142149
150+ # we might have a missing index
151+ if len (index ) != values .shape [0 ]:
152+ mask = isnull (index )
153+ if mask .any ():
154+ l = np .arange (len (index ))
155+ values , orig_values = np .empty ((len (index ),values .shape [1 ])), values
156+ values .fill (np .nan )
157+ values_indexer = com ._ensure_int64 (l [~ mask ])
158+ for i , j in enumerate (values_indexer ):
159+ values [j ] = orig_values [i ]
160+ else :
161+ index = index .take (self .unique_groups )
162+
143163 return DataFrame (values , index = index , columns = columns )
144164
145165 def get_new_values (self ):
@@ -201,11 +221,13 @@ def get_new_columns(self):
201221 def get_new_index (self ):
202222 result_labels = []
203223 for cur in self .sorted_labels [:- 1 ]:
204- result_labels .append (cur .take (self .compressor ))
224+ labels = cur .take (self .compressor )
225+ labels = _make_index_array_level (labels ,cur )
226+ result_labels .append (labels )
205227
206228 # construct the new index
207229 if len (self .new_index_levels ) == 1 :
208- new_index = self .new_index_levels [0 ]. take ( self . unique_groups )
230+ new_index = self .new_index_levels [0 ]
209231 new_index .name = self .new_index_names [0 ]
210232 else :
211233 new_index = MultiIndex (levels = self .new_index_levels ,
@@ -215,6 +237,26 @@ def get_new_index(self):
215237 return new_index
216238
217239
240+ def _make_index_array_level (lev ,lab ):
241+ """ create the combined index array, preserving nans, return an array """
242+ mask = lab == - 1
243+ if not mask .any ():
244+ return lev
245+
246+ l = np .arange (len (lab ))
247+ mask_labels = np .empty (len (mask [mask ]),dtype = object )
248+ mask_labels .fill (np .nan )
249+ mask_indexer = com ._ensure_int64 (l [mask ])
250+
251+ labels = lev
252+ labels_indexer = com ._ensure_int64 (l [~ mask ])
253+
254+ new_labels = np .empty (tuple ([len (lab )]),dtype = object )
255+ new_labels [labels_indexer ] = labels
256+ new_labels [mask_indexer ] = mask_labels
257+
258+ return new_labels
259+
218260def _unstack_multiple (data , clocs ):
219261 if len (clocs ) == 0 :
220262 return data
0 commit comments