88
99import numpy as np
1010
11- from . import duck_array_ops
12- from . import dtypes
1311from . import utils
1412from .indexing import get_indexer_nd
1513from .pycompat import iteritems , OrderedDict , suppress
1614from .utils import is_full_slice , is_dict_like
17- from .variable import Variable , IndexVariable
15+ from .variable import IndexVariable
1816
1917
2018def _get_joiner (join ):
@@ -306,59 +304,51 @@ def reindex_variables(variables, sizes, indexes, indexers, method=None,
306304 from .dataarray import DataArray
307305
308306 # build up indexers for assignment along each dimension
309- to_indexers = {}
310- from_indexers = {}
307+ int_indexers = {}
308+ targets = {}
309+ masked_dims = set ()
310+ unchanged_dims = set ()
311+
311312 # size of reindexed dimensions
312313 new_sizes = {}
313314
314315 for name , index in iteritems (indexes ):
315316 if name in indexers :
316- target = utils .safe_cast_to_index (indexers [name ])
317317 if not index .is_unique :
318318 raise ValueError (
319319 'cannot reindex or align along dimension %r because the '
320320 'index has duplicate values' % name )
321- indexer = get_indexer_nd (index , target , method , tolerance )
322321
322+ target = utils .safe_cast_to_index (indexers [name ])
323323 new_sizes [name ] = len (target )
324- # Note pandas uses negative values from get_indexer_nd to signify
325- # values that are missing in the index
326- # The non-negative values thus indicate the non-missing values
327- to_indexers [name ] = indexer >= 0
328- if to_indexers [name ].all ():
329- # If an indexer includes no negative values, then the
330- # assignment can be to a full-slice (which is much faster,
331- # and means we won't need to fill in any missing values)
332- to_indexers [name ] = slice (None )
333-
334- from_indexers [name ] = indexer [to_indexers [name ]]
335- if np .array_equal (from_indexers [name ], np .arange (len (index ))):
336- # If the indexer is equal to the original index, use a full
337- # slice object to speed up selection and so we can avoid
338- # unnecessary copies
339- from_indexers [name ] = slice (None )
324+
325+ int_indexer = get_indexer_nd (index , target , method , tolerance )
326+
327+ # We use negative values from get_indexer_nd to signify
328+ # values that are missing in the index.
329+ if (int_indexer < 0 ).any ():
330+ masked_dims .add (name )
331+ elif np .array_equal (int_indexer , np .arange (len (index ))):
332+ unchanged_dims .add (name )
333+
334+ int_indexers [name ] = int_indexer
335+ targets [name ] = target
340336
341337 for dim in sizes :
342338 if dim not in indexes and dim in indexers :
343339 existing_size = sizes [dim ]
344- new_size = utils . safe_cast_to_index ( indexers [dim ]) .size
340+ new_size = indexers [dim ].size
345341 if existing_size != new_size :
346342 raise ValueError (
347343 'cannot reindex or align along dimension %r without an '
348344 'index because its size %r is different from the size of '
349345 'the new index %r' % (dim , existing_size , new_size ))
350346
351- def any_not_full_slices (indexers ):
352- return any (not is_full_slice (idx ) for idx in indexers )
353-
354- def var_indexers (var , indexers ):
355- return tuple (indexers .get (d , slice (None )) for d in var .dims )
356-
357347 # create variables for the new dataset
358348 reindexed = OrderedDict ()
359349
360350 for dim , indexer in indexers .items ():
361- if isinstance (indexer , DataArray ) and indexer .dims != (dim , ):
351+ if isinstance (indexer , DataArray ) and indexer .dims != (dim ,):
362352 warnings .warn (
363353 "Indexer has dimensions {0:s} that are different "
364354 "from that to be indexed along {1:s}. "
@@ -375,47 +365,24 @@ def var_indexers(var, indexers):
375365
376366 for name , var in iteritems (variables ):
377367 if name not in indexers :
378- assign_to = var_indexers (var , to_indexers )
379- assign_from = var_indexers (var , from_indexers )
380-
381- if any_not_full_slices (assign_to ):
382- # there are missing values to in-fill
383- data = var [assign_from ].data
384- dtype , fill_value = dtypes .maybe_promote (var .dtype )
385-
386- if isinstance (data , np .ndarray ):
387- shape = tuple (new_sizes .get (dim , size )
388- for dim , size in zip (var .dims , var .shape ))
389- new_data = np .empty (shape , dtype = dtype )
390- new_data [...] = fill_value
391- # create a new Variable so we can use orthogonal indexing
392- # use fastpath=True to avoid dtype inference
393- new_var = Variable (var .dims , new_data , var .attrs ,
394- fastpath = True )
395- new_var [assign_to ] = data
396-
397- else : # dask array
398- data = data .astype (dtype , copy = False )
399- for axis , indexer in enumerate (assign_to ):
400- if not is_full_slice (indexer ):
401- indices = np .cumsum (indexer )[~ indexer ]
402- data = duck_array_ops .insert (
403- data , indices , fill_value , axis = axis )
404- new_var = Variable (var .dims , data , var .attrs ,
405- fastpath = True )
406-
407- elif any_not_full_slices (assign_from ):
408- # type coercion is not necessary as there are no missing
409- # values
410- new_var = var [assign_from ]
411-
412- else :
413- # no reindexing is necessary
368+ key = tuple (slice (None )
369+ if d in unchanged_dims
370+ else int_indexers .get (d , slice (None ))
371+ for d in var .dims )
372+ needs_masking = any (d in masked_dims for d in var .dims )
373+
374+ if needs_masking :
375+ new_var = var ._getitem_with_mask (key )
376+ elif all (is_full_slice (k ) for k in key ):
377+ # no reindexing necessary
414378 # here we need to manually deal with copying data, since
415379 # we neither created a new ndarray nor used fancy indexing
416380 new_var = var .copy (deep = copy )
381+ else :
382+ new_var = var [key ]
417383
418384 reindexed [name ] = new_var
385+
419386 return reindexed
420387
421388
0 commit comments