Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ..core import indexing
from ..core.pycompat import OrderedDict, integer_types, iteritems
from ..core.utils import FrozenOrderedDict, HiddenKeyDict
from .common import AbstractWritableDataStore, ArrayWriter, BackendArray
from .common import AbstractWritableDataStore, BackendArray

# need some special secret attributes to tell us the dimensions
_DIMENSION_KEY = '_ARRAY_DIMENSIONS'
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
assert_coordinate_consistent, remap_label_indexers)
from .dataset import Dataset, merge_indexes, split_indexes
from .formatting import format_item
from .options import OPTIONS, _get_keep_attrs
from .options import OPTIONS
from .pycompat import OrderedDict, basestring, iteritems, range, zip
from .utils import (
decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution)
Expand Down
3 changes: 2 additions & 1 deletion xarray/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def _get_keep_attrs(default):
elif global_choice in [True, False]:
return global_choice
else:
raise ValueError("The global option keep_attrs must be one of True, False or 'default'.")
raise ValueError("The global option keep_attrs must be one of"
" True, False or 'default'.")


class set_options(object):
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _interpolate(self, kind='linear'):
.format(self._obj.data.name)
)

from ..coding.cftimeindex import CFTimeIndex
# from ..coding.cftimeindex import CFTimeIndex
import cftime as cf
import numpy as np
if isinstance(self._obj[self._dim].values[0], cf.datetime):
Expand Down
3 changes: 1 addition & 2 deletions xarray/plot/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import numpy as np
import pandas as pd

from xarray.core.alignment import align
# from xarray.core.alignment import align
from xarray.core.common import contains_cftime_datetimes
from xarray.core.pycompat import basestring

Expand Down Expand Up @@ -255,7 +255,6 @@ def _infer_line_data(darray, x, y, hue):
huelabel = label_from_attrs(darray[huename])
hueplt = darray[huename]


xlabel = label_from_attrs(xplt)
ylabel = label_from_attrs(yplt)

Expand Down
33 changes: 15 additions & 18 deletions xarray/tests/temp/cftime_resample_pandas_comparison.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import xarray as xr
import pandas as pd
import numpy as np
import pandas as pd

import xarray as xr

# Equal sampling comparisons:
ti = pd.date_range('2000-01-01', periods=9, freq='T', tz='UTC')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('T', label='left', closed='left').mean())
print(ps.resample('T', label='right', closed='left').mean())
print(ps.resample('T', label='left', closed='right').mean())
Expand All @@ -15,7 +15,7 @@
print(ps.resample('60S', label='left', closed='right').mean())
print(ps.resample('60S', label='right', closed='right').mean())
ti = pd.date_range('2000', periods=30, freq='MS', tz='UTC')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('M', label='left', closed='left').max())
print(ps.resample('M', label='right', closed='left').max())
print(ps.resample('M', label='left', closed='right').max())
Expand All @@ -25,10 +25,9 @@
print(ps.resample('MS', label='left', closed='right').max())
print(ps.resample('MS', label='right', closed='right').max())


# Downsampling comparisons:
ti = pd.date_range('2000-01-01', periods=9, freq='MS', tz='UTC')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('3M', label='left', closed='left').max())
print(ps.resample('3M', label='right', closed='left').max())
print(ps.resample('3M', label='left', closed='right').max())
Expand All @@ -55,33 +54,32 @@
print(ps.resample('2MS', label='right', closed='right').mean())
# Checking how label and closed args affect outputs
ti = pd.date_range('2000-01-01', periods=9, freq='T', tz='UTC')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('3T', label='left', closed='left').mean())
print(ps.resample('3T', label='right', closed='left').mean())
print(ps.resample('3T', label='left', closed='right').mean())
print(ps.resample('3T', label='right', closed='right').mean())
ti = pd.date_range('2000', periods=30, freq='MS')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('6MS', label='left', closed='left').max())
print(ps.resample('6MS', label='right', closed='left').max())
print(ps.resample('6MS', label='left', closed='right').max())
print(ps.resample('6MS', label='right', closed='right').max())
# Checking different aggregation funcs, also checking cases when label and closed == None
ti = pd.date_range('2000', periods=30, freq='MS')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('MS').mean()) # essentially doing no resampling, should return original data
print(ps.resample('6MS').mean())
print(ps.resample('6MS').asfreq()) # results do not match since xarray makes asfreq = mean (see resample.py)
print(ps.resample('6MS').sum())
print(ps.resample('6MS').min())
print(ps.resample('6MS').max())


# Upsampling comparisons:
# At seconds-resolution, xr.cftime_range is 1 second off from pd.date_range
ti = pd.date_range('2011-01-01T13:02:03', '2012-01-01T00:00:00', freq='D', tz='UTC')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
print(ps.resample('12T', base=0).interpolate().index) # testing T vs min
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('12T', base=0).interpolate().index) # testing T vs min
print(ps.resample('12min', base=0).interpolate().index)
print(ps.resample('12min', base=1).interpolate().index)
print(ps.resample('12min', base=5).interpolate().index)
Expand All @@ -91,7 +89,7 @@
print(ps.resample('1D', base=0).mean().values) # essentially doing no resampling, should return original data
# Pandas' upsampling behaves aberrantly if start times for dates are not neat; should we replicate?
ti = pd.date_range('2000-01-01T13:02:03', '2000-02-01T00:00:00', freq='D', tz='UTC')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti) # results unchanged if array of floats used
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti) # results unchanged if array of floats used
print(ps.resample('8H', base=0, closed='left').interpolate().values)
print(ps.resample('8H', base=0, closed='left').sum().values)
print(ps.resample('8H', base=0, closed='left').mean().values)
Expand All @@ -100,19 +98,18 @@
print(ps.resample('8H', base=0, closed='right').mean().values)
# Neat start times (00:00:00) produce expected behavior when upsampling with pandas
ti = pd.date_range('2000-01-01T00:00:00', '2000-02-01T00:00:00', freq='D', tz='UTC')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('12T', base=0).interpolate())
print(ps.resample('12T', base=24, closed='left').interpolate())
print(ps.resample('12T', base=24, closed='right', label='left').interpolate())
ti = pd.date_range('2000', periods=30, freq='MS')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('D').interpolate())


# Shows how resample-apply produces different results with Series and DataArray
ti = pd.date_range('2000', periods=30, freq='MS')
da = xr.DataArray(np.arange(100, 100+ti.size), [('time', ti)])
da = xr.DataArray(np.arange(100, 100 + ti.size), [('time', ti)])
print(da.resample(time='6MS').sum())
ti = pd.date_range('2000', periods=30, freq='MS')
ps = pd.Series(np.arange(100, 100+ti.size), index=ti)
ps = pd.Series(np.arange(100, 100 + ti.size), index=ti)
print(ps.resample('6MS').sum())
44 changes: 22 additions & 22 deletions xarray/tests/temp/cftime_resample_tests.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import xarray as xr
import numpy as np

import xarray as xr

# Equal sampling comparisons:
times = xr.cftime_range('2000-01-01', periods=9, freq='T', tz='UTC')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='T', label='left', closed='left').mean())
print(da.resample(time='T', label='right', closed='left').mean())
print(da.resample(time='T', label='left', closed='right').mean())
Expand All @@ -14,7 +14,7 @@
print(da.resample(time='60S', label='left', closed='right').mean())
print(da.resample(time='60S', label='right', closed='right').mean())
times = xr.cftime_range('2000', periods=30, freq='MS')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='M', label='left', closed='left').max())
print(da.resample(time='M', label='right', closed='left').max())
print(da.resample(time='M', label='left', closed='right').max())
Expand All @@ -24,10 +24,9 @@
print(da.resample(time='MS', label='left', closed='right').max())
print(da.resample(time='MS', label='right', closed='right').max())


# Downsampling comparisons:
times = xr.cftime_range('2000-01-01', periods=9, freq='MS', tz='UTC')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='3M', label='left', closed='left').max())
print(da.resample(time='3M', label='right', closed='left').max())
print(da.resample(time='3M', label='left', closed='right').max())
Expand All @@ -54,43 +53,43 @@
print(da.resample(time='2MS', label='right', closed='right').mean())
# Checking how label and closed args affect outputs
times = xr.cftime_range('2000-01-01', periods=9, freq='T', tz='UTC')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='3T', label='left', closed='left').mean())
print(da.resample(time='3T', label='right', closed='left').mean())
print(da.resample(time='3T', label='left', closed='right').mean())
print(da.resample(time='3T', label='right', closed='right').mean())
times = xr.cftime_range('2000', periods=30, freq='MS')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='6MS', label='left', closed='left').max())
print(da.resample(time='6MS', label='right', closed='left').max())
print(da.resample(time='6MS', label='left', closed='right').max())
print(da.resample(time='6MS', label='right', closed='right').max())
# Checking different aggregation funcs, also checking cases when label and closed == None
times = xr.cftime_range('2000', periods=30, freq='MS')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='MS').mean()) # essentially doing no resampling, should return original data
print(da.resample(time='6MS').mean())
print(da.resample(time='6MS').asfreq()) # results do not match since xarray makes asfreq = mean (see resample.py)
print(da.resample(time='6MS').sum())
print(da.resample(time='6MS').min())
print(da.resample(time='6MS').max())


# Upsampling comparisons:
# At seconds-resolution, xr.cftime_range is 1 second off from pd.date_range
times = xr.cftime_range('2011-01-01T13:02:03', '2012-01-01T00:00:00', freq='D')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='12T', base=0).interpolate().indexes) # testing 'T' vs 'min'
print(da.resample(time='12min', base=0).interpolate().indexes)
print(da.resample(time='12min', base=1).interpolate().indexes)
print(da.resample(time='12min', base=5).mean().indexes)
print(da.resample(time='12min', base=17).mean().indexes)
print(da.resample(time='12S', base=17).interpolate().indexes)
print(da.resample(time='1D', base=0).interpolate().values) # essentially doing no resampling, should return original data
print(
da.resample(time='1D', base=0).interpolate().values) # essentially doing no resampling, should return original data
print(da.resample(time='1D', base=0).mean().values) # essentially doing no resampling, should return original data
# Upsampling with non-00:00:00 dates. Sum and mean match pandas' behavior, but interpolate doesn't.
times = xr.cftime_range('2000-01-01T13:02:03', '2000-02-01T00:00:00', freq='D')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='8H', base=0, closed='left').interpolate().values)
print(da.resample(time='8H', base=0, closed='left').sum().values)
print(da.resample(time='8H', base=0, closed='left').mean().values)
Expand All @@ -99,41 +98,42 @@
print(da.resample(time='8H', base=0, closed='right').mean().values)
# Neat start times (00:00:00) produce behavior matching pandas'
times = xr.cftime_range('2000-01-01T00:00:00', '2000-02-01T00:00:00', freq='D')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='12T', base=0).interpolate())
print(da.resample(time='12T', base=24, closed='left').interpolate())
print(da.resample(time='12T', base=24, closed='right', label='left').interpolate())
times = xr.cftime_range('2000', periods=30, freq='MS')
da = xr.DataArray(np.arange(100, 100+times.size), [('time', times)])
da = xr.DataArray(np.arange(100, 100 + times.size), [('time', times)])
print(da.resample(time='D').interpolate())


# Check that Dataset and DataArray return the same resampling results
times = xr.cftime_range('2000-01-01', periods=9, freq='T', tz='UTC')
ds = xr.Dataset(data_vars={'data1': ('time', np.arange(100, 100+times.size)),
'data2': ('time', np.arange(500, 500+times.size))},
ds = xr.Dataset(data_vars={'data1': ('time', np.arange(100, 100 + times.size)),
'data2': ('time', np.arange(500, 500 + times.size))},
coords={'time': times})
print(ds.resample(time='3T', label='left', closed='left').mean())
print(ds.resample(time='3T', label='right', closed='left').mean())
print(ds.resample(time='3T', label='left', closed='right').mean())
print(ds.resample(time='3T', label='right', closed='right').mean())
times = xr.cftime_range('2000', periods=30, freq='MS')
ds = xr.Dataset(data_vars={'data1': ('time', np.arange(100, 100+times.size)),
'data2': ('time', np.arange(500, 500+times.size))},
ds = xr.Dataset(data_vars={'data1': ('time', np.arange(100, 100 + times.size)),
'data2': ('time', np.arange(500, 500 + times.size))},
coords={'time': times})
print(ds.resample(time='6MS', label='left', closed='left').max())
print(ds.resample(time='6MS', label='right', closed='left').max())
print(ds.resample(time='6MS', label='left', closed='right').max())
print(ds.resample(time='6MS', label='right', closed='right').max())


# Check that nc files read as dask arrays can be resampled
#
import os
testfilepath = os.path.join(os.path.expanduser('~'), 'Dropbox', 'code', 'Ouranos', 'testdata', 'NRCANdaily', 'nrcan_canada_daily_tasmax_1990.nc')

testfilepath = os.path.join(os.path.expanduser('~'), 'Dropbox', 'code', 'Ouranos', 'testdata', 'NRCANdaily',
'nrcan_canada_daily_tasmax_1990.nc')
xr.set_options(enable_cftimeindex=True)
test_ds = xr.open_dataset(testfilepath, chunks={'time': 10})
test_ds['time'] = xr.cftime_range('1999-01-01', '1999-12-31', freq='D') # regular calendars are still read as pandas date_range even though enable_cftimeindex=True
test_ds['time'] = xr.cftime_range('1999-01-01', '1999-12-31',
freq='D') # regular calendars are still read as pandas date_range even though enable_cftimeindex=True
# test_ds.fillna(0).resample(time='3MS') # NaN in results still present
print(test_ds.resample(time='MS').mean())
monthly_avg = test_ds.resample(time='MS').mean()
Expand Down
Loading