-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Hypothesis tests for roundtrip to & from pandas #3285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
1e8ac35
9f14426
18790cc
2449ac2
54900f0
02fd311
e8fb3da
67c7034
4ba4f7b
fb222c5
7b39a6f
a328739
3f462be
ecd016a
351b40b
044c67d
5b0ae82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| from hypothesis import settings | ||
|
|
||
# Array strategies explore a much larger search space than usual, so examples
# are allowed to run for a while: the per-example deadline is disabled.
_PROFILE_NAME = "ci"
settings.register_profile(_PROFILE_NAME, deadline=None, print_blob=True)
settings.load_profile(_PROFILE_NAME)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,15 +6,10 @@ | |
| """ | ||
| import hypothesis.extra.numpy as npst | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These may need to be guarded too using
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Aha, I was being distracted by the other errors around the real one. Let's see if the latest commit helps. |
||
| import hypothesis.strategies as st | ||
| from hypothesis import given, settings | ||
| from hypothesis import given | ||
|
|
||
| import xarray as xr | ||
|
|
||
| # Run for a while - arrays are a bigger search space than usual | ||
| settings.register_profile("ci", deadline=None) | ||
| settings.load_profile("ci") | ||
|
|
||
|
|
||
| an_array = npst.arrays( | ||
| dtype=st.one_of( | ||
| npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| """ | ||
| Property-based tests for roundtripping between xarray and pandas objects. | ||
| """ | ||
| import hypothesis.extra.numpy as npst | ||
| import hypothesis.extra.pandas as pdst | ||
| import hypothesis.strategies as st | ||
| from hypothesis import given | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
| import xarray as xr | ||
|
|
||
# Strategy producing any NumPy numeric dtype: unsigned integer, signed
# integer, or floating point.
numeric_dtypes = st.one_of(
    [
        npst.unsigned_integer_dtypes(),
        npst.integer_dtypes(),
        npst.floating_dtypes(),
    ]
)

# Strategy producing pandas Series whose dtype is drawn from numeric_dtypes.
numeric_series = numeric_dtypes.flatmap(lambda dtype: pdst.series(dtype=dtype))

# Strategy producing numeric NumPy arrays.
an_array = npst.arrays(
    shape=npst.array_shapes(max_dims=2),  # can only convert 1D/2D to pandas
    dtype=numeric_dtypes,
)
|
|
||
|
|
||
@st.composite
def datasets_1d_vars(draw):
    """Generate datasets containing only 1D variables.

    Every variable shares the single dimension ``"rows"``, which makes the
    result suitable for converting to a pandas DataFrame.

    Parameters
    ----------
    draw : callable
        Supplied by ``st.composite``; draws one value from a strategy.

    Returns
    -------
    xr.Dataset
        Between one and three numeric variables of equal length (0 to 100
        entries), with a ``"u8"`` index as the ``"rows"`` coordinate.
    """
    dim = "rows"
    n_vars = draw(st.integers(min_value=1, max_value=3))
    n_entries = draw(st.integers(min_value=0, max_value=100))

    # Variable names must be distinct, otherwise a repeated name silently
    # overwrites an earlier variable and fewer than n_vars are generated.
    # They must also differ from the dimension name, which is reserved for
    # the index coordinate passed to the Dataset below.
    names = draw(
        st.lists(
            st.text().filter(lambda name: name != dim),
            min_size=n_vars,
            max_size=n_vars,
            unique=True,
        )
    )

    data_vars = {}
    for name in names:
        dtype = draw(numeric_dtypes)
        values = draw(npst.arrays(dtype=dtype, shape=(n_entries,)))
        data_vars[name] = xr.Variable((dim,), values)

    # The index has exactly one label per entry.
    index = draw(pdst.indexes(dtype="u8", min_size=n_entries, max_size=n_entries))
    return xr.Dataset(data_vars, coords={dim: index})
|
|
||
|
|
||
@given(st.data(), an_array)
def test_roundtrip_dataarray(data, arr):
    """Check that a DataArray is identical after DataArray -> pandas -> DataArray."""
    # One unique dimension name per axis of the generated array.
    dim_names_strategy = st.lists(
        st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True
    ).map(tuple)
    names = data.draw(dim_names_strategy)

    # Each dimension gets a plain 0..n-1 integer coordinate.
    coords = dict(zip(names, map(np.arange, arr.shape)))

    original = xr.DataArray(arr, dims=names, coords=coords)
    as_pandas = original.to_pandas()
    roundtripped = xr.DataArray(as_pandas)
    xr.testing.assert_identical(original, roundtripped)
|
|
||
|
|
||
@given(datasets_1d_vars())
def test_roundtrip_dataset(dataset):
    """Check that a Dataset is identical after Dataset -> DataFrame -> Dataset."""
    frame = dataset.to_dataframe()
    assert isinstance(frame, pd.DataFrame)
    xr.testing.assert_identical(dataset, xr.Dataset(frame))
|
|
||
|
|
||
@given(numeric_series, st.text())
def test_roundtrip_pandas_series(ser, ix_name):
    """Check that a Series is unchanged after Series -> DataArray -> Series."""
    # The index needs a name; otherwise Xarray calls the dimension 'dim_0'.
    ser.index.name = ix_name

    as_xarray = xr.DataArray(ser)
    back_to_pandas = as_xarray.to_pandas()

    pd.testing.assert_series_equal(ser, back_to_pandas)
    xr.testing.assert_identical(as_xarray, back_to_pandas.to_xarray())
|
|
||
|
|
||
# Strategy producing DataFrames whose columns "a", "b" and "c" all share one
# numeric dtype - used for the roundtrip to DataArray.
numeric_homogeneous_dataframe = numeric_dtypes.flatmap(
    lambda dtype: pdst.data_frames(pdst.columns(["a", "b", "c"], dtype=dtype))
)
|
|
||
|
|
||
@given(numeric_homogeneous_dataframe)
def test_roundtrip_pandas_dataframe(df):
    """Check that a DataFrame is unchanged after DataFrame -> DataArray -> DataFrame."""
    # Both axes need names; otherwise Xarray names them 'dim_0', 'dim_1'.
    df.index.name = "rows"
    df.columns.name = "cols"

    as_xarray = xr.DataArray(df)
    back_to_pandas = as_xarray.to_pandas()

    pd.testing.assert_frame_equal(df, back_to_pandas)
    xr.testing.assert_identical(as_xarray, back_to_pandas.to_xarray())
Uh oh!
There was an error while loading. Please reload this page.