diff --git a/docs/src/common_links.inc b/docs/src/common_links.inc index ec7e1efd6d..0b1017a7d8 100644 --- a/docs/src/common_links.inc +++ b/docs/src/common_links.inc @@ -21,7 +21,7 @@ .. _isort: https://pycqa.github.io/isort/ .. _issue: https://github.com/SciTools/iris/issues .. _issues: https://github.com/SciTools/iris/issues -.. _legacy documentation: https://scitools.org.uk/iris/docs/v2.4.0/ +.. _legacy documentation: https://github.com/SciTools/scitools.org.uk/tree/master/iris/docs/archive .. _matplotlib: https://matplotlib.org/stable/ .. _napolean: https://sphinxcontrib-napoleon.readthedocs.io/en/latest/sphinxcontrib.napoleon.html .. _nox: https://nox.thea.codes/en/stable/ diff --git a/docs/src/index.rst b/docs/src/index.rst index b9f7faaa03..c5d654ed31 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -88,6 +88,8 @@ Icons made by `FreePik `_ from `Flaticon `_ +.. _iris_support: + Support ~~~~~~~ @@ -101,7 +103,11 @@ The legacy support resources: * `Users Google Group `_ * `Developers Google Group `_ -* `Legacy Documentation`_ (Iris 2.4 or earlier) +* `Legacy Documentation`_ (Iris 2.4 or earlier). This is an archive of zip + files of past documentation. You can download, unzip and view the + documentation locally (index.html). There may be some incorrect rendering + and older javascript (.js) files may show a warning when uncompressing, in + which case we suggest you use a different unzip tool. .. toctree:: diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index d7f340737f..8ea27d7667 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -35,10 +35,12 @@ This document explains the changes made to Iris for this release non-existing paths, and added expansion functionality to :func:`~iris.io.save`. (:issue:`4772`, :pull:`4913`) -#. 
`@hsteptoe`_ and `@trexfeathers`_ (reviewer) added :func:`iris.pandas.as_data_frame`, - which provides improved conversion of :class:`~iris.cube.Cube`\s to - :class:`~pandas.DataFrame`\s. This includes better handling of multiple - :class:`~iris.cube.Cube` dimensions, auxiliary coordinates and attribute information. +#. `@hsteptoe`_ and `@trexfeathers`_ improved + :func:`iris.pandas.as_data_frame`\'s conversion of :class:`~iris.cube.Cube`\s to + :class:`~pandas.DataFrame`\s. This includes better handling of multiple + :class:`~iris.cube.Cube` dimensions, auxiliary coordinates and attribute + information. **Note:** the improvements are opt-in, by setting the + :obj:`iris.FUTURE.pandas_ndim` flag (see :class:`iris.Future` for more). (:issue:`4526`, :pull:`4669`) @@ -89,6 +91,8 @@ This document explains the changes made to Iris for this release #. N/A +#. `@tkknight`_ updated the links for the Iris documentation to v2.4 and + earlier to point to the archive of zip files instead. (:pull:`5064`) 💼 Internal =========== diff --git a/docs/src/why_iris.rst b/docs/src/why_iris.rst index 63a515f68e..82b791b4bd 100644 --- a/docs/src/why_iris.rst +++ b/docs/src/why_iris.rst @@ -40,5 +40,4 @@ Interoperability with packages from the wider scientific Python ecosystem comes from Iris' use of standard NumPy/dask arrays as its underlying data storage. Iris is part of SciTools, for more information see https://scitools.org.uk/. -For **Iris 2.4** and earlier documentation please see the -:link-badge:`https://scitools.org.uk/iris/docs/v2.4.0/,"legacy documentation",cls=badge-info text-white`. +For **Iris 2.4** and earlier documentation please see :ref:`iris_support`. 
\ No newline at end of file diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py index 70dcaa60de..896b850541 100644 --- a/lib/iris/__init__.py +++ b/lib/iris/__init__.py @@ -147,21 +147,21 @@ def __init__(self, datum_support=False, pandas_ndim=False): To adjust the values simply update the relevant attribute from within your code. For example:: + # example_future_flag is a fictional example. iris.FUTURE.example_future_flag = False If Iris code is executed with multiple threads, note the values of these options are thread-specific. - .. note:: - - iris.FUTURE.example_future_flag does not exist. It is provided - as an example. - - .. todo:: - - Document the ``pandas_ndim`` flag once iris#4669 is merged - can - add cross-referencing documentation both here and in - iris.pandas.as_dataframe(). + Parameters + ---------- + datum_support : bool, default=False + Opts in to loading coordinate system datum information from NetCDF + files into :class:`~iris.coord_systems.CoordSystem`\\ s, wherever + this information is present. + pandas_ndim : bool, default=False + See :func:`iris.pandas.as_data_frame` for details - opts in to the + newer n-dimensional behaviour. """ # The flag 'example_future_flag' is provided as a reference for the @@ -218,14 +218,11 @@ def context(self, **kwargs): statement, the previous state is restored. For example:: + + # example_future_flag is a fictional example. with iris.FUTURE.context(example_future_flag=False): # ... code that expects some past behaviour - .. note:: - - iris.FUTURE.example_future_flag does not exist and is - provided only as an example. 
- """ # Save the current context current_state = self.__dict__.copy() diff --git a/lib/iris/pandas.py b/lib/iris/pandas.py index 1bf9509d1b..faa250285e 100644 --- a/lib/iris/pandas.py +++ b/lib/iris/pandas.py @@ -378,7 +378,10 @@ def as_cubes( ) raise ValueError(message) - if not pandas_index.is_monotonic: + if not ( + pandas_index.is_monotonic_increasing + or pandas_index.is_monotonic_decreasing + ): # Need monotonic index for use in DimCoord(s). # This function doesn't sort_index itself since that breaks the # option to return a data view instead of a copy. @@ -627,7 +630,7 @@ def as_data_frame( add_ancillary_variables=False, ): """ - Convert a 2D cube to a Pandas DataFrame. + Convert a :class:`~iris.cube.Cube` to a :class:`pandas.DataFrame`. :attr:`~iris.cube.Cube.dim_coords` and :attr:`~iris.cube.Cube.data` are flattened into a long-style :class:`~pandas.DataFrame`. Other @@ -658,6 +661,29 @@ def as_data_frame( A :class:`~pandas.DataFrame` with :class:`~iris.cube.Cube` dimensions forming a :class:`~pandas.MultiIndex` + Warnings + -------- + #. This documentation is for the new ``as_data_frame()`` behaviour, which + is **currently opt-in** to preserve backwards compatibility. The default + legacy behaviour is documented in pre-``v3.4`` documentation (summary: + limited to 2-dimensional :class:`~iris.cube.Cube`\\ s, with only the + :attr:`~iris.cube.Cube.data` and :attr:`~iris.cube.Cube.dim_coords` + being added). The legacy behaviour will be removed in a future version + of Iris, so please opt-in to the new behaviour at your earliest + convenience, via :class:`iris.Future`: + + >>> iris.FUTURE.pandas_ndim = True + + **Breaking change:** to enable the improvements, the new opt-in + behaviour flattens multi-dimensional data into a single + :class:`~pandas.DataFrame` column (the legacy behaviour preserves 2 + dimensions via rows and columns). + + | + + #. 
Where the :class:`~iris.cube.Cube` contains masked values, these become + :data:`numpy.nan` in the returned :class:`~pandas.DataFrame`. + Notes ----- Dask ``DataFrame``\\s are not supported. @@ -669,11 +695,6 @@ def as_data_frame( :class:`~iris.cube.Cube` data `dtype` is preserved. - Warnings - -------- - Where the :class:`~iris.cube.Cube` contains masked values, these become - :data:`numpy.nan` in the returned :class:`~pandas.DataFrame`. - Examples -------- >>> import iris @@ -817,37 +838,72 @@ def merge_metadata(meta_var_list): ) return data_frame - # Checks - if not isinstance(cube, iris.cube.Cube): - raise TypeError( - f"Expected input to be iris.cube.Cube instance, got: {type(cube)}" + if iris.FUTURE.pandas_ndim: + # Checks + if not isinstance(cube, iris.cube.Cube): + raise TypeError( + f"Expected input to be iris.cube.Cube instance, got: {type(cube)}" + ) + if copy: + data = cube.data.copy() + else: + data = cube.data + if ma.isMaskedArray(data): + if not copy: + raise ValueError("Masked arrays must always be copied.") + data = data.astype("f").filled(np.nan) + + # Extract dim coord information: separate lists for dim names and dim values + coord_names, coords = _make_dim_coord_list(cube) + # Make base DataFrame + index = pandas.MultiIndex.from_product(coords, names=coord_names) + data_frame = pandas.DataFrame( + data.ravel(), columns=[cube.name()], index=index ) - if copy: - data = cube.data.copy() + + if add_aux_coords: + data_frame = merge_metadata(_make_aux_coord_list(cube)) + if add_ancillary_variables: + data_frame = merge_metadata(_make_ancillary_variables_list(cube)) + if add_cell_measures: + data_frame = merge_metadata(_make_cell_measures_list(cube)) + + if copy: + result = data_frame.reorder_levels(coord_names).sort_index() + else: + data_frame.reorder_levels(coord_names).sort_index(inplace=True) + result = data_frame + else: + message = ( + "You are using legacy 2-dimensional behaviour in " + "'iris.pandas.as_data_frame()'. 
This will be removed in a future " + "version of Iris. Please opt-in to the improved " + "n-dimensional behaviour at your earliest convenience by setting: " + "'iris.FUTURE.pandas_ndim = True'. More info is in the " + "documentation." + ) + warnings.warn(message, FutureWarning) + + # The legacy behaviour. data = cube.data - if ma.isMaskedArray(data): + if ma.isMaskedArray(data): + if not copy: + raise ValueError("Masked arrays must always be copied.") + data = data.astype("f").filled(np.nan) + elif copy: + data = data.copy() + + index = columns = None + if cube.coords(dimensions=[0]): + index = _as_pandas_coord(cube.coord(dimensions=[0])) + if cube.coords(dimensions=[1]): + columns = _as_pandas_coord(cube.coord(dimensions=[1])) + + data_frame = pandas.DataFrame(data, index, columns) if not copy: - raise ValueError("Masked arrays must always be copied.") - data = data.astype("f").filled(np.nan) - - # Extract dim coord information: separate lists for dim names and dim values - coord_names, coords = _make_dim_coord_list(cube) - # Make base DataFrame - index = pandas.MultiIndex.from_product(coords, names=coord_names) - data_frame = pandas.DataFrame( - data.ravel(), columns=[cube.name()], index=index - ) + _assert_shared(data, data_frame) - if add_aux_coords: - data_frame = merge_metadata(_make_aux_coord_list(cube)) - if add_ancillary_variables: - data_frame = merge_metadata(_make_ancillary_variables_list(cube)) - if add_cell_measures: - data_frame = merge_metadata(_make_cell_measures_list(cube)) + result = data_frame - if copy: - return data_frame.reorder_levels(coord_names).sort_index() - else: - data_frame.reorder_levels(coord_names).sort_index(inplace=True) - return data_frame + return result diff --git a/lib/iris/tests/test_pandas.py b/lib/iris/tests/test_pandas.py index 4841108aa2..60a271c53b 100644 --- a/lib/iris/tests/test_pandas.py +++ b/lib/iris/tests/test_pandas.py @@ -11,6 +11,7 @@ import copy import datetime from termios import IXOFF # noqa: F401 +import 
warnings import cf_units import cftime @@ -42,10 +43,306 @@ import iris.pandas +@pytest.fixture +def activate_pandas_ndim(): + iris.FUTURE.pandas_ndim = True + yield None + iris.FUTURE.pandas_ndim = False + + @skip_pandas +@pytest.mark.filterwarnings( + "ignore:.*as_series has been deprecated.*:iris._deprecation.IrisDeprecation" +) +class TestAsSeries(tests.IrisTest): + """Test conversion of 1D cubes to Pandas using as_series()""" + + def test_no_dim_coord(self): + cube = Cube(np.array([0, 1, 2, 3, 4]), long_name="foo") + series = iris.pandas.as_series(cube) + expected_index = np.array([0, 1, 2, 3, 4]) + self.assertArrayEqual(series, cube.data) + self.assertArrayEqual(series.index, expected_index) + + def test_simple(self): + cube = Cube(np.array([0, 1, 2, 3, 4.4]), long_name="foo") + dim_coord = DimCoord([5, 6, 7, 8, 9], long_name="bar") + cube.add_dim_coord(dim_coord, 0) + expected_index = np.array([5, 6, 7, 8, 9]) + series = iris.pandas.as_series(cube) + self.assertArrayEqual(series, cube.data) + self.assertArrayEqual(series.index, expected_index) + + def test_masked(self): + data = np.ma.MaskedArray([0, 1, 2, 3, 4.4], mask=[0, 1, 0, 1, 0]) + cube = Cube(data, long_name="foo") + series = iris.pandas.as_series(cube) + self.assertArrayEqual(series, cube.data.astype("f").filled(np.nan)) + + def test_time_standard(self): + cube = Cube(np.array([0, 1, 2, 3, 4]), long_name="ts") + time_coord = DimCoord( + [0, 100.1, 200.2, 300.3, 400.4], + long_name="time", + units="days since 2000-01-01 00:00", + ) + cube.add_dim_coord(time_coord, 0) + expected_index = [ + datetime.datetime(2000, 1, 1, 0, 0), + datetime.datetime(2000, 4, 10, 2, 24), + datetime.datetime(2000, 7, 19, 4, 48), + datetime.datetime(2000, 10, 27, 7, 12), + datetime.datetime(2001, 2, 4, 9, 36), + ] + series = iris.pandas.as_series(cube) + self.assertArrayEqual(series, cube.data) + assert list(series.index) == expected_index + + def test_time_360(self): + cube = Cube(np.array([0, 1, 2, 3, 4]), long_name="ts") 
+ time_unit = cf_units.Unit( + "days since 2000-01-01 00:00", calendar=cf_units.CALENDAR_360_DAY + ) + time_coord = DimCoord( + [0, 100.1, 200.2, 300.3, 400.4], long_name="time", units=time_unit + ) + cube.add_dim_coord(time_coord, 0) + expected_index = [ + cftime.Datetime360Day(2000, 1, 1, 0, 0), + cftime.Datetime360Day(2000, 4, 11, 2, 24), + cftime.Datetime360Day(2000, 7, 21, 4, 48), + cftime.Datetime360Day(2000, 11, 1, 7, 12), + cftime.Datetime360Day(2001, 2, 11, 9, 36), + ] + + series = iris.pandas.as_series(cube) + self.assertArrayEqual(series, cube.data) + self.assertArrayEqual(series.index, expected_index) + + def test_copy_true(self): + cube = Cube(np.array([0, 1, 2, 3, 4]), long_name="foo") + series = iris.pandas.as_series(cube) + series[0] = 99 + assert cube.data[0] == 0 + + def test_copy_int32_false(self): + cube = Cube(np.array([0, 1, 2, 3, 4], dtype=np.int32), long_name="foo") + series = iris.pandas.as_series(cube, copy=False) + series[0] = 99 + assert cube.data[0] == 99 + + def test_copy_int64_false(self): + cube = Cube(np.array([0, 1, 2, 3, 4], dtype=np.int64), long_name="foo") + series = iris.pandas.as_series(cube, copy=False) + series[0] = 99 + assert cube.data[0] == 99 + + def test_copy_float_false(self): + cube = Cube(np.array([0, 1, 2, 3.3, 4]), long_name="foo") + series = iris.pandas.as_series(cube, copy=False) + series[0] = 99 + assert cube.data[0] == 99 + + def test_copy_masked_true(self): + data = np.ma.MaskedArray([0, 1, 2, 3, 4], mask=[0, 1, 0, 1, 0]) + cube = Cube(data, long_name="foo") + series = iris.pandas.as_series(cube) + series[0] = 99 + assert cube.data[0] == 0 + + def test_copy_masked_false(self): + data = np.ma.MaskedArray([0, 1, 2, 3, 4], mask=[0, 1, 0, 1, 0]) + cube = Cube(data, long_name="foo") + with pytest.raises(ValueError): + _ = iris.pandas.as_series(cube, copy=False) + + +@skip_pandas +@pytest.mark.filterwarnings( + "ignore:You are using legacy 2-dimensional behaviour.*:FutureWarning" +) class 
TestAsDataFrame(tests.IrisTest): """Test conversion of 2D cubes to Pandas using as_data_frame()""" + def test_no_dim_coords(self): + cube = Cube( + np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]), long_name="foo" + ) + expected_index = [0, 1] + expected_columns = [0, 1, 2, 3, 4] + data_frame = iris.pandas.as_data_frame(cube) + self.assertArrayEqual(data_frame, cube.data) + self.assertArrayEqual(data_frame.index, expected_index) + self.assertArrayEqual(data_frame.columns, expected_columns) + + def test_no_x_coord(self): + cube = Cube( + np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]), long_name="foo" + ) + y_coord = DimCoord([10, 11], long_name="bar") + cube.add_dim_coord(y_coord, 0) + expected_index = [10, 11] + expected_columns = [0, 1, 2, 3, 4] + data_frame = iris.pandas.as_data_frame(cube) + self.assertArrayEqual(data_frame, cube.data) + self.assertArrayEqual(data_frame.index, expected_index) + self.assertArrayEqual(data_frame.columns, expected_columns) + + def test_no_y_coord(self): + cube = Cube( + np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]), long_name="foo" + ) + x_coord = DimCoord([10, 11, 12, 13, 14], long_name="bar") + cube.add_dim_coord(x_coord, 1) + expected_index = [0, 1] + expected_columns = [10, 11, 12, 13, 14] + data_frame = iris.pandas.as_data_frame(cube) + self.assertArrayEqual(data_frame, cube.data) + self.assertArrayEqual(data_frame.index, expected_index) + self.assertArrayEqual(data_frame.columns, expected_columns) + + def test_simple(self): + cube = Cube( + np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]), long_name="foo" + ) + x_coord = DimCoord([10, 11, 12, 13, 14], long_name="bar") + y_coord = DimCoord([15, 16], long_name="milk") + cube.add_dim_coord(x_coord, 1) + cube.add_dim_coord(y_coord, 0) + expected_index = [15, 16] + expected_columns = [10, 11, 12, 13, 14] + data_frame = iris.pandas.as_data_frame(cube) + self.assertArrayEqual(data_frame, cube.data) + self.assertArrayEqual(data_frame.index, expected_index) + 
self.assertArrayEqual(data_frame.columns, expected_columns) + + def test_masked(self): + data = np.ma.MaskedArray( + [[0, 1, 2, 3, 4.4], [5, 6, 7, 8, 9]], + mask=[[0, 1, 0, 1, 0], [1, 0, 1, 0, 1]], + ) + cube = Cube(data, long_name="foo") + expected_index = [0, 1] + expected_columns = [0, 1, 2, 3, 4] + data_frame = iris.pandas.as_data_frame(cube) + self.assertArrayEqual(data_frame, cube.data.astype("f").filled(np.nan)) + self.assertArrayEqual(data_frame.index, expected_index) + self.assertArrayEqual(data_frame.columns, expected_columns) + + def test_time_standard(self): + cube = Cube( + np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]), long_name="ts" + ) + day_offsets = [0, 100.1, 200.2, 300.3, 400.4] + time_coord = DimCoord( + day_offsets, long_name="time", units="days since 2000-01-01 00:00" + ) + cube.add_dim_coord(time_coord, 1) + data_frame = iris.pandas.as_data_frame(cube) + self.assertArrayEqual(data_frame, cube.data) + nanoseconds_per_day = 24 * 60 * 60 * 1000000000 + days_to_2000 = 365 * 30 + 7 + # pandas Timestamp class cannot handle floats in pandas