From c0cafbb50bac92f2406c547111e201a77138f73b Mon Sep 17 00:00:00 2001 From: Bhuvana KA Date: Wed, 2 Oct 2019 11:59:50 +0530 Subject: [PATCH 01/22] DOC: Styler errors PR08 and PR09 (#28743) --- pandas/io/formats/style.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 95e1084747aa3..c1af3f93f44eb 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -54,14 +54,18 @@ class Styler: Parameters ---------- data : Series or DataFrame + Data to be styled - either a Series or DataFrame. precision : int - precision to round floats to, defaults to pd.options.display.precision + Precision to round floats to, defaults to pd.options.display.precision. table_styles : list-like, default None - list of {selector: (attr, value)} dicts; see Notes + List of {selector: (attr, value)} dicts; see Notes. uuid : str, default None - a unique identifier to avoid CSS collisions; generated automatically + A unique identifier to avoid CSS collisions; generated automatically. caption : str, default None - caption to attach to the table + Caption to attach to the table. + table_attributes : str, default None + Items that show up in the opening ```` tag + in addition to automatic (by default) id. cell_ids : bool, default True If True, each cell will have an ``id`` attribute in their HTML tag. The ``id`` takes the form ``T__row_col`` @@ -76,7 +80,8 @@ class Styler: See Also -------- - DataFrame.style + DataFrame.style : Return a Styler object containing methods for building + a styled HTML representation for the DataFrame. Notes ----- From b29869694c8d5081ca3b5b52b84fa4eefad10b85 Mon Sep 17 00:00:00 2001 From: Joshua Smith Date: Wed, 2 Oct 2019 02:43:08 -0400 Subject: [PATCH 02/22] TST: add test to confirm adding 'm' to a Series of strings does not error (#28736) --- pandas/tests/series/test_arithmetic.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 89557445cafb4..68d6169fa4f34 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -66,6 +66,21 @@ def test_add_series_with_period_index(self): with pytest.raises(IncompatibleFrequency, match=msg): ts + ts.asfreq("D", how="end") + @pytest.mark.parametrize( + "target_add,input_value,expected_value", + [ + ("!", ["hello", "world"], ["hello!", "world!"]), + ("m", ["hello", "world"], ["hellom", "worldm"]), + ], + ) + def test_string_addition(self, target_add, input_value, expected_value): + # GH28658 - ensure adding 'm' does not raise an error + a = Series(input_value) + + result = a + target_add + expected = Series(expected_value) + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ # Comparisons From 411dd249e755d7e281603fe3e0ab9e0e48383df9 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Wed, 2 Oct 2019 00:50:50 -0600 Subject: [PATCH 03/22] BUG: Fix RangeIndex.get_indexer for decreasing RangeIndex (#28680) --- doc/source/whatsnew/v0.25.2.rst | 1 + pandas/core/indexes/range.py | 5 +++-- pandas/tests/indexes/test_range.py | 8 ++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index f904d69d6421b..9789c9fce3541 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -50,6 +50,7 @@ Indexing ^^^^^^^^ - Fix regression in 
:meth:`DataFrame.reindex` not following ``limit`` argument (:issue:`28631`). +- Fix regression in :meth:`RangeIndex.get_indexer` for decreasing :class:`RangeIndex` where target values may be improperly identified as missing/present (:issue:`28678`) - - diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 43445a0d5d5a2..6e2d500f4c5ab 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -388,8 +388,9 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if self.step > 0: start, stop, step = self.start, self.stop, self.step else: - # Work on reversed range for simplicity: - start, stop, step = (self.stop - self.step, self.start + 1, -self.step) + # GH 28678: work on reversed range for simplicity + reverse = self._range[::-1] + start, stop, step = reverse.start, reverse.stop, reverse.step target_array = np.asarray(target) if not (is_integer_dtype(target_array) and target_array.ndim == 1): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 7e08a5deaff7a..627c5cc56e010 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -424,6 +424,14 @@ def test_get_indexer_limit(self): expected = np.array([0, 1, 2, 3, 3, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("stop", [0, -1, -2]) + def test_get_indexer_decreasing(self, stop): + # GH 28678 + index = RangeIndex(7, stop, -3) + result = index.get_indexer(range(9)) + expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + def test_join_outer(self): # join with Int64Index other = Int64Index(np.arange(25, 14, -1)) From e9487b437876da221065ab75bd40bcaefa8bf414 Mon Sep 17 00:00:00 2001 From: Victoria Zdanovskaya Date: Wed, 2 Oct 2019 09:50:19 +0100 Subject: [PATCH 04/22] replaced safe_import with a corresponding test decorator (#28731) --- pandas/tests/io/test_gcs.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 05d86d2c8aa5b..8313b85d75ca7 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -108,9 +108,7 @@ def mock_get_filepath_or_buffer(*args, **kwargs): assert_frame_equal(df1, df2) -@pytest.mark.skipif( - td.safe_import("gcsfs"), reason="Only check when gcsfs not installed" -) +@td.skip_if_installed("gcsfs") def test_gcs_not_present_exception(): with pytest.raises(ImportError) as e: read_csv("gs://test/test.csv") From d9ac8c0412716be05dd0232219688ee53eef5141 Mon Sep 17 00:00:00 2001 From: "Laura Collard, PhD" <35954013+LauraCollard@users.noreply.github.com> Date: Wed, 2 Oct 2019 09:59:25 +0100 Subject: [PATCH 05/22] DOC: Fixed PR06 docstrings errors in pandas.timedelta_range (#28719) --- pandas/core/indexes/timedeltas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b03d60c7b5b37..49dcea4da5760 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -717,17 +717,17 @@ def timedelta_range( Parameters ---------- - start : string or timedelta-like, default None + start : str or timedelta-like, default None Left bound for generating timedeltas - end : string or timedelta-like, default None + end : str or timedelta-like, default None Right bound for generating timedeltas - periods : integer, default None + periods : int, default None Number of periods 
to generate - freq : string or DateOffset, default 'D' + freq : str or DateOffset, default 'D' Frequency strings can have multiples, e.g. '5H' - name : string, default None + name : str, default None Name of the resulting TimedeltaIndex - closed : string, default None + closed : str, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) From 4c97af98d13794e8f08ddeed1fcff6ac0c8e6892 Mon Sep 17 00:00:00 2001 From: Oluokun Adedayo Date: Wed, 2 Oct 2019 11:04:30 +0100 Subject: [PATCH 06/22] Added doctstring to fixture (#28727) --- pandas/tests/io/excel/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py index 54acd2128369d..843b3c08421b3 100644 --- a/pandas/tests/io/excel/conftest.py +++ b/pandas/tests/io/excel/conftest.py @@ -7,6 +7,9 @@ @pytest.fixture def frame(float_frame): + """ + Returns the first ten items in fixture "float_frame". + """ return float_frame[:10] From 0f06fd099a5f5884bd4fafa400cb62bcf5476c10 Mon Sep 17 00:00:00 2001 From: tadashigaki Date: Wed, 2 Oct 2019 20:44:26 +0900 Subject: [PATCH 07/22] BUG: Fix dep generation (#28734) Closes #28714 --- requirements-dev.txt | 2 +- scripts/generate_pip_deps_from_conda.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 698e4f3aea094..e677d835b56a5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,7 +17,7 @@ numpydoc>=0.9.0 nbconvert>=5.4.1 nbsphinx pandoc -dask-core +dask toolz>=0.7.3 fsspec>=0.5.1 partd>=0.3.10 diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 44fe50b99560a..f1c7c3298fb26 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -48,6 +48,9 @@ def conda_package_to_pip(package): break + if package in RENAME: + return RENAME[package] + return package From fc6247db240de201af8692db5562ac047acefa0c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 04:52:26 -0700 Subject: [PATCH 08/22] REF: Consolidate alignment calls in DataFrame ops (#28638) --- pandas/core/frame.py | 19 ++++++------------- pandas/core/ops/__init__.py | 21 ++++++++++++--------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0f6fa43e02089..67360122ed021 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5279,24 +5279,17 @@ def _arith_op(left, right): new_data = dispatch_fill_zeros(func, this.values, other.values, res_values) return this._construct_result(new_data) - def _combine_match_index(self, other, func, level=None): - left, right = self.align(other, join="outer", axis=0, level=level, copy=False) - # at this point we have `left.index.equals(right.index)` + def _combine_match_index(self, other, func): + # at this point we have `self.index.equals(other.index)` - if left._is_mixed_type or right._is_mixed_type: + if self._is_mixed_type or other._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` - new_data = ops.dispatch_to_series(left, right, func) + new_data = ops.dispatch_to_series(self, other, func) else: # fastpath --> operate directly on values with np.errstate(all="ignore"): - new_data = func(left.values.T, right.values).T - return left._construct_result(new_data) - - def _combine_match_columns(self, other: Series, func, level=None): - left, right = self.align(other, join="outer", axis=1, 
level=level, copy=False) - # at this point we have `left.columns.equals(right.index)` - new_data = ops.dispatch_to_series(left, right, func, axis="columns") - return left._construct_result(new_data) + new_data = func(self.values.T, other.values).T + return new_data def _construct_result(self, result) -> "DataFrame": """ diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 79272c5643281..c979b473ad09a 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -384,7 +384,7 @@ def column_op(a, b): return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} elif isinstance(right, ABCSeries) and axis == "columns": - # We only get here if called via left._combine_match_columns, + # We only get here if called via _combine_frame_series, # in which case we specifically want to operate row-by-row assert right.index.equals(left.columns) @@ -613,15 +613,18 @@ def _combine_series_frame(self, other, func, fill_value=None, axis=None, level=N "fill_value {fill} not supported.".format(fill=fill_value) ) - if axis is not None: - axis = self._get_axis_number(axis) - if axis == 0: - return self._combine_match_index(other, func, level=level) - else: - return self._combine_match_columns(other, func, level=level) + if axis is None: + # default axis is columns + axis = 1 + + axis = self._get_axis_number(axis) + left, right = self.align(other, join="outer", axis=axis, level=level, copy=False) + if axis == 0: + new_data = left._combine_match_index(right, func) + else: + new_data = dispatch_to_series(left, right, func, axis="columns") - # default axis is columns - return self._combine_match_columns(other, func, level=level) + return left._construct_result(new_data) def _align_method_FRAME(left, right, axis): From 7ae2c09dbc1b1d194af14de47b0c4fc62fd07258 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 04:55:35 -0700 Subject: [PATCH 09/22] TST: Fix broken test cases where Timedelta/Timestamp raise (#28729) --- pandas/core/dtypes/cast.py | 19 +++++++++++++------ pandas/tests/dtypes/cast/test_promote.py | 18 +----------------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 08176af2b326d..5801384bf8db9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -350,14 +350,21 @@ def maybe_promote(dtype, fill_value=np.nan): # returns tuple of (dtype, fill_value) if issubclass(dtype.type, np.datetime64): - fill_value = tslibs.Timestamp(fill_value).to_datetime64() + try: + fill_value = tslibs.Timestamp(fill_value).to_datetime64() + except (TypeError, ValueError): + dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.timedelta64): - fv = tslibs.Timedelta(fill_value) - if fv is NaT: - # NaT has no `to_timedelta6` method - fill_value = np.timedelta64("NaT", "ns") + try: + fv = tslibs.Timedelta(fill_value) + except ValueError: + dtype = np.dtype(np.object_) else: - fill_value = fv.to_timedelta64() + if fv is NaT: + # NaT has no `to_timedelta64` method + fill_value = np.timedelta64("NaT", "ns") + else: + fill_value = fv.to_timedelta64() elif is_datetime64tz_dtype(dtype): if isna(fill_value): fill_value = NaT diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index cf7a168074e9e..1b7de9b20f42f 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -272,10 +272,6 @@ def test_maybe_promote_any_with_bool(any_numpy_dtype_reduced, box): pytest.xfail("falsely 
upcasts to object") if boxed and dtype not in (str, object) and box_dtype is None: pytest.xfail("falsely upcasts to object") - if not boxed and dtype.kind == "M": - pytest.xfail("raises error") - if not boxed and dtype.kind == "m": - pytest.xfail("raises error") # filling anything but bool with bool casts to object expected_dtype = np.dtype(object) if dtype != bool else dtype @@ -348,8 +344,6 @@ def test_maybe_promote_any_with_datetime64( or (box_dtype is None and is_datetime64_dtype(type(fill_value))) ): pytest.xfail("mix of lack of upcasting, resp. wrong missing value") - if not boxed and is_timedelta64_dtype(dtype): - pytest.xfail("raises error") # special case for box_dtype box_dtype = np.dtype(datetime64_dtype) if box_dtype == "dt_dtype" else box_dtype @@ -490,9 +484,7 @@ def test_maybe_promote_any_numpy_dtype_with_datetimetz( fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) boxed, box_dtype = box # read from parametrized fixture - if dtype.kind == "m" and not boxed: - pytest.xfail("raises error") - elif dtype.kind == "M" and not boxed: + if dtype.kind == "M" and not boxed: pytest.xfail("Comes back as M8 instead of object") fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] @@ -549,8 +541,6 @@ def test_maybe_promote_any_with_timedelta64( else: if boxed and box_dtype is None and is_timedelta64_dtype(type(fill_value)): pytest.xfail("does not upcast correctly") - if not boxed and is_datetime64_dtype(dtype): - pytest.xfail("raises error") # special case for box_dtype box_dtype = np.dtype(timedelta64_dtype) if box_dtype == "td_dtype" else box_dtype @@ -622,9 +612,6 @@ def test_maybe_promote_any_with_string(any_numpy_dtype_reduced, string_dtype, bo fill_dtype = np.dtype(string_dtype) boxed, box_dtype = box # read from parametrized fixture - if is_datetime_or_timedelta_dtype(dtype) and box_dtype != object: - pytest.xfail("does not upcast or raises") - # create array of given dtype fill_value = "abc" @@ -678,9 +665,6 @@ def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype, bo dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if not boxed and is_datetime_or_timedelta_dtype(dtype): - pytest.xfail("raises error") - # create array of object dtype from a scalar value (i.e. passing # dtypes.common.is_scalar), which can however not be cast to int/float etc. fill_value = pd.DateOffset(1) From 4e5c9d413ab58033529899de990bf6b25bcdeb22 Mon Sep 17 00:00:00 2001 From: Oluokun Adedayo Date: Wed, 2 Oct 2019 15:35:27 +0100 Subject: [PATCH 10/22] DOC: Fixed doctring errors PR08, PR09 in pandas.io (#28748) --- pandas/io/formats/style.py | 62 ++++++++++++++++----------------- pandas/io/json/_table_schema.py | 2 +- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c1af3f93f44eb..9c4746f4d68e3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -642,16 +642,16 @@ def apply(self, func, axis=0, subset=None, **kwargs): ``func`` should take a Series or DataFrame (depending on ``axis``), and return an object with the same shape. Must return a DataFrame with identical index and - column labels when ``axis=None`` + column labels when ``axis=None``. axis : {0 or 'index', 1 or 'columns', None}, default 0 - apply to each column (``axis=0`` or ``'index'``), to each row + Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once with ``axis=None``. 
subset : IndexSlice - a valid indexer to limit ``data`` to *before* applying the - function. Consider using a pandas.IndexSlice + A valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice. **kwargs : dict - pass along to ``func`` + Pass along to ``func``. Returns ------- @@ -698,12 +698,12 @@ def applymap(self, func, subset=None, **kwargs): Parameters ---------- func : function - ``func`` should take a scalar and return a scalar + ``func`` should take a scalar and return a scalar. subset : IndexSlice - a valid indexer to limit ``data`` to *before* applying the - function. Consider using a pandas.IndexSlice + A valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice. **kwargs : dict - pass along to ``func`` + Pass along to ``func``. Returns ------- @@ -729,16 +729,16 @@ def where(self, cond, value, other=None, subset=None, **kwargs): Parameters ---------- cond : callable - ``cond`` should take a scalar and return a boolean + ``cond`` should take a scalar and return a boolean. value : str - applied when ``cond`` returns true + Applied when ``cond`` returns true. other : str - applied when ``cond`` returns false + Applied when ``cond`` returns false. subset : IndexSlice - a valid indexer to limit ``data`` to *before* applying the - function. Consider using a pandas.IndexSlice + A valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice. **kwargs : dict - pass along to ``cond`` + Pass along to ``cond``. Returns ------- @@ -819,7 +819,7 @@ def use(self, styles): Parameters ---------- styles : list - list of style functions + List of style functions. Returns ------- @@ -969,19 +969,19 @@ def background_gradient( Parameters ---------- cmap : str or colormap - matplotlib colormap + Matplotlib colormap. low : float - compress the range by the low. + Compress the range by the low. high : float - compress the range by the high. + Compress the range by the high. axis : {0 or 'index', 1 or 'columns', None}, default 0 - apply to each column (``axis=0`` or ``'index'``), to each row + Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once with ``axis=None``. subset : IndexSlice - a valid slice for ``data`` to limit the style application to. + A valid slice for ``data`` to limit the style application to. text_color_threshold : float or int - luminance threshold for determining text color. Facilitates text + Luminance threshold for determining text color. Facilitates text visibility across varying background colors. From 0 to 1. 0 = all text is dark colored, 1 = all text is light colored. @@ -1084,9 +1084,9 @@ def set_properties(self, subset=None, **kwargs): Parameters ---------- subset : IndexSlice - a valid slice for ``data`` to limit the style application to + A valid slice for ``data`` to limit the style application to. **kwargs : dict - property: value pairs to be set for each cell + A dictionary of property, value pairs to be set for each cell. Returns ------- @@ -1180,7 +1180,7 @@ def bar( subset : IndexSlice, optional A valid slice for `data` to limit the style application to. axis : {0 or 'index', 1 or 'columns', None}, default 0 - apply to each column (``axis=0`` or ``'index'``), to each row + Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once with ``axis=None``. 
color : str or 2-tuple/list @@ -1256,10 +1256,10 @@ def highlight_max(self, subset=None, color="yellow", axis=0): Parameters ---------- subset : IndexSlice, default None - a valid slice for ``data`` to limit the style application to. + A valid slice for ``data`` to limit the style application to. color : str, default 'yellow' axis : {0 or 'index', 1 or 'columns', None}, default 0 - apply to each column (``axis=0`` or ``'index'``), to each row + Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once with ``axis=None``. @@ -1276,10 +1276,10 @@ def highlight_min(self, subset=None, color="yellow", axis=0): Parameters ---------- subset : IndexSlice, default None - a valid slice for ``data`` to limit the style application to. + A valid slice for ``data`` to limit the style application to. color : str, default 'yellow' axis : {0 or 'index', 1 or 'columns', None}, default 0 - apply to each column (``axis=0`` or ``'index'``), to each row + Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once with ``axis=None``. @@ -1328,9 +1328,9 @@ def from_custom_template(cls, searchpath, name): Parameters ---------- searchpath : str or list - Path or paths of directories containing the templates + Path or paths of directories containing the templates. name : str - Name of your custom template to use for rendering + Name of your custom template to use for rendering. Returns ------- diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index b142dbf76e6b3..9016e8a98e5ba 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -199,7 +199,7 @@ def build_table_schema(data, index=True, primary_key=None, version=True): index : bool, default True Whether to include ``data.index`` in the schema. primary_key : bool or None, default True - column names to designate as the primary key. + Column names to designate as the primary key. The default `None` will set `'primaryKey'` to the index level or levels if the index is unique. version : bool, default True From 9ab347808e8fd170ef68276c3edb8246b5d30cb4 Mon Sep 17 00:00:00 2001 From: Angela Ambroz Date: Wed, 2 Oct 2019 11:47:12 -0400 Subject: [PATCH 11/22] DOC: Pandas.Series.drop docstring PR02 (#27976) (#28742) --- pandas/core/series.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7836ba16bc58d..44a29f73c51e7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4157,9 +4157,13 @@ def drop( Index labels to drop. axis : 0, default 0 Redundant for application on Series. - index, columns : None - Redundant for application on Series, but index can be used instead - of labels. + index : single label or list-like + Redundant for application on Series, but 'index' can be used instead + of 'labels'. + + .. versionadded:: 0.21.0 + columns : single label or list-like + No change is made to the Series; use 'index' or 'labels' instead. .. 
versionadded:: 0.21.0 level : int or level name, optional From 0436570f05c3b6e7bbb7c7d8fc8fa2f28a0420a8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 12:58:33 -0700 Subject: [PATCH 12/22] BUG: Fix TypeError raised in libreduction (#28643) --- pandas/_libs/reduction.pyx | 26 ++++++++++++++-------- pandas/core/apply.py | 32 +++++++++++++++++++++------- pandas/tests/groupby/test_groupby.py | 6 +----- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 361c21c18c4da..a7d6d19bbc80d 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -15,7 +15,7 @@ from numpy cimport (ndarray, cnp.import_array() cimport pandas._libs.util as util -from pandas._libs.lib import maybe_convert_objects, values_from_object +from pandas._libs.lib import maybe_convert_objects cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt): @@ -23,7 +23,7 @@ cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt): if (util.is_array(obj) or (isinstance(obj, list) and len(obj) == cnt) or getattr(obj, 'shape', None) == (cnt,)): - raise ValueError('function does not reduce') + raise ValueError('Function does not reduce') return np.empty(size, dtype='O') @@ -103,7 +103,7 @@ cdef class Reducer: ndarray arr, result, chunk Py_ssize_t i, incr flatiter it - bint has_labels + bint has_labels, has_ndarray_labels object res, name, labels, index object cached_typ=None @@ -113,14 +113,18 @@ cdef class Reducer: chunk.data = arr.data labels = self.labels has_labels = labels is not None + has_ndarray_labels = util.is_array(labels) has_index = self.index is not None incr = self.increment try: for i in range(self.nresults): - if has_labels: + if has_ndarray_labels: name = util.get_value_at(labels, i) + elif has_labels: + # labels is an ExtensionArray + name = labels[i] else: name = None @@ -362,7 +366,8 @@ cdef class SeriesGrouper: def get_result(self): cdef: - ndarray arr, result + # Define result to avoid UnboundLocalError + ndarray arr, result = None ndarray[int64_t] labels, counts Py_ssize_t i, n, group_size, lab object res @@ -428,6 +433,9 @@ cdef class SeriesGrouper: islider.reset() vslider.reset() + if result is None: + raise ValueError("No result.") + if result.dtype == np.object_: result = maybe_convert_objects(result) @@ -639,11 +647,11 @@ def compute_reduction(arr, f, axis=0, dummy=None, labels=None): """ if labels is not None: - if labels._has_complex_internals: - raise Exception('Cannot use shortcut') + # Caller is responsible for ensuring we don't have MultiIndex + assert not labels._has_complex_internals - # pass as an ndarray - labels = values_from_object(labels) + # pass as an ndarray/ExtensionArray + labels = labels._values reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels) return reducer.get_result() diff --git a/pandas/core/apply.py b/pandas/core/apply.py index d093d7a145382..714423de34222 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -223,10 +223,12 @@ def apply_empty_result(self): def apply_raw(self): """ apply to the values as a numpy array """ - try: result = libreduction.compute_reduction(self.values, self.f, axis=self.axis) - except Exception: + except ValueError as err: + if "Function does not reduce" not in str(err): + # catch only ValueError raised intentionally in libreduction + raise result = np.apply_along_axis(self.f, self.axis, self.values) # TODO: mixed type case @@ -273,24 +275,38 @@ def apply_standard(self): if ( self.result_type in 
["reduce", None] and not self.dtypes.apply(is_extension_type).any() + # Disallow complex_internals since libreduction shortcut + # cannot handle MultiIndex + and not self.agg_axis._has_complex_internals ): - # Create a dummy Series from an empty array - from pandas import Series - values = self.values index = self.obj._get_axis(self.axis) labels = self.agg_axis empty_arr = np.empty(len(index), dtype=values.dtype) - dummy = Series(empty_arr, index=index, dtype=values.dtype) + + # Preserve subclass for e.g. test_subclassed_apply + dummy = self.obj._constructor_sliced( + empty_arr, index=index, dtype=values.dtype + ) try: result = libreduction.compute_reduction( values, self.f, axis=self.axis, dummy=dummy, labels=labels ) - return self.obj._constructor_sliced(result, index=labels) - except Exception: + except ValueError as err: + if "Function does not reduce" not in str(err): + # catch only ValueError raised intentionally in libreduction + raise + except TypeError: + # e.g. test_apply_ignore_failures we just ignore + if not self.ignore_failures: + raise + except ZeroDivisionError: + # reached via numexpr; fall back to python implementation pass + else: + return self.obj._constructor_sliced(result, index=labels) # compute the result using the series generator self.apply_series_generator() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index bec5cbc5fecb8..6212a37472000 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -775,11 +775,7 @@ def test_omit_nuisance(df): # won't work with axis = 1 grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) - msg = ( - r'\("unsupported operand type\(s\) for \+: ' - "'Timestamp' and 'float'\"" - r", 'occurred at index 0'\)" - ) + msg = r'\("unsupported operand type\(s\) for \+: ' "'Timestamp' and 'float'\", 0" with pytest.raises(TypeError, match=msg): grouped.agg(lambda x: x.sum(0, numeric_only=False)) From 868cf9eb9e503fac2b723c67c73c7340357b4472 Mon Sep 17 00:00:00 2001 From: Aniruddha Bhattacharjee Date: Thu, 3 Oct 2019 04:20:16 +0530 Subject: [PATCH 13/22] Fixed docstring errors in pandas.period range and pandas.PeriodIndex (#28756) --- pandas/core/indexes/period.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ee85b0fb91acb..0fc74f4e78c9f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -85,11 +85,11 @@ class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): Parameters ---------- - data : array-like (1d integer np.ndarray or PeriodArray), optional + data : array-like (1d int np.ndarray or PeriodArray), optional Optional period-like data to construct index with copy : bool Make a copy of input ndarray - freq : string or period object, optional + freq : str or period object, optional One of pandas period strings or corresponding objects start : starting value, period-like, optional If data is None, used as the start point in generating regular @@ -1001,18 +1001,18 @@ def period_range(start=None, end=None, periods=None, freq=None, name=None): Parameters ---------- - start : string or period-like, default None + start : str or period-like, default None Left bound for generating periods - end : string or period-like, default None + end : str or period-like, default None Right bound for generating periods - periods : integer, default None + periods : int, default None Number of periods to generate - freq : string 
or DateOffset, optional + freq : str or DateOffset, optional Frequency alias. By default the freq is taken from `start` or `end` if those are Period objects. Otherwise, the default is ``"D"`` for daily frequency. - name : string, default None + name : str, default None Name of the resulting PeriodIndex Returns From ef936f963ce3a8f7808ba8ffe77120c5cb3a5415 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Wed, 2 Oct 2019 18:55:31 -0500 Subject: [PATCH 14/22] CLN: Fix typo in contributing.rst (#28761) --- doc/source/development/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index 3cdf9b83e96f3..10d702808606a 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -952,7 +952,7 @@ the expected correct result:: Transitioning to ``pytest`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*pandas* existing test structure is *mostly* classed based, meaning that you will typically find tests wrapped in a class. +*pandas* existing test structure is *mostly* class-based, meaning that you will typically find tests wrapped in a class. .. code-block:: python From bf6c5c3c7b466d211df663ae11e3c027b85982bc Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 3 Oct 2019 07:56:10 +0100 Subject: [PATCH 15/22] DEPR: Deprecate Index.set_value (#28621) --- doc/source/reference/indexing.rst | 1 - doc/source/whatsnew/v1.0.0.rst | 4 +++- pandas/core/indexes/base.py | 14 +++++++++++++- pandas/tests/indexes/test_base.py | 13 +++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst index 576f734d517aa..dd59a99b3df9e 100644 --- a/doc/source/reference/indexing.rst +++ b/doc/source/reference/indexing.rst @@ -166,7 +166,6 @@ Selecting Index.get_slice_bound Index.get_value Index.get_values - Index.set_value Index.isin Index.slice_indexer Index.slice_locs diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2668734031ee1..16d23d675a8bb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -123,7 +123,9 @@ Documentation Improvements Deprecations ~~~~~~~~~~~~ -- +- ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, + value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` + is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). - .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0b5f9fb61fce8..afa4f1a5a8c76 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -205,7 +205,9 @@ class Index(IndexOpsMixin, PandasObject): """ # tolist is not actually deprecated, just suppressed in the __dir__ - _deprecations = DirNamesMixin._deprecations | frozenset(["tolist", "dtype_str"]) + _deprecations = DirNamesMixin._deprecations | frozenset( + ["tolist", "dtype_str", "set_value"] + ) # To hand over control to subclasses _join_precedence = 1 @@ -4680,10 +4682,20 @@ def set_value(self, arr, key, value): """ Fast lookup of value from 1-dimensional ndarray. + .. deprecated:: 1.0 + Notes ----- Only use this if you know what you're doing. """ + warnings.warn( + ( + "The 'set_value' method is deprecated, and " + "will be removed in a future version." 
+ ), + FutureWarning, + stacklevel=2, + ) self._engine.set_value( com.values_from_object(arr), com.values_from_object(key), value ) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d1ed79118d2fa..82d5ddd1ac358 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1908,16 +1908,21 @@ def test_is_monotonic_incomparable(self, attr): index = Index([5, datetime.now(), 7]) assert not getattr(index, attr) - def test_get_set_value(self): + def test_set_value_deprecated(self): + # GH 28621 + idx = self.create_index() + arr = np.array([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + idx.set_value(arr, idx[1], 80) + assert arr[1] == 80 + + def test_get_value(self): # TODO: Remove function? GH 19728 values = np.random.randn(100) date = self.dateIndex[67] assert_almost_equal(self.dateIndex.get_value(values, date), values[67]) - self.dateIndex.set_value(values, date, 10) - assert values[67] == 10 - @pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}]) @pytest.mark.parametrize( "index,expected", From 5686e9a1ca7fa392c9913497cbfe19830a38ac6f Mon Sep 17 00:00:00 2001 From: Tirth Jain Date: Thu, 3 Oct 2019 22:41:45 +0530 Subject: [PATCH 16/22] CLN: Centralised _check_percentile (#27584) --- pandas/core/algorithms.py | 3 --- pandas/core/frame.py | 8 ++++++-- pandas/core/generic.py | 23 ++++++----------------- pandas/core/series.py | 4 ++-- pandas/util/_validators.py | 35 +++++++++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 24 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 5a479667f0227..4073ede84c6f6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -246,7 +246,6 @@ def _get_hashtable_algo(values): def _get_data_algo(values, func_map): - if is_categorical_dtype(values): values = values._values_for_rank() @@ -297,7 +296,6 @@ def match(to_match, values, na_sentinel=-1): result = table.lookup(to_match) if na_sentinel != -1: - # replace but return a numpy array # use a Series because it handles dtype conversions properly from pandas import Series @@ -1163,7 +1161,6 @@ def compute(self, method): # slow method if n >= len(self.obj): - reverse_it = self.keep == "last" or method == "nlargest" ascending = method == "nsmallest" slc = np.s_[::-1] if reverse_it else np.s_[:] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 67360122ed021..1ab62d7a9e3bf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -31,7 +31,11 @@ deprecate_kwarg, rewrite_axis_style_signature, ) -from pandas.util._validators import validate_axis_style_args, validate_bool_kwarg +from pandas.util._validators import ( + validate_axis_style_args, + validate_bool_kwarg, + validate_percentile, +) from pandas.core.dtypes.cast import ( cast_scalar_to_array, @@ -8178,7 +8182,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"): C 1 days 12:00:00 Name: 0.5, dtype: object """ - self._check_percentile(q) + validate_percentile(q) data = self._get_numeric_data() if numeric_only else self axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cb21588c8ba1a..ddbdb48ab0441 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -32,7 +32,11 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature -from pandas.util._validators 
import validate_bool_kwarg, validate_fillna_kwargs +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, + validate_percentile, +) from pandas.core.dtypes.common import ( ensure_int64, @@ -10168,7 +10172,7 @@ def describe(self, percentiles=None, include=None, exclude=None): percentiles = list(percentiles) # get them all to be in [0, 1] - self._check_percentile(percentiles) + validate_percentile(percentiles) # median should always be included if 0.5 not in percentiles: @@ -10272,21 +10276,6 @@ def describe_1d(data): d.columns = data.columns.copy() return d - def _check_percentile(self, q): - """ - Validate percentiles (used by describe and quantile). - """ - - msg = "percentiles should all be in the interval [0, 1]. Try {0} instead." - q = np.asarray(q) - if q.ndim == 0: - if not 0 <= q <= 1: - raise ValueError(msg.format(q / 100.0)) - else: - if not all(0 <= qs <= 1 for qs in q): - raise ValueError(msg.format(q / 100.0)) - return q - _shared_docs[ "pct_change" ] = """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 44a29f73c51e7..97e8a2dbac7f5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -16,7 +16,7 @@ from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, deprecate -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_percentile from pandas.core.dtypes.common import ( _is_unorderable_exception, @@ -2317,7 +2317,7 @@ def quantile(self, q=0.5, interpolation="linear"): dtype: float64 """ - self._check_percentile(q) + validate_percentile(q) # We dispatch to DataFrame so that core.internals only has to worry # about 2D cases. diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 8d5f9f7749682..f5a472596f58f 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,8 +2,11 @@ Module that contains many useful utilities for validating data or function arguments """ +from typing import Iterable, Union import warnings +import numpy as np + from pandas.core.dtypes.common import is_bool @@ -370,3 +373,35 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): raise ValueError("Cannot specify both 'value' and 'method'.") return value, method + + +def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray: + """ + Validate percentiles (used by describe and quantile). + + This function checks if the given float oriterable of floats is a valid percentile + otherwise raises a ValueError. + + Parameters + ---------- + q: float or iterable of floats + A single percentile or an iterable of percentiles. + + Returns + ------- + ndarray + An ndarray of the percentiles if valid. + + Raises + ------ + ValueError if percentiles are not in given interval([0, 1]). + """ + msg = "percentiles should all be in the interval [0, 1]. Try {0} instead." 
+ q_arr = np.asarray(q) + if q_arr.ndim == 0: + if not 0 <= q_arr <= 1: + raise ValueError(msg.format(q_arr / 100.0)) + else: + if not all(0 <= qs <= 1 for qs in q_arr): + raise ValueError(msg.format(q_arr / 100.0)) + return q_arr From 3b19e1ddbb0dab26c50711741bd3e1fe7012ea77 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 3 Oct 2019 10:19:36 -0700 Subject: [PATCH 17/22] reenable codecov (#28750) --- ci/run_tests.sh | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 57f1ecf1e56f7..d1a9447c97d4e 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -43,10 +43,9 @@ do # if no tests are found (the case of "single and slow"), pytest exits with code 5, and would make the script fail, if not for the below code sh -c "$PYTEST_CMD; ret=\$?; [ \$ret = 5 ] && exit 0 || exit \$ret" - # 2019-08-21 disabling because this is hitting HTTP 400 errors GH#27602 - # if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then - # echo "uploading coverage for $TYPE tests" - # echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME" - # bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME - # fi + if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then + echo "uploading coverage for $TYPE tests" + echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME" + bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME + fi done From 4375daffeed16531bae3fdaf85324b590d1dcb59 Mon Sep 17 00:00:00 2001 From: Sidharthan Nair Date: Thu, 3 Oct 2019 18:25:03 +0100 Subject: [PATCH 18/22] Bugfix/groupby datetime issue (#28569) --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/groupby/generic.py | 4 +++- pandas/tests/groupby/test_apply.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 16d23d675a8bb..f8c4f9f3dc410 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -191,7 +191,7 @@ Datetimelike - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) -- +- Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b5aec189700ce..e556708dc9283 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1913,7 +1913,9 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: result = result.copy() obj_cols = [ - idx for idx in range(len(result.columns)) if is_object_dtype(result.dtypes[idx]) + idx + for idx in range(len(result.columns)) + if is_object_dtype(result.dtypes.iloc[idx]) ] # See GH#26285 diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 76588549532b1..4d0063b773bc5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -657,3 +657,22 @@ 
def test_apply_with_mixed_types(): result = g.apply(lambda x: x / x.sum()) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "group_column_dtlike", + [datetime.today(), datetime.today().date(), datetime.today().time()], +) +def test_apply_datetime_issue(group_column_dtlike): + # GH-28247 + # groupby-apply throws an error if one of the columns in the DataFrame + # is a datetime object and the column labels are different from + # standard int values in range(len(num_columns)) + + df = pd.DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) + result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) + + expected = pd.DataFrame( + ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] + ) + tm.assert_frame_equal(result, expected) From 88a6ee1eb1702e31e74d0bdaeae4b70568d07149 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 3 Oct 2019 10:50:41 -0700 Subject: [PATCH 19/22] TST: port maybe_promote tests from #23982 (#28764) --- pandas/tests/dtypes/cast/test_promote.py | 153 +++++++++++++++++++++-- 1 file changed, 144 insertions(+), 9 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 1b7de9b20f42f..7acff3477ce0f 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -227,9 +227,72 @@ def test_maybe_promote_float_with_int(float_dtype, any_int_dtype, box): ) -def test_maybe_promote_float_with_float(): - # placeholder due to too many xfails; see GH 23982 / 25425 - pass +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # float filled with float + ("float32", 1, "float32"), + ("float32", np.finfo("float32").max * 1.1, "float64"), + ("float64", 1, "float64"), + ("float64", np.finfo("float32").max * 1.1, "float64"), + # complex filled with float + ("complex64", 1, "complex64"), + ("complex64", np.finfo("float32").max * 1.1, "complex128"), + ("complex128", 1, "complex128"), + ("complex128", np.finfo("float32").max * 1.1, "complex128"), + # float filled with complex + ("float32", 1 + 1j, "complex64"), + ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("float64", 1 + 1j, "complex128"), + ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + # complex filled with complex + ("complex64", 1 + 1j, "complex64"), + ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("complex128", 1 + 1j, "complex128"), + ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ], +) +def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype, box): + + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + boxed, box_dtype = box # read from parametrized fixture + + if box_dtype == object: + pytest.xfail("falsely upcasts to object") + if boxed and is_float_dtype(dtype) and is_complex_dtype(expected_dtype): + pytest.xfail("does not upcast to complex") + if (dtype, expected_dtype) in [ + ("float32", "float64"), + ("float32", "complex64"), + ("complex64", "complex128"), + ]: + pytest.xfail("does not upcast correctly depending on value") + # this following xfails are "only" a consequence of the - now strictly + # enforced - principle that maybe_promote_with_scalar always casts + if not boxed and abs(fill_value) < 2: + pytest.xfail("wrong return type of fill_value") + if ( + not boxed + and dtype == "complex128" + and expected_dtype == "complex128" + and is_float_dtype(type(fill_value)) + ): + pytest.xfail("wrong return type of fill_value") + + # output 
is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + exp_val_for_array = np.nan + + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) def test_maybe_promote_bool_with_any(any_numpy_dtype_reduced, box): @@ -300,9 +363,45 @@ def test_maybe_promote_any_with_bytes(): pass -def test_maybe_promote_datetime64_with_any(): - # placeholder due to too many xfails; see GH 23982 / 25425 - pass +def test_maybe_promote_datetime64_with_any( + datetime64_dtype, any_numpy_dtype_reduced, box +): + dtype = np.dtype(datetime64_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + boxed, box_dtype = box # read from parametrized fixture + + if is_datetime64_dtype(fill_dtype): + if box_dtype == object: + pytest.xfail("falsely upcasts to object") + else: + if boxed and box_dtype is None: + pytest.xfail("does not upcast to object") + if not boxed: + pytest.xfail("does not upcast to object or raises") + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling datetime with anything but datetime casts to object + if is_datetime64_dtype(fill_dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to to_datetime64 + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + exp_val_for_array = np.datetime64("NaT", "ns") + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) # override parametrization of box to add special case for dt_dtype @@ -505,9 +604,45 @@ def test_maybe_promote_any_numpy_dtype_with_datetimetz( ) -def test_maybe_promote_timedelta64_with_any(): - # placeholder due to too many xfails; see GH 23982 / 25425 - pass +def test_maybe_promote_timedelta64_with_any( + timedelta64_dtype, any_numpy_dtype_reduced, box +): + dtype = np.dtype(timedelta64_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + boxed, box_dtype = box # read from parametrized fixture + + if is_timedelta64_dtype(fill_dtype): + if box_dtype == object: + pytest.xfail("falsely upcasts to object") + else: + if boxed and box_dtype is None: + pytest.xfail("does not upcast to object") + if not boxed: + pytest.xfail("does not upcast to object or raises") + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling timedelta with anything but timedelta casts to object + if is_timedelta64_dtype(fill_dtype): + expected_dtype = dtype + # for timedelta dtypes, scalar values get cast to pd.Timedelta.value + exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() + exp_val_for_array = np.timedelta64("NaT", "ns") + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) @pytest.mark.parametrize( From 9dce9c82df1a631446d4f11353cc80406a39bbfe Mon Sep 17 00:00:00 2001 From: Aniruddha Bhattacharjee Date: Fri, 4 Oct 2019 07:32:15 +0530 Subject: [PATCH 20/22] DOC: Minor fixes in pandas/testing.py docstring. 
(#28752) --- pandas/util/testing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1c0a8dbc19ccd..32f88b13ac041 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -294,7 +294,7 @@ def assert_almost_equal( ---------- left : object right : object - check_dtype : bool / string {'equiv'}, default 'equiv' + check_dtype : bool or {'equiv'}, default 'equiv' Check dtype if both a and b are the same type. If 'equiv' is passed in, then `RangeIndex` and `Int64Index` are also considered equivalent when doing type checking. @@ -585,7 +585,7 @@ def assert_index_equal( ---------- left : Index right : Index - exact : bool / string {'equiv'}, default 'equiv' + exact : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', then RangeIndex can be substituted for Int64Index as well. @@ -860,7 +860,7 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray") ---------- left, right : IntervalArray The IntervalArrays to compare. - exact : bool / string {'equiv'}, default 'equiv' + exact : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', then RangeIndex can be substituted for Int64Index as well. @@ -1089,7 +1089,7 @@ def assert_series_equal( right : Series check_dtype : bool, default True Whether to check the Series dtype is identical. - check_index_type : bool / string {'equiv'}, default 'equiv' + check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. check_series_type : bool, default True @@ -1251,10 +1251,10 @@ def assert_frame_equal( Second DataFrame to compare. check_dtype : bool, default True Whether to check the DataFrame dtype is identical. - check_index_type : bool / string {'equiv'}, default 'equiv' + check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. - check_column_type : bool / string {'equiv'}, default 'equiv' + check_column_type : bool or {'equiv'}, default 'equiv' Whether to check the columns class, dtype and inferred_type are identical. Is passed as the ``exact`` argument of :func:`assert_index_equal`. From 069e1243d8fd9f622f7d66b593769f3a169c3952 Mon Sep 17 00:00:00 2001 From: Dorothy Kabarozi <30805504+dorothykiz1@users.noreply.github.com> Date: Fri, 4 Oct 2019 05:13:38 +0300 Subject: [PATCH 21/22] WEB: Add diversity note to team.md (#28630) --- web/pandas/about/team.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/web/pandas/about/team.md b/web/pandas/about/team.md index 41da3a0e82bdb..8eb2edebec817 100644 --- a/web/pandas/about/team.md +++ b/web/pandas/about/team.md @@ -36,6 +36,16 @@ If you want to support pandas development, you can find information in the [dona {% endfor %} +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({base_url}/community/coc.html) to ensure a friendly and welcoming environment. 
+> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + ## Governance Wes McKinney is the Benevolent Dictator for Life (BDFL). From ac39473a8f325b80642b3819e78fa42e5760f4a7 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Fri, 4 Oct 2019 11:33:28 -0400 Subject: [PATCH 22/22] DOC: fix PR09,PR08 errors for pandas.Timestamp (#28739) --- pandas/_libs/tslibs/nattype.pyx | 166 ++++++++++++++++------------- pandas/_libs/tslibs/timedeltas.pyx | 29 +++-- pandas/_libs/tslibs/timestamps.pyx | 166 ++++++++++++++++------------- 3 files changed, 201 insertions(+), 160 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 328fc26e4fef6..0bd4b78d51e4e 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -396,7 +396,7 @@ class NaTType(_NaT): Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the month name + Locale determining the language in which to return the month name. Returns ------- @@ -411,7 +411,7 @@ class NaTType(_NaT): Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the day name + Locale determining the language in which to return the day name. Returns ------- @@ -509,11 +509,11 @@ class NaTType(_NaT): Parameters ---------- ordinal : int - date corresponding to a proleptic Gregorian ordinal + Date corresponding to a proleptic Gregorian ordinal. freq : str, DateOffset - Offset which Timestamp will have + Offset to apply to the Timestamp. tz : str, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. + Time zone for the Timestamp. """) # _nat_methods @@ -534,7 +534,7 @@ class NaTType(_NaT): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """) today = _make_nat_func('today', # noqa:E128 """ @@ -547,7 +547,7 @@ class NaTType(_NaT): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """) round = _make_nat_func('round', # noqa:E128 """ @@ -555,27 +555,30 @@ class NaTType(_NaT): Parameters ---------- - freq : a freq string indicating the rounding resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
- - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. .. versionadded:: 0.24.0 @@ -593,33 +596,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the flooring resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. .. versionadded:: 0.24.0 Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """) ceil = _make_nat_func('ceil', # noqa:E128 """ @@ -627,33 +633,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the ceiling resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + freq : str + Frequency string indicating the ceiling resolution. 
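For the ``floor``/``ceil`` resolutions and the ``ValueError`` noted under Raises, a minimal sketch (the timestamp value is illustrative):

>>> import pandas as pd
>>> ts = pd.Timestamp("2019-10-04 12:34:56")
>>> ts.floor("H")    # should give 2019-10-04 12:00:00
>>> ts.ceil("H")     # should give 2019-10-04 13:00:00
>>> ts.floor("foo")  # an unrecognised freq string is expected to raise ValueError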
+ ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. .. versionadded:: 0.24.0 Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """) tz_convert = _make_nat_func('tz_convert', # noqa:E128 @@ -694,35 +703,42 @@ default 'raise' `ambiguous` parameter dictates how ambiguous times should be handled. - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + The behavior is as follows: - .. versionadded:: 0.24.0 + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + .. versionadded:: 0.24.0 errors : 'raise', 'coerce', default None - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. 
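The ``ambiguous`` and ``nonexistent`` choices documented for ``tz_localize`` can be sketched against the 2019 US DST transitions; the dates and the ``US/Eastern`` zone are illustrative assumptions:

>>> import pandas as pd
>>> # 02:30 on 2019-03-10 does not exist in US/Eastern (spring-forward)
>>> missing = pd.Timestamp("2019-03-10 02:30:00")
>>> missing.tz_localize("US/Eastern", nonexistent="shift_forward")  # should land on 03:00, UTC-04:00
>>> missing.tz_localize("US/Eastern", nonexistent="NaT")            # NaT
>>> # 01:30 on 2019-11-03 occurs twice in US/Eastern (fall-back)
>>> twice = pd.Timestamp("2019-11-03 01:30:00")
>>> twice.tz_localize("US/Eastern", ambiguous=True)   # DST side, UTC-04:00
>>> twice.tz_localize("US/Eastern", ambiguous=False)  # standard side, UTC-05:00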
- - 'coerce' will return NaT if the timestamp can not be converted + Determine how errors should be handled. + + The behavior is as follows: + + * 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. due to a transition from + or to DST time). Use ``nonexistent='raise'`` instead. + * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b232042c70eac..f9cb35eb79ae3 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1215,14 +1215,20 @@ class Timedelta(_Timedelta): Parameters ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : str, optional - Denote the unit of the input, if input is an integer. Default 'ns'. + unit : str, default 'ns' + Denote the unit of the input, if input is an integer. + Possible values: - {'Y', 'M', 'W', 'D', 'days', 'day', 'hours', hour', 'hr', 'h', - 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', 'sec', 'second', - 'ms', 'milliseconds', 'millisecond', 'milli', 'millis', 'L', - 'us', 'microseconds', 'microsecond', 'micro', 'micros', 'U', - 'ns', 'nanoseconds', 'nano', 'nanos', 'nanosecond', 'N'} + + * 'Y', 'M', 'W', 'D', 'T', 'S', 'L', 'U', or 'N' + * 'days' or 'day' + * 'hours', 'hour', 'hr', or 'h' + * 'minutes', 'minute', 'min', or 'm' + * 'seconds', 'second', or 'sec' + * 'milliseconds', 'millisecond', 'millis', or 'milli' + * 'microseconds', 'microsecond', 'micros', or 'micro' + * 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'. + **kwargs Available kwargs: {days, seconds, microseconds, milliseconds, minutes, hours, weeks}. @@ -1323,7 +1329,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the rounding resolution + freq : str + Frequency string indicating the rounding resolution. Returns ------- @@ -1341,7 +1348,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the flooring resolution + freq : str + Frequency string indicating the flooring resolution. """ return self._round(freq, np.floor) @@ -1351,7 +1359,8 @@ class Timedelta(_Timedelta): Parameters ---------- - freq : a freq string indicating the ceiling resolution + freq : str + Frequency string indicating the ceiling resolution. """ return self._round(freq, np.ceil) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 6ca39d83afd25..c1575ce4f48b3 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -251,11 +251,11 @@ class Timestamp(_Timestamp): Parameters ---------- ordinal : int - date corresponding to a proleptic Gregorian ordinal + Date corresponding to a proleptic Gregorian ordinal. freq : str, DateOffset - Offset which Timestamp will have + Offset to apply to the Timestamp. tz : str, pytz.timezone, dateutil.tz.tzfile or None - Time zone for time which Timestamp will have. + Time zone for the Timestamp. """ return cls(datetime.fromordinal(ordinal), freq=freq, tz=tz) @@ -271,7 +271,7 @@ class Timestamp(_Timestamp): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. 
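The ``unit`` aliases spelled out in the new ``Timedelta`` bullet list, and the ``ordinal``/``tz`` arguments of ``Timestamp.fromordinal`` and ``Timestamp.now``, behave roughly as sketched below; the concrete values, date and time zone are illustrative assumptions:

>>> import pandas as pd
>>> from datetime import date
>>> pd.Timedelta(1, unit="h")                            # one hour
>>> pd.Timedelta(90, unit="T")                           # 'T' is minutes, i.e. 01:30:00
>>> pd.Timedelta(hours=1) == pd.Timedelta(1, unit="h")   # should be True
>>> ordinal = date(2019, 10, 4).toordinal()
>>> pd.Timestamp.fromordinal(ordinal)                    # midnight of 2019-10-04, naive
>>> pd.Timestamp.fromordinal(ordinal, tz="UTC")          # same wall time, localized to UTC
>>> pd.Timestamp.now(tz="UTC")                           # current moment, tz-aware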
""" if isinstance(tz, str): tz = maybe_get_tz(tz) @@ -289,7 +289,7 @@ class Timestamp(_Timestamp): Parameters ---------- tz : str or timezone object, default None - Timezone to localize to + Timezone to localize to. """ return cls.now(tz) @@ -445,27 +445,30 @@ class Timestamp(_Timestamp): Parameters ---------- - freq : a freq string indicating the rounding resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. .. versionadded:: 0.24.0 @@ -487,33 +490,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the flooring resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. 
- - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. .. versionadded:: 0.24.0 Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """ return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) @@ -523,33 +529,36 @@ default 'raise' Parameters ---------- - freq : a freq string indicating the ceiling resolution - ambiguous : bool, 'NaT', default 'raise' - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. .. versionadded:: 0.24.0 - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ -default 'raise' + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. .. versionadded:: 0.24.0 Raises ------ - ValueError if the freq cannot be converted + ValueError if the freq cannot be converted. """ return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) @@ -606,7 +615,7 @@ default 'raise' Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the day name + Locale determining the language in which to return the day name. 
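``day_name`` with and without a ``locale``, as documented above; the 'fr_FR' locale string is an assumption and only works where that locale is installed on the system:

>>> import pandas as pd
>>> pd.Timestamp("2019-10-04").day_name()                 # 'Friday'
>>> pd.Timestamp("2019-10-04").day_name(locale="fr_FR")   # localized name, if the locale is available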
Returns ------- @@ -623,7 +632,7 @@ default 'raise' Parameters ---------- locale : string, default None (English locale) - locale determining the language in which to return the month name + Locale determining the language in which to return the month name. Returns ------- @@ -779,35 +788,42 @@ default 'raise' `ambiguous` parameter dictates how ambiguous times should be handled. - - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) - - 'NaT' will return NaT for an ambiguous time - - 'raise' will raise an AmbiguousTimeError for an ambiguous time + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ default 'raise' A nonexistent time does not exist in a particular timezone where clocks moved forward due to DST. - - 'shift_forward' will shift the nonexistent time forward to the - closest existing time - - 'shift_backward' will shift the nonexistent time backward to the - closest existing time - - 'NaT' will return NaT where there are nonexistent times - - timedelta objects will shift nonexistent times by the timedelta - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times + The behavior is as follows: - .. versionadded:: 0.24.0 + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + .. versionadded:: 0.24.0 errors : 'raise', 'coerce', default None - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - - 'coerce' will return NaT if the timestamp can not be converted + Determine how errors should be handled. + + The behavior is as follows: + + * 'raise' will raise a NonExistentTimeError if a timestamp is not + valid in the specified timezone (e.g. due to a transition from + or to DST time). Use ``nonexistent='raise'`` instead. + * 'coerce' will return NaT if the timestamp can not be converted into the specified timezone. Use ``nonexistent='NaT'`` instead. - .. deprecated:: 0.24.0 + .. deprecated:: 0.24.0 Returns -------
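``month_name`` works the same way as ``day_name``, and the deprecated ``errors`` argument of ``tz_localize`` maps onto ``nonexistent`` roughly as follows (dates and zone are illustrative assumptions):

>>> import pandas as pd
>>> pd.Timestamp("2019-10-04").month_name()          # 'October'
>>> ts = pd.Timestamp("2019-03-10 02:30:00")         # nonexistent in US/Eastern
>>> ts.tz_localize("US/Eastern", errors="coerce")    # deprecated since 0.24.0
>>> ts.tz_localize("US/Eastern", nonexistent="NaT")  # preferred spelling, same NaT result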