- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 19.2k
DEPR: passing mixed offsets with utc=False into to_datetime #54014
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 29 commits
bcbe2ac
              ba99e93
              441e17f
              5e568bd
              2aa5c10
              8197005
              ca4b214
              a0e970a
              156ea8a
              643d3d6
              61d4deb
              f2bbedf
              5c4904a
              f5e5e1e
              88f3845
              3d74972
              88ed6c1
              180ccce
              6ecd997
              dc7c54d
              b5bbd2b
              1220130
              0549e6d
              b01c3ef
              04ef036
              c42f143
              5e5adc6
              3aa091f
              acee8de
              b7a2207
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -295,8 +295,45 @@ Other API changes | |
| .. --------------------------------------------------------------------------- | ||
| .. _whatsnew_210.deprecations: | ||
|  | ||
| Deprecations | ||
| ~~~~~~~~~~~~ | ||
| Deprecate parsing datetimes with mixed time zones | ||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|  | ||
| Parsing datetimes with mixed time zones is deprecated and shows a warning unless the user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`) | ||
|  | ||
| *Previous behavior*: | ||
|  | ||
| .. code-block:: ipython | ||
|  | ||
| In [7]: data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] | ||
|  | ||
| In [8]: pd.to_datetime(data, utc=False) | ||
| Out[8]: | ||
| Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') | ||
|  | ||
| *New behavior*: | ||
|  | ||
| .. code-block:: ipython | ||
|  | ||
| In [9]: pd.to_datetime(data, utc=False) | ||
| FutureWarning: | ||
| In a future version of pandas, parsing datetimes with mixed time zones will raise | ||
| a warning unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour | ||
| and silence this warning. | ||
| Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') | ||
|  | ||
| In order to silence this warning and avoid an error in a future version of pandas, | ||
| please specify ``utc=True``: | ||
|  | ||
| .. ipython:: python | ||
|  | ||
| data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] | ||
| pd.to_datetime(data, utc=True) | ||
|  | ||
| To create a ``Series`` with mixed offsets and ``object`` dtype, please use ``apply`` | ||
| and ``datetime.datetime.strptime``. | ||
|          | ||
|  | ||
| Other Deprecations | ||
| ~~~~~~~~~~~~~~~~~~ | ||
| - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) | ||
| - Deprecated 'downcast' keyword in :meth:`Index.fillna` (:issue:`53956`) | ||
| - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -620,6 +620,7 @@ cdef _array_to_datetime_object( | |
| # 1) NaT or NaT-like values | ||
| # 2) datetime strings, which we return as datetime.datetime | ||
| # 3) special strings - "now" & "today" | ||
| unique_timezones = set() | ||
| for i in range(n): | ||
| # Analogous to: val = values[i] | ||
| val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0] | ||
|  | @@ -649,6 +650,7 @@ cdef _array_to_datetime_object( | |
| tzinfo=tsobj.tzinfo, | ||
| fold=tsobj.fold, | ||
| ) | ||
| unique_timezones.add(tsobj.tzinfo) | ||
|  | ||
| except (ValueError, OverflowError) as ex: | ||
| ex.args = (f"{ex}, at position {i}", ) | ||
|  | @@ -666,6 +668,15 @@ cdef _array_to_datetime_object( | |
|  | ||
| cnp.PyArray_MultiIter_NEXT(mi) | ||
|  | ||
| if len(unique_timezones) > 1: | ||
|         
                  jbrockmendel marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| warnings.warn( | ||
| "In a future version of pandas, parsing datetimes with mixed time " | ||
| "zones will raise a warning unless `utc=True`. " | ||
| 
      Comment on lines
    
      +673
     to 
      +674
    
   There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The message here says that this will raise a warning in the future. But is that indeed the intent, or should that be "error" instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you're right, thanks - @natmokval fancy addressing this in a separate PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure, I replaced "warning" with "error" in warning message and made a new PR | ||
| "Please specify `utc=True` to opt in to the new behaviour " | ||
| "and silence this warning.", | ||
| FutureWarning, | ||
| stacklevel=find_stack_level(), | ||
| ) | ||
| return oresult_nd, None | ||
|  | ||
|  | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -340,6 +340,7 @@ def _return_parsed_timezone_results( | |
| tz_result : Index-like of parsed dates with timezone | ||
| """ | ||
| tz_results = np.empty(len(result), dtype=object) | ||
| non_na_timezones = set() | ||
| for zone in unique(timezones): | ||
| mask = timezones == zone | ||
| dta = DatetimeArray(result[mask]).tz_localize(zone) | ||
|  | @@ -348,8 +349,18 @@ def _return_parsed_timezone_results( | |
| dta = dta.tz_localize("utc") | ||
| else: | ||
| dta = dta.tz_convert("utc") | ||
| else: | ||
| if not dta.isna().all(): | ||
| non_na_timezones.add(zone) | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think this might break if we ever had a tzaware datetime object with a dateutil/pytz tzinfo in the input array bc those tzinfos are not hashable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks fine: In [2]: import pytz
In [3]: result = pd.to_datetime(
   ...:     [
   ...:         "2000-01-03 12:34:56.123456+01:00",
   ...:         datetime(2020, 1, 1, tzinfo=pytz.timezone('Asia/Kathmandu'))
   ...:     ],
   ...:     exact=False,
   ...: )
<ipython-input-3-36f1f20a96cd>:1: FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`
  result = pd.to_datetime(There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. woops, its dateutil tzs that aren't hashable. though your example still seems to work with one, so never mind | ||
| tz_results[mask] = dta | ||
|  | ||
| if len(non_na_timezones) > 1: | ||
| warnings.warn( | ||
| "In a future version of pandas, parsing datetimes with mixed time " | ||
| "zones will raise a warning unless `utc=True`. Please specify `utc=True` " | ||
| "to opt in to the new behaviour and silence this warning.", | ||
| FutureWarning, | ||
| stacklevel=find_stack_level(), | ||
| ) | ||
| return Index(tz_results, name=name) | ||
|  | ||
|  | ||
|  | @@ -772,6 +783,13 @@ def to_datetime( | |
| offsets (typically, daylight savings), see :ref:`Examples | ||
| <to_datetime_tz_examples>` section for details. | ||
|  | ||
| .. warning:: | ||
|  | ||
| In a future version of pandas, parsing datetimes with mixed time | ||
| zones will raise an error unless `utc=True`. | ||
| Please specify `utc=True` to opt in to the new behaviour | ||
| and silence this warning. | ||
|  | ||
| See also: pandas general documentation about `timezone conversion and | ||
| localization | ||
| <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html | ||
|  | @@ -993,19 +1011,29 @@ def to_datetime( | |
|  | ||
| - However, timezone-aware inputs *with mixed time offsets* (for example | ||
| issued from a timezone with daylight savings, such as Europe/Paris) | ||
| are **not successfully converted** to a :class:`DatetimeIndex`. Instead a | ||
| simple :class:`Index` containing :class:`datetime.datetime` objects is | ||
| returned: | ||
|  | ||
| >>> pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) | ||
| are **not successfully converted** to a :class:`DatetimeIndex`. | ||
| Parsing datetimes with mixed time zones will show a warning unless | ||
| `utc=True`. If you specify `utc=False` the warning below will be shown | ||
| and a simple :class:`Index` containing :class:`datetime.datetime` | ||
| objects will be returned: | ||
|  | ||
| >>> pd.to_datetime(['2020-10-25 02:00 +0200', | ||
| ... '2020-10-25 04:00 +0100']) # doctest: +SKIP | ||
| FutureWarning: In a future version of pandas, parsing datetimes with mixed | ||
| time zones will raise a warning unless `utc=True`. Please specify `utc=True` | ||
| to opt in to the new behaviour and silence this warning. | ||
| Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], | ||
| dtype='object') | ||
|  | ||
| - A mix of timezone-aware and timezone-naive inputs is also converted to | ||
| a simple :class:`Index` containing :class:`datetime.datetime` objects: | ||
|  | ||
| >>> from datetime import datetime | ||
| >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)]) | ||
| >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", | ||
| ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP | ||
| FutureWarning: In a future version of pandas, parsing datetimes with mixed | ||
| time zones will raise a warning unless `utc=True`. Please specify `utc=True` | ||
| to opt in to the new behaviour and silence this warning. | ||
| Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') | ||
|  | ||
| | | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -1144,37 +1144,59 @@ def converter(*date_cols, col: Hashable): | |
| date_format.get(col) if isinstance(date_format, dict) else date_format | ||
| ) | ||
|  | ||
| result = tools.to_datetime( | ||
| ensure_object(strs), | ||
| format=date_fmt, | ||
| utc=False, | ||
| dayfirst=dayfirst, | ||
| errors="ignore", | ||
| cache=cache_dates, | ||
| ) | ||
| with warnings.catch_warnings(): | ||
| warnings.filterwarnings( | ||
| "ignore", | ||
| ".*parsing datetimes with mixed time zones will raise a warning", | ||
| category=FutureWarning, | ||
| ) | ||
| result = tools.to_datetime( | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there's a couple more usages of  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you, I added e.g the tests below failed in ci, but passed locally: we don't even call this function ( | ||
| ensure_object(strs), | ||
| format=date_fmt, | ||
| utc=False, | ||
| dayfirst=dayfirst, | ||
| errors="ignore", | ||
| cache=cache_dates, | ||
| ) | ||
| if isinstance(result, DatetimeIndex): | ||
| arr = result.to_numpy() | ||
| arr.flags.writeable = True | ||
| return arr | ||
| return result._values | ||
| else: | ||
| try: | ||
| result = tools.to_datetime( | ||
| date_parser(*(unpack_if_single_element(arg) for arg in date_cols)), | ||
| errors="ignore", | ||
| cache=cache_dates, | ||
| ) | ||
| with warnings.catch_warnings(): | ||
| warnings.filterwarnings( | ||
| "ignore", | ||
| ".*parsing datetimes with mixed time zones " | ||
| "will raise a warning", | ||
| category=FutureWarning, | ||
| ) | ||
| result = tools.to_datetime( | ||
| date_parser( | ||
| *(unpack_if_single_element(arg) for arg in date_cols) | ||
| ), | ||
| errors="ignore", | ||
| cache=cache_dates, | ||
| ) | ||
| if isinstance(result, datetime.datetime): | ||
| raise Exception("scalar parser") | ||
| return result | ||
| except Exception: | ||
| return tools.to_datetime( | ||
| parsing.try_parse_dates( | ||
| parsing.concat_date_cols(date_cols), | ||
| parser=date_parser, | ||
| ), | ||
| errors="ignore", | ||
| ) | ||
| with warnings.catch_warnings(): | ||
| warnings.filterwarnings( | ||
| "ignore", | ||
| ".*parsing datetimes with mixed time zones " | ||
| "will raise a warning", | ||
| category=FutureWarning, | ||
| ) | ||
| return tools.to_datetime( | ||
| parsing.try_parse_dates( | ||
| parsing.concat_date_cols(date_cols), | ||
| parser=date_parser, | ||
| ), | ||
| errors="ignore", | ||
| ) | ||
|  | ||
| return converter | ||
|  | ||
|  | ||
Uh oh!
There was an error while loading. Please reload this page.