From 8ebf46f70c6be793e10c736ac0d2af727d107914 Mon Sep 17 00:00:00 2001 From: Michael Overmeyer Date: Tue, 19 Jun 2018 09:42:19 -0400 Subject: [PATCH 1/4] Change style of calls within tests to make it easier to use the tests against other libraries (ex. `from pendulum.parsing import parse_iso8601 as parse_datetime`) --- tests.py | 102 +++++++++++++++++++++++++++---------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/tests.py b/tests.py index 875dd50..4fe1f17 100644 --- a/tests.py +++ b/tests.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -import ciso8601 import datetime import sys +from ciso8601 import parse_datetime, parse_datetime_as_naive, parse_rfc3339 from generate_test_timestamps import generate_valid_timestamp_and_datetime, generate_invalid_timestamp if sys.version_info.major == 2: @@ -19,7 +19,7 @@ class ValidTimestampTestCase(unittest.TestCase): def test_auto_generated_valid_formats(self): for (timestamp, expected_datetime) in generate_valid_timestamp_and_datetime(): try: - self.assertEqual(ciso8601.parse_datetime(timestamp), expected_datetime) + self.assertEqual(parse_datetime(timestamp), expected_datetime) except Exception: print("Had problems parsing: {timestamp}".format(timestamp=timestamp)) raise @@ -27,14 +27,14 @@ def test_auto_generated_valid_formats(self): def test_parse_as_naive_auto_generated_valid_formats(self): for (timestamp, expected_datetime) in generate_valid_timestamp_and_datetime(): try: - self.assertEqual(ciso8601.parse_datetime_as_naive(timestamp), expected_datetime.replace(tzinfo=None)) + self.assertEqual(parse_datetime_as_naive(timestamp), expected_datetime.replace(tzinfo=None)) except Exception: print("Had problems parsing: {timestamp}".format(timestamp=timestamp)) raise def test_excessive_subsecond_precision(self): self.assertEqual( - ciso8601.parse_datetime("20140203T103527.234567891234"), + parse_datetime("20140203T103527.234567891234"), datetime.datetime(2014, 2, 3, 10, 35, 27, 234567), ) @@ -43,13 
+43,13 @@ def test_leap_year(self): # We just want to make sure that they work in general. for leap_year in (1600, 2000, 2016): self.assertEqual( - ciso8601.parse_datetime("{}-02-29".format(leap_year)), + parse_datetime("{}-02-29".format(leap_year)), datetime.datetime(leap_year, 2, 29, 0, 0, 0, 0), ) def test_special_midnight(self): self.assertEqual( - ciso8601.parse_datetime("2014-02-03T24:00:00"), + parse_datetime("2014-02-03T24:00:00"), datetime.datetime(2014, 2, 4, 0, 0, 0), ) @@ -63,7 +63,7 @@ def test_parse_auto_generated_invalid_formats(self): for timestamp in generate_invalid_timestamp(): try: with self.assertRaises(ValueError, msg="Timestamp '{0}' was supposed to be invalid, but parsing it didn't raise ValueError.".format(timestamp)): - ciso8601.parse_datetime(timestamp) + parse_datetime(timestamp) except Exception as exc: print("Timestamp '{0}' was supposed to raise ValueError, but raised {1} instead".format(timestamp, type(exc).__name__)) raise @@ -73,26 +73,26 @@ def test_non_ascii_characters(self): self.assertRaisesRegex( ValueError, r"Invalid character while parsing date separator \('-'\) \('🐡', Index: 7\)", - ciso8601.parse_datetime, + parse_datetime, "2019-01🐡01", ) self.assertRaisesRegex( ValueError, r"Invalid character while parsing day \('🐡', Index: 8\)", - ciso8601.parse_datetime, + parse_datetime, "2019-01-🐡", ) else: self.assertRaisesRegex( ValueError, r"Invalid character while parsing date separator \('-'\) \(Index: 7\)", - ciso8601.parse_datetime, + parse_datetime, "2019-01🐡01", ) self.assertRaisesRegex( ValueError, r"Invalid character while parsing day \(Index: 8\)", - ciso8601.parse_datetime, + parse_datetime, "2019-01-🐡", ) @@ -100,28 +100,28 @@ def test_invalid_calendar_separator(self): self.assertRaisesRegex( ValueError, r"Invalid character while parsing month", - ciso8601.parse_datetime, + parse_datetime, "2018=01=01", ) self.assertRaisesRegex( ValueError, r"Invalid character while parsing date separator \('-'\) \('=', Index: 7\)", - 
ciso8601.parse_datetime, + parse_datetime, "2018-01=01", ) self.assertRaisesRegex( ValueError, r"Invalid character while parsing date separator \('-'\) \('0', Index: 7\)", - ciso8601.parse_datetime, + parse_datetime, "2018-0101", ) self.assertRaisesRegex( ValueError, r"Invalid character while parsing day \('-', Index: 6\)", - ciso8601.parse_datetime, + parse_datetime, "201801-01", ) @@ -129,63 +129,63 @@ def test_invalid_empty_but_required_fields(self): self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing year. Expected 4 more characters", - ciso8601.parse_datetime, + parse_datetime, "", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing month. Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "2018-", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing day. Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "2018-01-", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing hour. Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing minute. Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing second. Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00:", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing subsecond. Expected 1 more character", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00:00.", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing tz hour. Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00:00.00+", ) self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing tz minute. 
Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00:00.00-00:", ) @@ -194,28 +194,28 @@ def test_invalid_day_for_month(self): self.assertRaisesRegex( ValueError, r"day is out of range for month", - ciso8601.parse_datetime, + parse_datetime, "{}-02-29".format(non_leap_year), ) self.assertRaisesRegex( ValueError, r"day is out of range for month", - ciso8601.parse_datetime, + parse_datetime, "2014-01-32", ) self.assertRaisesRegex( ValueError, r"day is out of range for month", - ciso8601.parse_datetime, + parse_datetime, "2014-06-31", ) self.assertRaisesRegex( ValueError, r"day is out of range for month", - ciso8601.parse_datetime, + parse_datetime, "2014-06-00", ) @@ -223,7 +223,7 @@ def test_invalid_yyyymm_format(self): self.assertRaisesRegex( ValueError, r"Unexpected end of string while parsing day. Expected 2 more characters", - ciso8601.parse_datetime, + parse_datetime, "201406", ) @@ -231,7 +231,7 @@ def test_invalid_date_and_time_separator(self): self.assertRaisesRegex( ValueError, r"Invalid character while parsing date and time separator \(ie. 
'T' or ' '\) \('_', Index: 10\)", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01_00:00:00", ) @@ -240,7 +240,7 @@ def test_invalid_hour_24(self): self.assertRaisesRegex( ValueError, r"hour must be in 0..23", - ciso8601.parse_datetime, + parse_datetime, "2014-02-03T24:35:27", ) @@ -248,21 +248,21 @@ def test_invalid_time_separator(self): self.assertRaisesRegex( ValueError, r"Invalid character while parsing time separator \(':'\) \('=', Index: 16\)", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00=00", ) self.assertRaisesRegex( ValueError, r"Invalid character while parsing time separator \(':'\) \('0', Index: 16\)", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:0000", ) self.assertRaisesRegex( ValueError, r"Invalid character while parsing second \(':', Index: 15\)", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T0000:00", ) @@ -270,7 +270,7 @@ def test_invalid_tz_minute(self): self.assertRaisesRegex( ValueError, r"tzminute must be in 0..59", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00:00.00-00:99", ) @@ -281,14 +281,14 @@ def test_invalid_tz_offsets_too_large(self): ValueError, # Error message differs whether or not we are using pytz or datetime.timezone r"^offset must be a timedelta strictly between" if sys.version_info.major >= 3 else r"\('absolute offset is too large', -5940\)", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00:00.00-99", ) self.assertRaisesRegex( ValueError, r"tzminute must be in 0..59", - ciso8601.parse_datetime, + parse_datetime, "2018-01-01T00:00:00.00-23:60", ) @@ -301,14 +301,14 @@ def test_mixed_basic_and_extended_formats(self): self.assertRaisesRegex( ValueError, r"Cannot combine \"extended\" date format with \"basic\" time format", - ciso8601.parse_datetime, + parse_datetime, "2014-01-02T010203", ), self.assertRaisesRegex( ValueError, r"Cannot combine \"basic\" date format with \"extended\" time format", - ciso8601.parse_datetime, + parse_datetime, 
"20140102T01:02:03", ) @@ -331,7 +331,7 @@ def test_valid_rfc3339_timestamps(self): "2018-01-02T03:04:05-12:34", ]: self.assertEqual( - ciso8601.parse_datetime(string), ciso8601.parse_rfc3339(string) + parse_datetime(string), parse_rfc3339(string) ) def test_invalid_rfc3339_timestamps(self): @@ -356,7 +356,7 @@ def test_invalid_rfc3339_timestamps(self): "2018-01-02T03:04:05,12345Z", # Invalid comma fractional second separator ]: with self.assertRaisesRegex(ValueError, r"RFC 3339", msg="Timestamp '{0}' was supposed to be invalid, but parsing it didn't raise ValueError.".format(timestamp)): - ciso8601.parse_rfc3339(timestamp) + parse_rfc3339(timestamp) class GithubIssueRegressionTestCase(unittest.TestCase): @@ -368,7 +368,7 @@ def test_issue_5(self): self.assertRaisesRegex( ValueError, r"Invalid character while parsing minute \(':', Index: 14\)", - ciso8601.parse_datetime, + parse_datetime, "2014-02-03T10::27", ) @@ -376,7 +376,7 @@ def test_issue_6(self): self.assertRaisesRegex( ValueError, r"Invalid character while parsing second \('.', Index: 17\)", - ciso8601.parse_datetime, + parse_datetime, "2014-02-03 04:05:.123456", ) @@ -384,14 +384,14 @@ def test_issue_8(self): self.assertRaisesRegex( ValueError, r"hour must be in 0..23", - ciso8601.parse_datetime, + parse_datetime, "2001-01-01T24:01:01", ) self.assertRaisesRegex( ValueError, r"month must be in 1..12", - ciso8601.parse_datetime, + parse_datetime, "07722968", ) @@ -399,7 +399,7 @@ def test_issue_13(self): self.assertRaisesRegex( ValueError, r"month must be in 1..12", - ciso8601.parse_datetime, + parse_datetime, "2014-13-01", ) @@ -407,7 +407,7 @@ def test_issue_22(self): self.assertRaisesRegex( ValueError, r"day is out of range for month", - ciso8601.parse_datetime, + parse_datetime, "2016-11-31T12:34:34.521059", ) @@ -415,7 +415,7 @@ def test_issue_35(self): self.assertRaisesRegex( ValueError, r"Invalid character while parsing date separator \('-'\) \('1', Index: 7\)", - ciso8601.parse_datetime, + 
parse_datetime, "2017-0012-27T13:35:19+0200", ) @@ -423,7 +423,7 @@ def test_issue_42(self): self.assertRaisesRegex( ValueError, r"day is out of range for month", - ciso8601.parse_datetime, + parse_datetime, "20140200", ) @@ -431,14 +431,14 @@ def test_issue_71(self): self.assertRaisesRegex( ValueError, r"Cannot combine \"basic\" date format with \"extended\" time format", - ciso8601.parse_datetime, + parse_datetime, "20010203T04:05:06Z", ) self.assertRaisesRegex( ValueError, r"Cannot combine \"basic\" date format with \"extended\" time format", - ciso8601.parse_datetime, + parse_datetime, "20010203T04:05", ) From 92f72b63d9beb0c3663c9634c0f9075239fe6adf Mon Sep 17 00:00:00 2001 From: Michael Overmeyer Date: Sun, 27 Jun 2021 14:00:21 -0400 Subject: [PATCH 2/4] Move tests into subdirectory/module --- tests/__init__.py | 0 tests.py => tests/tests.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/__init__.py rename tests.py => tests/tests.py (100%) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests.py b/tests/tests.py similarity index 100% rename from tests.py rename to tests/tests.py From 6069f4874af68613bedc1c56e4fe63a8bc8c2ff1 Mon Sep 17 00:00:00 2001 From: Michael Overmeyer Date: Tue, 19 Jun 2018 20:03:06 -0400 Subject: [PATCH 3/4] Switch to use Pendulum's C timezone implementation. Removes dependency on pytz. Maintains the same error messages so its backwards compatible with 2.0.x versions. 
--- CHANGELOG.md | 5 + MANIFEST.in | 1 + README.rst | 75 +++++-------- module.c | 92 ++++++++-------- setup.py | 2 +- tests/test_timezone.py | 68 ++++++++++++ tests/tests.py | 46 +++++++- timezone.c | 239 +++++++++++++++++++++++++++++++++++++++++ timezone.h | 12 +++ 9 files changed, 437 insertions(+), 103 deletions(-) create mode 100644 tests/test_timezone.py create mode 100644 timezone.c create mode 100644 timezone.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 43faadc..ad6821b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,11 @@ # Unreleased * Added Python 3.9 support +* Switched to using a C implementation of `timezone` objects. + * Much faster parse times for timestamps with timezone information + * ~2.5x faster on Python 2.7, ~10% faster on Python 3.9 + * Thanks to [`pendulum`](https://github.com/sdispater/pendulum) and @sdispater for the code. + * Python 2.7 users no longer need to install `pytz` dependency :smiley: # 2.x.x diff --git a/MANIFEST.in b/MANIFEST.in index 3ce73c4..1edc9c6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include LICENSE include README.rst include CHANGELOG.md +include timezone.h diff --git a/README.rst b/README.rst index d38a942..5a0d3ff 100644 --- a/README.rst +++ b/README.rst @@ -76,7 +76,7 @@ Parsing a timestamp with no time zone information (ex. ``2014-01-09T21:48:00``): .. -.. table:: +.. table:: +---------------+----------+----------+----------+----------+----------+-------------------------------+-----------------------------------------------+ | Module |Python 3.8|Python 3.7|Python 3.6|Python 3.5|Python 3.4| Python 2.7 |Relative Slowdown (versus ciso8601, Python 3.8)| @@ -118,7 +118,7 @@ Parsing a timestamp with time zone information (ex. ``2014-01-09T21:48:00-05:30` .. -.. table:: +.. 
table:: +---------------+-------------------------------+-------------------------------+-------------------------------+-------------------------------+----------+-------------------------------+-----------------------------------------------+ | Module | Python 3.8 | Python 3.7 | Python 3.6 | Python 3.5 |Python 3.4| Python 2.7 |Relative Slowdown (versus ciso8601, Python 3.8)| @@ -185,29 +185,6 @@ For full benchmarking details (or to run the benchmark yourself), see `benchmark .. _`benchmarking/README.rst`: https://github.com/closeio/ciso8601/blob/master/benchmarking/README.rst -Dependency on pytz (Python 2) ------------------------------ - -In Python 2, ``ciso8601`` uses the `pytz`_ library while parsing timestamps with time zone information. This means that if you wish to parse such timestamps, you must first install ``pytz``: - -.. _pytz: http://pytz.sourceforge.net/ - -.. code:: python - - pip install pytz - -Otherwise, ``ciso8601`` will raise an exception when you try to parse a timestamp with time zone information: - -.. code:: python - - In [2]: ciso8601.parse_datetime('2014-12-05T12:30:45.123456-05:30') - Out[2]: ImportError: Cannot parse a timestamp with time zone information without the pytz dependency. Install it with `pip install pytz`. - -``pytz`` is intentionally not an explicit dependency of ``ciso8601``. This is because many users use ``ciso8601`` to parse only naive timestamps, and therefore don't need this extra dependency. -In Python 3, ``ciso8601`` makes use of the built-in `datetime.timezone`_ class instead, so ``pytz`` is not necessary. - -.. 
_datetime.timezone: https://docs.python.org/3/library/datetime.html#timezone-objects - Supported Subset of ISO 8601 ---------------------------- @@ -227,11 +204,11 @@ The following date formats are supported: ``YYYY-MM-DD`` ``2018-04-29`` ✅ ``YYYY-MM`` ``2018-04`` ✅ ``YYYYMMDD`` ``2018-04`` ✅ - ``--MM-DD`` (omitted year) ``--04-29`` ❌ + ``--MM-DD`` (omitted year) ``--04-29`` ❌ ``--MMDD`` (omitted year) ``--0429`` ❌ - ``±YYYYY-MM`` (>4 digit year) ``+10000-04`` ❌ - ``+YYYY-MM`` (leading +) ``+2018-04`` ❌ - ``-YYYY-MM`` (negative -) ``-2018-04`` ❌ + ``±YYYYY-MM`` (>4 digit year) ``+10000-04`` ❌ + ``+YYYY-MM`` (leading +) ``+2018-04`` ❌ + ``-YYYY-MM`` (negative -) ``-2018-04`` ❌ ============================= ============== ================== Week dates or ordinal dates are not currently supported. @@ -247,7 +224,7 @@ Week dates or ordinal dates are not currently supported. ``YYYY-Www-D`` (week date) ``2009-W01-1`` ❌ ``YYYYWwwD`` (week date) ``2009-W01-1`` ❌ ``YYYY-DDD`` (ordinal date) ``1981-095`` ❌ - ``YYYYDDD`` (ordinal date) ``1981095`` ❌ + ``YYYYDDD`` (ordinal date) ``1981095`` ❌ ============================= ============== ================== Time Formats @@ -264,22 +241,22 @@ The following time formats are supported: .. 
table:: :widths: auto - =================================== =================== ============== - Format Example Supported - =================================== =================== ============== - ``hh`` ``11`` ✅ - ``hhmm`` ``1130`` ✅ - ``hh:mm`` ``11:30`` ✅ - ``hhmmss`` ``113059`` ✅ - ``hh:mm:ss`` ``11:30:59`` ✅ - ``hhmmss.ssssss`` ``113059.123456`` ✅ - ``hh:mm:ss.ssssss`` ``11:30:59.123456`` ✅ - ``hhmmss,ssssss`` ``113059,123456`` ✅ - ``hh:mm:ss,ssssss`` ``11:30:59,123456`` ✅ - Midnight (special case) ``24:00:00`` ✅ - ``hh.hhh`` (fractional hours) ``11.5`` ❌ - ``hh:mm.mmm`` (fractional minutes) ``11:30.5`` ❌ - =================================== =================== ============== + =================================== =================== ============== + Format Example Supported + =================================== =================== ============== + ``hh`` ``11`` ✅ + ``hhmm`` ``1130`` ✅ + ``hh:mm`` ``11:30`` ✅ + ``hhmmss`` ``113059`` ✅ + ``hh:mm:ss`` ``11:30:59`` ✅ + ``hhmmss.ssssss`` ``113059.123456`` ✅ + ``hh:mm:ss.ssssss`` ``11:30:59.123456`` ✅ + ``hhmmss,ssssss`` ``113059,123456`` ✅ + ``hh:mm:ss,ssssss`` ``11:30:59,123456`` ✅ + Midnight (special case) ``24:00:00`` ✅ + ``hh.hhh`` (fractional hours) ``11.5`` ❌ + ``hh:mm.mmm`` (fractional minutes) ``11:30.5`` ❌ + =================================== =================== ============== **Note:** Python datetime objects only have microsecond precision (6 digits). Any additional precision will be truncated. @@ -291,9 +268,9 @@ Time zone information may be provided in one of the following formats: .. 
table:: :widths: auto - ========== ========== =========== - Format Example Supported - ========== ========== =========== + ========== ========== =========== + Format Example Supported + ========== ========== =========== ``Z`` ``Z`` ✅ ``z`` ``z`` ✅ ``±hh`` ``+11`` ✅ diff --git a/module.c b/module.c index c9f1677..ecf414c 100644 --- a/module.c +++ b/module.c @@ -1,12 +1,11 @@ #include #include #include +#include "timezone.h" #define STRINGIZE(x) #x #define EXPAND_AND_STRINGIZE(x) STRINGIZE(x) -#define PY_VERSION_AT_LEAST_32 \ - ((PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 2) || PY_MAJOR_VERSION > 3) #define PY_VERSION_AT_LEAST_33 \ ((PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3) || PY_MAJOR_VERSION > 3) #define PY_VERSION_AT_LEAST_36 \ @@ -14,8 +13,14 @@ #define PY_VERSION_AT_LEAST_37 \ ((PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 7) || PY_MAJOR_VERSION > 3) -#if !PY_VERSION_AT_LEAST_37 -static PyObject *fixed_offset; +// PyPy compatibility for cPython 3.7's Timezone API was added to PyPy 7.3.6 +// https://foss.heptapod.net/pypy/pypy/-/merge_requests/826 +#ifdef PYPY_VERSION + #define SUPPORTS_37_TIMEZONE_API \ + (PYPY_VERSION_NUM >= 0x07030600) +#else + #define SUPPORTS_37_TIMEZONE_API \ + PY_VERSION_AT_LEAST_37 #endif static PyObject *utc; @@ -427,32 +432,34 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo, int rfc3339_only) tzminute += 60 * tzhour; tzminute *= tzsign; -#if !PY_VERSION_AT_LEAST_32 - if (fixed_offset == NULL || utc == NULL) { - PyErr_SetString(PyExc_ImportError, - "Cannot parse a timestamp with time zone " - "information without the pytz dependency. 
" - "Install it with `pip install pytz`."); - return NULL; - } -#endif - if (tzminute == 0) { tzinfo = utc; } - else { -#if PY_VERSION_AT_LEAST_37 - delta = PyDelta_FromDSU(0, 60 * tzminute, 0); - tzinfo = PyTimeZone_FromOffset(delta); + else if (abs(tzminute) >= 1440) { + /* Format the error message as if we were still using pytz + * for Python 2 and datetime.timezone for Python 3. + * This is done to maintain complete backwards + * compatibility with ciso8601 2.0.x. Perhaps change to a + * simpler message in ciso8601 v3.0.0. + */ +#if PY_MAJOR_VERSION >= 3 + delta = PyDelta_FromDSU(0, tzminute * 60, 0); + PyErr_Format(PyExc_ValueError, + "offset must be a timedelta" + " strictly between -timedelta(hours=24) and" + " timedelta(hours=24)," + " not %R.", + delta); Py_DECREF(delta); -#elif PY_VERSION_AT_LEAST_32 - tzinfo = PyObject_CallFunction( - fixed_offset, "N", - PyDelta_FromDSU(0, 60 * tzminute, 0)); #else - tzinfo = - PyObject_CallFunction(fixed_offset, "i", tzminute); + PyErr_Format(PyExc_ValueError, + "('absolute offset is too large', %d)", + tzminute); #endif + return NULL; + } + else { + tzinfo = new_fixed_offset(60 * tzminute); if (tzinfo == NULL) /* ie. 
PyErr_Occurred() */ return NULL; } @@ -542,12 +549,6 @@ PyInit_ciso8601(void) initciso8601(void) #endif { -#if !PY_VERSION_AT_LEAST_32 - PyObject *pytz; -#elif !PY_VERSION_AT_LEAST_37 - PyObject *datetime; -#endif - #if PY_MAJOR_VERSION >= 3 PyObject *module = PyModule_Create(&moduledef); #else @@ -558,28 +559,23 @@ initciso8601(void) EXPAND_AND_STRINGIZE(CISO8601_VERSION)); PyDateTime_IMPORT; -#if PY_VERSION_AT_LEAST_37 - utc = PyDateTime_TimeZone_UTC; -#elif PY_VERSION_AT_LEAST_32 - datetime = PyImport_ImportModule("datetime"); - if (datetime == NULL) - return NULL; - fixed_offset = PyObject_GetAttrString(datetime, "timezone"); - if (fixed_offset == NULL) - return NULL; - utc = PyObject_GetAttrString(fixed_offset, "utc"); - if (utc == NULL) + + // PyMODINIT_FUNC is void in Python 2, returns PyObject* in Python 3 + if (initialize_timezone_code(module) < 0) { +#if PY_MAJOR_VERSION >= 3 return NULL; #else - pytz = PyImport_ImportModule("pytz"); - if (pytz == NULL) { - PyErr_Clear(); - } - else { - fixed_offset = PyObject_GetAttrString(pytz, "FixedOffset"); - utc = PyObject_GetAttrString(pytz, "UTC"); + return; +#endif } + +#if SUPPORTS_37_TIMEZONE_API + utc = PyDateTime_TimeZone_UTC; +#else + utc = new_fixed_offset(0); #endif + +// PyMODINIT_FUNC is void in Python 2, returns PyObject* in Python 3 #if PY_MAJOR_VERSION >= 3 return module; #endif diff --git a/setup.py b/setup.py index 303f1cd..e5de271 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ ext_modules=[ Extension( "ciso8601", - sources=["module.c"], + sources=["module.c", "timezone.c"], define_macros=[("CISO8601_VERSION", VERSION)], ) ], diff --git a/tests/test_timezone.py b/tests/test_timezone.py new file mode 100644 index 0000000..caa6955 --- /dev/null +++ b/tests/test_timezone.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +import sys + +from datetime import datetime, timedelta + +from ciso8601 import FixedOffset + +if sys.version_info.major == 2: + # We use unittest2 since it has a backport of the 
`unittest.TestCase.assertRaisesRegex` method, + # which is called `assertRaisesRegexp` in Python 2. This saves us the hassle of monkey-patching + # the class ourselves. + import unittest2 as unittest +else: + import unittest + + +class TimezoneTestCase(unittest.TestCase): + def test_utcoffset(self): + if sys.version_info >= (3, 2): + from datetime import timezone + for minutes in range(-1439, 1440): + td = timedelta(minutes=minutes) + tz = timezone(td) + built_in_dt = datetime(2014, 2, 3, 10, 35, 27, 234567, tzinfo=tz) + our_dt = datetime(2014, 2, 3, 10, 35, 27, 234567, tzinfo=FixedOffset(minutes * 60)) + self.assertEqual(built_in_dt.utcoffset(), our_dt.utcoffset(), "`utcoffset` output did not match for offset: {minutes}".format(minutes=minutes)) + else: + self.assertEqual(FixedOffset(0).utcoffset(), timedelta(minutes=0)) + self.assertEqual(FixedOffset(+0).utcoffset(), timedelta(minutes=0)) + self.assertEqual(FixedOffset(-0).utcoffset(), timedelta(minutes=0)) + self.assertEqual(FixedOffset(-4980).utcoffset(), timedelta(hours=-1, minutes=-23)) + self.assertEqual(FixedOffset(+45240).utcoffset(), timedelta(hours=12, minutes=34)) + + def test_dst(self): + if sys.version_info >= (3, 2): + from datetime import timezone + for minutes in range(-1439, 1440): + td = timedelta(minutes=minutes) + tz = timezone(td) + built_in_dt = datetime(2014, 2, 3, 10, 35, 27, 234567, tzinfo=tz) + our_dt = datetime(2014, 2, 3, 10, 35, 27, 234567, tzinfo=FixedOffset(minutes * 60)) + self.assertEqual(built_in_dt.dst(), our_dt.dst(), "`dst` output did not match for offset: {minutes}".format(minutes=minutes)) + else: + self.assertIsNone(FixedOffset(0).dst(), "UTC") + self.assertIsNone(FixedOffset(+0).dst(), "UTC") + self.assertIsNone(FixedOffset(-0).dst(), "UTC") + self.assertIsNone(FixedOffset(-4980).dst(), "UTC-01:23") + self.assertIsNone(FixedOffset(+45240).dst(), "UTC+12:34") + + def test_tzname(self): + if sys.version_info >= (3, 2): + from datetime import timezone + for minutes in 
range(-1439, 1440): + td = timedelta(minutes=minutes) + tz = timezone(td) + built_in_dt = datetime(2014, 2, 3, 10, 35, 27, 234567, tzinfo=tz) + our_dt = datetime(2014, 2, 3, 10, 35, 27, 234567, tzinfo=FixedOffset(minutes * 60)) + self.assertEqual(built_in_dt.tzname(), our_dt.tzname(), "`tzname` output did not match for offset: {minutes}".format(minutes=minutes)) + else: + self.assertEqual(FixedOffset(0).tzname(), "UTC+00:00") + self.assertEqual(FixedOffset(+0).tzname(), "UTC+00:00") + self.assertEqual(FixedOffset(-0).tzname(), "UTC+00:00") + self.assertEqual(FixedOffset(-4980).tzname(), "UTC-01:23") + self.assertEqual(FixedOffset(+45240).tzname(), "UTC+12:34") + +if __name__ == '__main__': + unittest.main() diff --git a/tests/tests.py b/tests/tests.py index 4fe1f17..51ff81c 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1,9 +1,12 @@ # -*- coding: utf-8 -*- +import copy import datetime +import pickle +import re import sys -from ciso8601 import parse_datetime, parse_datetime_as_naive, parse_rfc3339 +from ciso8601 import FixedOffset, parse_datetime, parse_datetime_as_naive, parse_rfc3339 from generate_test_timestamps import generate_valid_timestamp_and_datetime, generate_invalid_timestamp if sys.version_info.major == 2: @@ -53,6 +56,13 @@ def test_special_midnight(self): datetime.datetime(2014, 2, 4, 0, 0, 0), ) + def test_returns_built_in_utc_if_available(self): + # Python 3.7 added a built-in UTC object + timestamp = '2018-01-01T00:00:00.00Z' + if sys.version_info >= (3, 7): + self.assertIs(parse_datetime(timestamp).tzinfo, datetime.timezone.utc) + else: + self.assertIsInstance(parse_datetime(timestamp).tzinfo, FixedOffset) class InvalidTimestampTestCase(unittest.TestCase): # Many invalid test cases are covered by `test_parse_auto_generated_invalid_formats`, @@ -275,12 +285,21 @@ def test_invalid_tz_minute(self): ) def test_invalid_tz_offsets_too_large(self): - # The Python interpreter crashes if you give the datetime constructor a TZ offset with an absolute 
value >= 1440 - # TODO: Determine whether these are valid ISO 8601 values and therefore whether ciso8601 should support them. + # The TZ offsets with an absolute value >= 1440 minutes are not supported by the tzinfo spec. + # See https://docs.python.org/3/library/datetime.html#datetime.tzinfo.utcoffset + + # Error message differs whether or not we are using pytz or datetime.timezone + # (and also by which Python version. Python 3.7 has different timedelta.repr()) + # Of course we no longer use either, but for backwards compatibility + # with v2.0.x, we did not change the error messages. + if sys.version_info.major >= 3: + expected_error_message = re.escape("offset must be a timedelta strictly between -timedelta(hours=24) and timedelta(hours=24), not {0}.".format(repr(datetime.timedelta(minutes=-5940)))) + else: + expected_error_message = r"\('absolute offset is too large', -5940\)" + self.assertRaisesRegex( ValueError, - # Error message differs whether or not we are using pytz or datetime.timezone - r"^offset must be a timedelta strictly between" if sys.version_info.major >= 3 else r"\('absolute offset is too large', -5940\)", + expected_error_message, parse_datetime, "2018-01-01T00:00:00.00-99", ) @@ -359,6 +378,23 @@ def test_invalid_rfc3339_timestamps(self): parse_rfc3339(timestamp) +class PicklingTestCase(unittest.TestCase): + # Found as a result of https://github.com/movermeyer/backports.datetime_fromisoformat/issues/12 + def test_basic_pickle_and_copy(self): + dt = parse_datetime('2018-11-01 20:42:09') + dt2 = pickle.loads(pickle.dumps(dt)) + self.assertEqual(dt, dt2) + dt3 = copy.deepcopy(dt) + self.assertEqual(dt, dt3) + + # FixedOffset + dt = parse_datetime('2018-11-01 20:42:09+01:30') + dt2 = pickle.loads(pickle.dumps(dt)) + self.assertEqual(dt, dt2) + dt3 = copy.deepcopy(dt) + self.assertEqual(dt, dt3) + + class GithubIssueRegressionTestCase(unittest.TestCase): # These are test cases that were provided in GitHub issues submitted to ciso8601. 
# They are kept here as regression tests. diff --git a/timezone.c b/timezone.c new file mode 100644 index 0000000..8b4abdb --- /dev/null +++ b/timezone.c @@ -0,0 +1,239 @@ +/* This code was originally copied from Pendulum +(https://github.com/sdispater/pendulum/blob/13ff4a0250177f77e4ff2e7bd1f442d954e66b22/pendulum/parsing/_iso8601.c#L176) +Pendulum (like ciso8601) is MIT licensed, so we have included a copy of its +license here. +*/ + +/* +Copyright (c) 2015 Sébastien Eustace + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include "timezone.h" + +#include +#include +#include + +#define SECS_PER_MIN 60 +#define SECS_PER_HOUR (60 * SECS_PER_MIN) +#define TWENTY_FOUR_HOURS_IN_SECONDS 86400 + +#define PY_VERSION_AT_LEAST_36 \ + ((PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 6) || PY_MAJOR_VERSION > 3) + +/* + * class FixedOffset(tzinfo): + */ +typedef struct { + // Seconds offset from UTC. + // Must be in range (-86400, 86400) seconds exclusive. + // ie. 
(-1440, 1440) minutes exclusive. + PyObject_HEAD int offset; +} FixedOffset; + +/* + * def __init__(self, offset): + * self.offset = offset + */ +static int +FixedOffset_init(FixedOffset *self, PyObject *args, PyObject *kwargs) +{ + int offset; + if (!PyArg_ParseTuple(args, "i", &offset)) + return -1; + + if (abs(offset) >= TWENTY_FOUR_HOURS_IN_SECONDS) { + PyErr_Format(PyExc_ValueError, + "offset must be an integer in the range (-86400, 86400), " + "exclusive"); + return -1; + } + + self->offset = offset; + return 0; +} + +/* + * def utcoffset(self, dt): + * return timedelta(seconds=self.offset * 60) + */ +static PyObject * +FixedOffset_utcoffset(FixedOffset *self, PyObject *args) +{ + return PyDelta_FromDSU(0, self->offset, 0); +} + +/* + * def dst(self, dt): + * return timedelta(seconds=self.offset * 60) + */ +static PyObject * +FixedOffset_dst(FixedOffset *self, PyObject *args) +{ + Py_RETURN_NONE; +} + +/* + * def tzname(self, dt): + * sign = '+' + * if self.offset < 0: + * sign = '-' + * return "%s%d:%d" % (sign, self.offset / 60, self.offset % 60) + */ +static PyObject * +FixedOffset_tzname(FixedOffset *self, PyObject *args) +{ + + int offset = self->offset; + + if (offset == 0){ +#if PY_VERSION_AT_LEAST_36 + return PyUnicode_FromString("UTC"); +#else + return PyUnicode_FromString("UTC+00:00"); +#endif + } else { + char result_tzname[10] = {0}; + char sign = '+'; + + if (offset < 0) { + sign = '-'; + offset *= -1; + } + snprintf(result_tzname, 10, "UTC%c%02u:%02u", sign, + (offset / SECS_PER_HOUR) & 31, + offset / SECS_PER_MIN % SECS_PER_MIN); + return PyUnicode_FromString(result_tzname); + } +} + +/* + * def __repr__(self): + * return self.tzname() + */ +static PyObject * +FixedOffset_repr(FixedOffset *self) +{ + return FixedOffset_tzname(self, NULL); +} + +/* + * def __getinitargs__(self): + * return (self.offset,) + */ +static PyObject * +FixedOffset_getinitargs(FixedOffset *self) +{ + PyObject *args = PyTuple_Pack(1, PyLong_FromLong(self->offset)); + 
return args; +} + +/* + * Class member / class attributes + */ +static PyMemberDef FixedOffset_members[] = { + {"offset", T_INT, offsetof(FixedOffset, offset), 0, "UTC offset"}, {NULL}}; + +/* + * Class methods + */ +static PyMethodDef FixedOffset_methods[] = { + {"utcoffset", (PyCFunction)FixedOffset_utcoffset, METH_VARARGS, ""}, + {"dst", (PyCFunction)FixedOffset_dst, METH_VARARGS, ""}, + {"tzname", (PyCFunction)FixedOffset_tzname, METH_VARARGS, ""}, + {"__getinitargs__", (PyCFunction)FixedOffset_getinitargs, METH_VARARGS, + ""}, + {NULL}}; + +static PyTypeObject FixedOffset_type = { + PyVarObject_HEAD_INIT(NULL, 0) "ciso8601.FixedOffset", /* tp_name */ + sizeof(FixedOffset), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_as_async */ + (reprfunc)FixedOffset_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc)FixedOffset_repr, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + "TZInfo with fixed offset", /* tp_doc */ +}; + +/* + * Instantiate new FixedOffset_type object + * Skip overhead of calling PyObject_New and PyObject_Init. + * Directly allocate object. + * Note that this also doesn't do any validation of the offset parameter. + * Callers must ensure that offset is within \ + * the range (-86400, 86400), exclusive. 
+ */ +PyObject * +new_fixed_offset_ex(int offset, PyTypeObject *type) +{ + FixedOffset *self = (FixedOffset *)(type->tp_alloc(type, 0)); + + if (self != NULL) + self->offset = offset; + + return (PyObject *)self; +} + +PyObject * +new_fixed_offset(int offset) +{ + return new_fixed_offset_ex(offset, &FixedOffset_type); +} + +/* ------------------------------------------------------------- */ + +int +initialize_timezone_code(PyObject *module) +{ + PyDateTime_IMPORT; + FixedOffset_type.tp_new = PyType_GenericNew; + FixedOffset_type.tp_base = PyDateTimeAPI->TZInfoType; + FixedOffset_type.tp_methods = FixedOffset_methods; + FixedOffset_type.tp_members = FixedOffset_members; + FixedOffset_type.tp_init = (initproc)FixedOffset_init; + + if (PyType_Ready(&FixedOffset_type) < 0) + return -1; + + Py_INCREF(&FixedOffset_type); + if (PyModule_AddObject(module, "FixedOffset", + (PyObject *)&FixedOffset_type) < 0) { + Py_DECREF(module); + Py_DECREF(&FixedOffset_type); + return -1; + } + + return 0; +} diff --git a/timezone.h b/timezone.h new file mode 100644 index 0000000..dd0d829 --- /dev/null +++ b/timezone.h @@ -0,0 +1,12 @@ +#ifndef CISO_TZINFO_H +#define CISO_TZINFO_H + +#include + +PyObject * +new_fixed_offset(int offset); + +int +initialize_timezone_code(PyObject *module); + +#endif From cd7387b5b7cd44c1d600588b7f71bbbec1c7a579 Mon Sep 17 00:00:00 2001 From: Michael Overmeyer Date: Sat, 26 Jun 2021 21:21:07 -0400 Subject: [PATCH 4/4] Fix memory leak in cases where there are unused characters in the timestamp --- module.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/module.c b/module.c index ecf414c..b301fff 100644 --- a/module.c +++ b/module.c @@ -480,6 +480,8 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo, int rfc3339_only) /* Make sure that there is no more to parse. */ if (*c != '\0') { PyErr_Format(PyExc_ValueError, "unconverted data remains: '%s'", c); + if (tzinfo != Py_None && tzinfo != utc) + Py_DECREF(tzinfo); return NULL; }