diff --git a/README.rst b/README.rst index 6bae91d..89901a8 100644 --- a/README.rst +++ b/README.rst @@ -12,10 +12,12 @@ ciso8601 :target: https://pypi.org/project/ciso8601/ ``ciso8601`` converts `ISO 8601`_ date time strings into Python datetime objects. + Since it's written as a C module, it is much faster than other Python libraries. Tested with Python 2.7, 3.4, 3.5, 3.6, 3.7b. .. _ISO 8601: https://en.wikipedia.org/wiki/ISO_8601 +.. _RFC 3339: https://tools.ietf.org/html/rfc3339 (Interested in working on projects like this? `Close.io`_ is looking for `great engineers`_ to join our team) @@ -208,9 +210,9 @@ Time Formats Times are optional and are separated from the date by the letter ``T``. -Consistent with `RFC 3339`_, ``ciso860`` also allows either a space character, or a lower-case ``t``, to be used instead of a ``T``. +Consistent with `RFC 3339`__, ``ciso8601`` also allows either a space character, or a lower-case ``t``, to be used instead of a ``T``. -.. _RFC 3339: https://stackoverflow.com/questions/522251/whats-the-difference-between-iso-8601-and-rfc-3339-date-formats) +__ https://stackoverflow.com/questions/522251/whats-the-difference-between-iso-8601-and-rfc-3339-date-formats The following time formats are supported: @@ -258,6 +260,18 @@ While the ISO 8601 specification allows the use of MINUS SIGN (U+2212) in the ti Consistent with `RFC 3339`_, ``ciso860`` also allows a lower-case ``z`` to be used instead of a ``Z``. +Strict RFC 3339 Parsing +----------------------- + +``ciso8601`` parses ISO 8601 datetimes, which can be thought of as a superset of `RFC 3339`_ (`roughly`_). In cases where you might want strict RFC 3339 parsing, ``ciso8601`` offers a ``parse_rfc3339`` function, which behaves in a similar manner to ``parse_datetime``: + +.. 
_roughly: https://stackoverflow.com/questions/522251/whats-the-difference-between-iso-8601-and-rfc-3339-date-formats + +``parse_rfc3339(dt: String): datetime`` is a function that takes a string and either: + +* Returns a properly parsed Python datetime, **if and only if** the **entire** string conforms to RFC 3339. +* Raises a ``ValueError`` with a description of the reason why the string doesn't conform to RFC 3339. + Ignoring Timezone Information While Parsing ------------------------------------------- diff --git a/ciso8601/__init__.pyi b/ciso8601/__init__.pyi index be7f8a1..e51f055 100644 --- a/ciso8601/__init__.pyi +++ b/ciso8601/__init__.pyi @@ -1,4 +1,5 @@ from datetime import datetime def parse_datetime(datetime_string: str) -> datetime: ... +def parse_rfc3339(datetime_string: str) -> datetime: ... def parse_datetime_as_naive(datetime_string: str) -> datetime: ... diff --git a/module.c b/module.c index d0bac49..1f58d68 100644 --- a/module.c +++ b/module.c @@ -80,10 +80,10 @@ format_unexpected_character_exception(char *field_name, char c, size_t index, #define IS_TIME_SEPARATOR (*c == ':') #define IS_TIME_ZONE_SEPARATOR \ (*c == 'Z' || *c == '-' || *c == '+' || *c == 'z') -#define IS_FRACTIONAL_SEPARATOR (*c == '.' || *c == ',') +#define IS_FRACTIONAL_SEPARATOR (*c == '.' || (*c == ',' && !rfc3339_only)) static PyObject * -_parse(PyObject *self, PyObject *args, int parse_any_tzinfo) +_parse(PyObject *self, PyObject *args, int parse_any_tzinfo, int rfc3339_only) { PyObject *obj; PyObject *tzinfo = Py_None; @@ -131,10 +131,20 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo) /* Day */ PARSE_INTEGER(day, 2, "day") } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "Datetime string not in RFC 3339 format."); + return NULL; + } else { day = 1; } } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "Datetime string not in RFC 3339 format."); + return NULL; + } else { /* Non-separated Month and Day (ie. 
MMDD) */ /* Month */ PARSE_INTEGER(month, 2, "month") @@ -234,6 +244,18 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo) PARSE_FRACTIONAL_SECOND() } } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "RFC 3339 requires the second to be " + "specified."); + return NULL; + } + } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "Colons separating time components are " + "mandatory in RFC 3339."); + return NULL; } else { /* Non-separated Minute and Second (ie. mmss) */ /* Minute */ @@ -251,11 +273,23 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo) } } } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "Minute and second are mandatory in RFC 3339"); + return NULL; + } if (hour == 24 && minute == 0 && second == 0 && usecond == 0) { /* Special case of 24:00:00, that is allowed in ISO 8601. It is * equivalent to 00:00:00 the following day */ + if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "An hour value of 24, while sometimes legal " + "in ISO 8601, is explicitly forbidden by RFC " + "3339."); + return NULL; + } hour = 0, minute = 0, second = 0, usecond = 0; time_is_midnight = 1; } @@ -298,6 +332,12 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo) /* tz minute */ PARSE_INTEGER(tzminute, 2, "tz minute") } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "Separator between hour and minute in UTC " + "offset is mandatory in RFC 3339"); + return NULL; + } else if (*c != '\0') { /* Optional tz minute */ PARSE_INTEGER(tzminute, 2, "tz minute") } @@ -348,6 +388,16 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo) } } } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "UTC offset is mandatory in RFC 3339 format."); + return NULL; + } + } + else if (rfc3339_only) { + PyErr_SetString(PyExc_ValueError, + "Time is mandatory in RFC 3339 format."); + return NULL; } /* Make sure that there is no more to parse. 
*/ @@ -377,13 +427,19 @@ _parse(PyObject *self, PyObject *args, int parse_any_tzinfo) static PyObject * parse_datetime_as_naive(PyObject *self, PyObject *args) { - return _parse(self, args, 0); + return _parse(self, args, 0, 0); } static PyObject * parse_datetime(PyObject *self, PyObject *args) { - return _parse(self, args, 1); + return _parse(self, args, 1, 0); +} + +static PyObject * +parse_rfc3339(PyObject *self, PyObject *args) +{ + return _parse(self, args, 1, 1); } static PyMethodDef CISO8601Methods[] = { @@ -391,6 +447,8 @@ static PyMethodDef CISO8601Methods[] = { "Parse a ISO8601 date time string."}, {"parse_datetime_as_naive", parse_datetime_as_naive, METH_VARARGS, "Parse a ISO8601 date time string, ignoring the time zone component."}, + {"parse_rfc3339", parse_rfc3339, METH_VARARGS, + "Parse an RFC 3339 date time string."}, {NULL, NULL, 0, NULL}}; #if PY_MAJOR_VERSION >= 3 diff --git a/tests.py b/tests.py index 8670099..b30650c 100644 --- a/tests.py +++ b/tests.py @@ -264,6 +264,51 @@ def test_invalid_tz_offsets_too_large(self): ) +class Rfc3339TestCase(unittest.TestCase): + def test_valid_rfc3339_timestamps(self): + """ + Validate that valid RFC 3339 datetimes are parseable by parse_rfc3339 + and produce the same result as parse_datetime. + """ + for string in [ + '2018-01-02T03:04:05Z', + '2018-01-02t03:04:05z', + '2018-01-02 03:04:05z', + '2018-01-02T03:04:05+00:00', + '2018-01-02T03:04:05-00:00', + '2018-01-02T03:04:05.12345Z', + '2018-01-02T03:04:05+01:23', + '2018-01-02T03:04:05-12:34', + '2018-01-02T03:04:05-12:34', + ]: + self.assertEqual(ciso8601.parse_datetime(string), + ciso8601.parse_rfc3339(string)) + + def test_invalid_rfc3339_timestamps(self): + """ + Validate that datetime strings that are valid ISO 8601 but invalid RFC + 3339 trigger a ValueError when passed to RFC 3339, and that this + ValueError explicitly mentions RFC 3339. 
+ """ + for timestamp in [ + "2018-01-02", # Missing mandatory time + "2018-01-02T03", # Missing mandatory minute and second + "2018-01-02T03Z", # Missing mandatory minute and second + "2018-01-02T03:04", # Missing mandatory minute and second + "2018-01-02T03:04Z", # Missing mandatory minute and second + "2018-01-02T03:04:01+04", # Missing mandatory offset minute + "2018-01-02T03:04:05", # Missing mandatory offset + "2018-01-02T03:04:05.12345", # Missing mandatory offset + "2018-01-02T24:00:00Z", # 24:00:00 is not valid in RFC 3339 + '20180102T03:04:05-12:34', # Missing mandatory date separators + '2018-01-02T030405-12:34', # Missing mandatory time separators + '2018-01-02T03:04:05-1234', # Missing mandatory offset separator + '2018-01-02T03:04:05,12345Z' # Invalid comma fractional second separator + ]: + with self.assertRaisesRegex(ValueError, r"RFC 3339", msg="Timestamp '{0}' was supposed to be invalid, but parsing it didn't raise ValueError.".format(timestamp)): + ciso8601.parse_rfc3339(timestamp) + + class GithubIssueRegressionTestCase(unittest.TestCase): # These are test cases that were provided in GitHub issues submitted to ciso8601. # They are kept here as regression tests.