From 00d37dd8f09ff56b8668dfaff9900143ebd3bcfc Mon Sep 17 00:00:00 2001 From: Lukas Plank <128675670+lu-pl@users.noreply.github.com> Date: Sat, 31 May 2025 11:51:02 +0200 Subject: [PATCH 1/2] fix: remove Literal.toPython date conversion for gYear/gYearMonth (#3115) * fix: remove Literal.toPython casting for gYear and gYearMonth Issue #3078 reports that rdflib.Literal.toPython casting of xsd:gYear and xsd:gYearMonth to datetime objects is not possible, as there is no appropriate Python equivalent for those types. The current implementation casts xsd:gYear and xsd:gYearMonth to datetime objects assuming January 1st for xsd:gYear and the 1st day of the given month for xsd:gYearMonth. This is plain wrong. The change removes casting to datetime objects in rdflib.Literal.toPython for xsd:gYear and xsd:gYearMonth. Closes #3078. * test: adapt rdflib.Literal tests to gYear/gYearMonth toPython change --------- Co-authored-by: Nicholas Car --- rdflib/term.py | 4 -- rdflib/xsd_datetime.py | 73 ------------------------------- test/test_literal/test_literal.py | 26 ++++++----- 3 files changed, 14 insertions(+), 89 deletions(-) diff --git a/rdflib/term.py b/rdflib/term.py index 3e397674b0..3e1b56ed6e 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -76,8 +76,6 @@ parse_time, parse_xsd_date, parse_xsd_duration, - parse_xsd_gyear, - parse_xsd_gyearmonth, ) if TYPE_CHECKING: @@ -2065,8 +2063,6 @@ def _castPythonToLiteral( # noqa: N802 None: None, # plain literals map directly to value space URIRef(_XSD_PFX + "time"): parse_time, URIRef(_XSD_PFX + "date"): parse_xsd_date, - URIRef(_XSD_PFX + "gYear"): parse_xsd_gyear, - URIRef(_XSD_PFX + "gYearMonth"): parse_xsd_gyearmonth, URIRef(_XSD_PFX + "dateTime"): parse_datetime, URIRef(_XSD_PFX + "duration"): parse_xsd_duration, URIRef(_XSD_PFX + "dayTimeDuration"): parse_xsd_duration, diff --git a/rdflib/xsd_datetime.py b/rdflib/xsd_datetime.py index bc3bebd67c..e05dd3c137 100644 --- a/rdflib/xsd_datetime.py +++ b/rdflib/xsd_datetime.py 
@@ -593,79 +593,6 @@ def parse_xsd_date(date_string: str): return parse_date(date_string if not minus else ("-" + date_string)) -def parse_xsd_gyear(gyear_string: str): - """ - XSD gYear has more features than ISO8601 dates, specifically - XSD allows timezones on a gYear, that must be stripped off. - """ - if gyear_string.endswith("Z") or gyear_string.endswith("z"): - gyear_string = gyear_string[:-1] - if gyear_string.startswith("-"): - gyear_string = gyear_string[1:] - minus = True - else: - minus = False - has_plus = gyear_string.rfind("+") - if has_plus > 0: - # Drop the +07:00 timezone part - gyear_string = gyear_string[:has_plus] - else: - split_parts = gyear_string.rsplit("-", 1) - if len(split_parts) > 1 and ":" in split_parts[-1]: - # Drop the -09:00 timezone part - gyear_string = split_parts[0] - if len(gyear_string) < 4: - raise ValueError("gYear string must be at least 4 numerals in length") - gyear_string = gyear_string.lstrip("0") # strip all leading zeros - try: - y = int(gyear_string if not minus else ("-" + gyear_string)) - except ValueError: - raise ValueError("gYear string must be a valid integer") - return date(y, 1, 1) - - -def parse_xsd_gyearmonth(gym_string: str): - """ - XSD gYearMonth has more features than ISO8601 dates, specifically - XSD allows timezones on a gYearMonth, that must be stripped off. 
- """ - if gym_string.endswith("Z") or gym_string.endswith("z"): - gym_string = gym_string[:-1] - if gym_string.startswith("-"): - gym_string = gym_string[1:] - minus = True - else: - minus = False - has_plus = gym_string.rfind("+") - if has_plus > 0: - # Drop the +07:00 timezone part - gym_string = gym_string[:has_plus] - else: - split_parts = gym_string.rsplit("-", 1) - if len(split_parts) > 1 and ":" in split_parts[-1]: - # Drop the -09:00 timezone part - gym_string = split_parts[0] - year_month_parts = gym_string.split("-", 1) - if len(year_month_parts) < 2: - raise ValueError("XSD gYearMonth string must contain one dash") - - if len(year_month_parts[0]) < 4: - raise ValueError("gYearMonth Year part must be at least 4 numerals in length") - elif len(year_month_parts[1]) < 2: - raise ValueError("gYearMonth Month part must be exactly 2 numerals in length") - year_string = year_month_parts[0].lstrip("0") # strip all leading zeros - month_string = year_month_parts[1].lstrip("0") # strip all leading zeros - try: - y = int(year_string if not minus else ("-" + year_string)) - except ValueError: - raise ValueError("gYearMonth Year part must be a valid integer") - try: - m = int(month_string) - except ValueError: - raise ValueError("gYearMonth Month part must be a valid integer") - return date(y, m, 1) - - # Parse XSD Datetime is the same as ISO8601 Datetime # It uses datetime.fromisoformat for python 3.11 and above # or isodate.parse_datetime for older versions diff --git a/test/test_literal/test_literal.py b/test/test_literal/test_literal.py index a28e67e12e..d25c6120fd 100644 --- a/test/test_literal/test_literal.py +++ b/test/test_literal/test_literal.py @@ -848,21 +848,23 @@ def unlexify(s: str) -> str: ("0000-00-00", XSD.date, None), ("NOT A VALID HEX STRING", XSD.hexBinary, None), ("NOT A VALID BASE64 STRING", XSD.base64Binary, None), + # xsd:gYear and xsd:gYearMonth also do not get converted + ("1921-05", XSD.gYearMonth, None), + ("0001-01", XSD.gYearMonth, 
None), + ("0001-12", XSD.gYearMonth, None), + ("2002-01", XSD.gYearMonth, None), + ("9999-01", XSD.gYearMonth, None), + ("9999-12", XSD.gYearMonth, None), + ("1921", XSD.gYear, None), + ("2000", XSD.gYear, None), + ("0001", XSD.gYear, None), + ("9999", XSD.gYear, None), + ("1982", XSD.gYear, None), + ("2002", XSD.gYear, None), + # these literals get converted to python types ("1921-05-01", XSD.date, datetime.date), ("1921-05-01T00:00:00", XSD.dateTime, datetime.datetime), - ("1921-05", XSD.gYearMonth, datetime.date), - ("0001-01", XSD.gYearMonth, datetime.date), - ("0001-12", XSD.gYearMonth, datetime.date), - ("2002-01", XSD.gYearMonth, datetime.date), - ("9999-01", XSD.gYearMonth, datetime.date), - ("9999-12", XSD.gYearMonth, datetime.date), - ("1921", XSD.gYear, datetime.date), - ("2000", XSD.gYear, datetime.date), - ("0001", XSD.gYear, datetime.date), - ("9999", XSD.gYear, datetime.date), - ("1982", XSD.gYear, datetime.date), - ("2002", XSD.gYear, datetime.date), ("1921-05-01T00:00:00+00:30", XSD.dateTime, datetime.datetime), ("1921-05-01T00:00:00-00:30", XSD.dateTime, datetime.datetime), ("true", XSD.boolean, bool), From 6b8b4940b36a28b87f1605cdbcac3f6f4e1e5275 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Tue, 14 Oct 2025 15:53:39 +1000 Subject: [PATCH 2/2] style: apply black formatting --- test/test_literal/test_literal.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_literal/test_literal.py b/test/test_literal/test_literal.py index d25c6120fd..7e31fb0401 100644 --- a/test/test_literal/test_literal.py +++ b/test/test_literal/test_literal.py @@ -861,7 +861,6 @@ def unlexify(s: str) -> str: ("9999", XSD.gYear, None), ("1982", XSD.gYear, None), ("2002", XSD.gYear, None), - # these literals get converted to python types ("1921-05-01", XSD.date, datetime.date), ("1921-05-01T00:00:00", XSD.dateTime, datetime.datetime),