From 8702405b7090d9234b185e154193e5b98aa1910f Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Sat, 25 Jan 2025 18:30:38 -0800 Subject: [PATCH 1/6] added n3 test to check for internal float normalization made as a separate commit to illustrate the old broken behavior prior to the fix in the next commit --- test/test_n3.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/test_n3.py b/test/test_n3.py index f3d7eeb07..fe25f6014 100644 --- a/test/test_n3.py +++ b/test/test_n3.py @@ -251,6 +251,32 @@ def test_empty_prefix(self): g2 ), "Document with declared empty prefix must match default #" + def test_float_no_norm(self): + import rdflib + _ps = rdflib.NORMALIZE_LITERALS + try: + bads = [] + for norm_lit in (True, False): + rdflib.NORMALIZE_LITERALS = norm_lit + g1 = Graph() + g1.parse(data=":a :b 1e10, 1e0 .", format="n3") + strep = [str(o) for o in g1.objects()] + if norm_lit: + if '1e10' not in strep and '1e0' not in strep: + pass + else: + bads.append(('NOT normalized when should have been', strep)) + else: + if '1e10' in strep and '1e0' in strep: + pass + else: + bads.append(('normalized when it should NOT have been', strep)) + + finally: + rdflib.NORMALIZE_LITERALS = _ps + + assert not bads, bads + class TestRegularExpressions: def test_exponents(self): From 66f626c169f7972c212797251256b85a2d73441b Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Sat, 25 Jan 2025 18:39:27 -0800 Subject: [PATCH 2/6] notation3.py: don't normalize float representation fix behavior of the n3 parser family to avoid normalizing raw float string representation which makes it impossible to roundtrip the exact original string representation of e.g. 
1e10 --- rdflib/plugins/parsers/notation3.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index acc56215b..7dd87d50a 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -376,6 +376,10 @@ def unicodeExpand(m: Match) -> str: langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") +class sfloat(str): + """ don't normalize raw XSD.double string representation """ + + class SinkParser: def __init__( self, @@ -1522,7 +1526,7 @@ def nodeOrLiteral(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: m = exponent_syntax.match(argstr, i) if m: j = m.end() - res.append(float(argstr[i:j])) + res.append(sfloat(argstr[i:j])) return j m = decimal_syntax.match(argstr, i) @@ -1913,7 +1917,7 @@ def normalise(self, f: Formula | Graph | None, n: int) -> Literal: ... def normalise(self, f: Formula | Graph | None, n: Decimal) -> Literal: ... @overload - def normalise(self, f: Formula | Graph | None, n: float) -> Literal: ... + def normalise(self, f: Formula | Graph | None, n: sfloat) -> Literal: ... @overload def normalise(self, f: Formula | Graph | None, n: Node) -> Node: ... @@ -1921,7 +1925,7 @@ def normalise(self, f: Formula | Graph | None, n: Node) -> Node: ... 
def normalise( self, f: Formula | Graph | None, - n: Union[tuple[int, str], bool, int, Decimal, float, Node, _AnyT], + n: Union[tuple[int, str], bool, int, Decimal, sfloat, Node, _AnyT], ) -> Union[URIRef, Literal, BNode, Node, _AnyT]: if isinstance(n, tuple): return URIRef(str(n[1])) @@ -1941,7 +1945,7 @@ def normalise( s = Literal(value, datatype=DECIMAL_DATATYPE) return s - if isinstance(n, float): + if isinstance(n, sfloat): s = Literal(str(n), datatype=DOUBLE_DATATYPE) return s From 296bc7760ee0e61b23653c90126d0ad64a9b6aec Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 12 Sep 2025 06:43:36 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- rdflib/plugins/parsers/notation3.py | 2 +- test/test_n3.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index ddaf1ce2e..d9c2e7c0b 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -385,7 +385,7 @@ def unicodeExpand(m: Match) -> str: class sfloat(str): - """ don't normalize raw XSD.double string representation """ + """don't normalize raw XSD.double string representation""" class SinkParser: diff --git a/test/test_n3.py b/test/test_n3.py index 518c190d6..433fcf578 100644 --- a/test/test_n3.py +++ b/test/test_n3.py @@ -253,6 +253,7 @@ def test_empty_prefix(self): def test_float_no_norm(self): import rdflib + _ps = rdflib.NORMALIZE_LITERALS try: bads = [] @@ -262,15 +263,15 @@ def test_float_no_norm(self): g1.parse(data=":a :b 1e10, 1e0 .", format="n3") strep = [str(o) for o in g1.objects()] if norm_lit: - if '1e10' not in strep and '1e0' not in strep: + if "1e10" not in strep and "1e0" not in strep: pass else: - bads.append(('NOT normalized when should have been', strep)) + bads.append(("NOT normalized when should have been", 
strep)) else: - if '1e10' in strep and '1e0' in strep: + if "1e10" in strep and "1e0" in strep: pass else: - bads.append(('normalized when it should NOT have been', strep)) + bads.append(("normalized when it should NOT have been", strep)) finally: rdflib.NORMALIZE_LITERALS = _ps From 720c74e4fa6bd3d951aa16315fef7d7f13e7bd30 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Fri, 12 Sep 2025 16:49:25 +1000 Subject: [PATCH 4/6] style: add noqa to sfloat class --- rdflib/plugins/parsers/notation3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index d9c2e7c0b..b0c0db727 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -384,7 +384,7 @@ def unicodeExpand(m: Match) -> str: langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") -class sfloat(str): +class sfloat(str): # noqa: N801 """don't normalize raw XSD.double string representation""" From 854dc6eb47259760c3c9258bfb916f96e493bfc6 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Fri, 12 Sep 2025 16:53:25 +1000 Subject: [PATCH 5/6] chore: remove unused mypy type ignore --- rdflib/plugins/parsers/notation3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index b0c0db727..882d08664 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -1969,7 +1969,7 @@ def normalise( # f.universals[n] = f.newBlankNode() # return f.universals[n] # type error: Incompatible return value type (got "Union[int, _AnyT]", expected "Union[URIRef, Literal, BNode, _AnyT]") [return-value] - return n # type: ignore[return-value] + return n def intern(self, something: _AnyT) -> _AnyT: return something From 06bf1b10466a12c3d22c070e44e786ea155013c7 Mon Sep 17 00:00:00 2001 From: Edmond Chuc Date: Thu, 18 Sep 2025 10:57:28 +1000 Subject: [PATCH 6/6] test: refactor test_float_no_norm to 
use pytest parametrization --- test/test_n3.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/test/test_n3.py b/test/test_n3.py index 433fcf578..40f871868 100644 --- a/test/test_n3.py +++ b/test/test_n3.py @@ -251,32 +251,22 @@ def test_empty_prefix(self): g2 ), "Document with declared empty prefix must match default #" - def test_float_no_norm(self): + @pytest.mark.parametrize( + "do_normalize_literal, expected_result", + [(True, {"1.0", "10000000000.0"}), (False, {"1e10", "1e0"})], + ) + def test_float_no_norm(self, do_normalize_literal, expected_result): import rdflib - _ps = rdflib.NORMALIZE_LITERALS + original_normalize_literal = rdflib.NORMALIZE_LITERALS try: - bads = [] - for norm_lit in (True, False): - rdflib.NORMALIZE_LITERALS = norm_lit - g1 = Graph() - g1.parse(data=":a :b 1e10, 1e0 .", format="n3") - strep = [str(o) for o in g1.objects()] - if norm_lit: - if "1e10" not in strep and "1e0" not in strep: - pass - else: - bads.append(("NOT normalized when should have been", strep)) - else: - if "1e10" in strep and "1e0" in strep: - pass - else: - bads.append(("normalized when it should NOT have been", strep)) - + rdflib.NORMALIZE_LITERALS = do_normalize_literal + g1 = Graph() + g1.parse(data=":a :b 1e10, 1e0 .", format="n3") + values = set(str(o) for o in g1.objects()) + assert values == expected_result finally: - rdflib.NORMALIZE_LITERALS = _ps - - assert not bads, bads + rdflib.NORMALIZE_LITERALS = original_normalize_literal class TestRegularExpressions: