From 0d19ad4be18a56c75b5267cc6cdbcf5bd43924d1 Mon Sep 17 00:00:00 2001 From: Jeff Lerman Date: Sat, 11 Mar 2023 20:48:27 -0800 Subject: [PATCH 1/3] add type-hints; including minor refactor of prettify_parsetree() to improve type-hinting --- rdflib/plugins/sparql/parser.py | 25 ++++++++++++-------- rdflib/plugins/sparql/parserutils.py | 35 +++++++++++++++++----------- test/test_sparql/test_sparql.py | 24 ++++++++++++++++++- 3 files changed, 60 insertions(+), 24 deletions(-) diff --git a/rdflib/plugins/sparql/parser.py b/rdflib/plugins/sparql/parser.py index 2a897f822..8989cbfac 100644 --- a/rdflib/plugins/sparql/parser.py +++ b/rdflib/plugins/sparql/parser.py @@ -6,6 +6,9 @@ import re import sys +from typing import Any +from typing import Optional as OptionalType +from typing import TextIO, Tuple, Union from pyparsing import CaselessKeyword as Keyword # watch out :) from pyparsing import ( @@ -37,15 +40,15 @@ # ---------------- ACTIONS -def neg(literal): +def neg(literal) -> rdflib.Literal: return rdflib.Literal(-literal, datatype=literal.datatype) -def setLanguage(terms): +def setLanguage(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: return rdflib.Literal(terms[0], lang=terms[1]) -def setDataType(terms): +def setDataType(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: return rdflib.Literal(terms[0], datatype=terms[1]) @@ -1508,25 +1511,27 @@ def expandCollection(terms): UpdateUnit.ignore("#" + restOfLine) -expandUnicodeEscapes_re = re.compile(r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I) +expandUnicodeEscapes_re: re.Pattern = re.compile( + r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I +) -def expandUnicodeEscapes(q): +def expandUnicodeEscapes(q: str) -> str: r""" The syntax of the SPARQL Query Language is expressed over code points in Unicode [UNICODE]. The encoding is always UTF-8 [RFC3629]. 
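
A quick sketch of the escape expansion this newly typed helper performs, in line with the stricter ValueError handling and the parametrized test added to test_sparql.py later in this patch (the literal strings are illustrative only):

    from rdflib.plugins.sparql.parser import expandUnicodeEscapes

    expandUnicodeEscapes(r"1234\u00e3\u00e5")  # -> "1234ãå"               (4-digit \uXXXX form)
    expandUnicodeEscapes(r"1234\u001000e5")    # -> "1234" + chr(0x1000E5)  (8-digit form)

    try:
        expandUnicodeEscapes(r"1234\u900000e5")  # 0x900000E5 is not a valid code point
    except ValueError as err:
        print(err)  # Invalid unicode code point: 900000e5
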
Unicode code points may also be expressed using an \ uXXXX (U+0 to U+FFFF) or \ UXXXXXXXX syntax (for U+10000 onwards) where X is a hexadecimal digit [0-9A-F] """ - def expand(m): + def expand(m: re.Match) -> str: try: return chr(int(m.group(1), 16)) - except: # noqa: E722 - raise Exception("Invalid unicode code point: " + m) + except (ValueError, OverflowError) as e: + raise ValueError("Invalid unicode code point: " + m.group(1)) from e return expandUnicodeEscapes_re.sub(expand, q) -def parseQuery(q): +def parseQuery(q: Union[str, bytes, TextIO]) -> ParseResults: if hasattr(q, "read"): q = q.read() if isinstance(q, bytes): @@ -1536,7 +1541,7 @@ def parseQuery(q): return Query.parseString(q, parseAll=True) -def parseUpdate(q): +def parseUpdate(q: Union[str, bytes, TextIO]): if hasattr(q, "read"): q = q.read() diff --git a/rdflib/plugins/sparql/parserutils.py b/rdflib/plugins/sparql/parserutils.py index 1f2e88eaf..09f19ff8b 100644 --- a/rdflib/plugins/sparql/parserutils.py +++ b/rdflib/plugins/sparql/parserutils.py @@ -1,10 +1,11 @@ from collections import OrderedDict from types import MethodType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, List, Tuple, Union from pyparsing import ParseResults, TokenConverter, originalTextFor from rdflib import BNode, Variable +from rdflib.term import Identifier if TYPE_CHECKING: from rdflib.plugins.sparql.sparql import FrozenBindings @@ -252,26 +253,34 @@ def setEvalFn(self, evalfn): return self -def prettify_parsetree(t, indent="", depth=0): - out = [] - if isinstance(t, ParseResults): - for e in t.asList(): - out.append(prettify_parsetree(e, indent, depth + 1)) - for k, v in sorted(t.items()): - out.append("%s%s- %s:\n" % (indent, " " * depth, k)) - out.append(prettify_parsetree(v, indent, depth + 1)) - elif isinstance(t, CompValue): +def prettify_parsetree(t: ParseResults, indent: str = "", depth: int = 0) -> str: + out: List[str] = [] + for e in t.asList(): + out.append(_prettify_sub_parsetree(e, indent, depth + 1)) + for k, v in sorted(t.items()): + out.append("%s%s- %s:\n" % (indent, " " * depth, k)) + out.append(_prettify_sub_parsetree(v, indent, depth + 1)) + return "".join(out) + + +def _prettify_sub_parsetree( + t: Union[Identifier, CompValue, set, list, dict, Tuple, bool, None], + indent: str = "", + depth: int = 0, +) -> str: + out: List[str] = [] + if isinstance(t, CompValue): out.append("%s%s> %s:\n" % (indent, " " * depth, t.name)) for k, v in t.items(): out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k)) - out.append(prettify_parsetree(v, indent, depth + 2)) + out.append(_prettify_sub_parsetree(v, indent, depth + 2)) elif isinstance(t, dict): for k, v in t.items(): out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k)) - out.append(prettify_parsetree(v, indent, depth + 2)) + out.append(_prettify_sub_parsetree(v, indent, depth + 2)) elif isinstance(t, list): for e in t: - out.append(prettify_parsetree(e, indent, depth + 1)) + out.append(_prettify_sub_parsetree(e, indent, depth + 1)) else: out.append("%s%s- %r\n" % (indent, " " * depth, t)) return "".join(out) diff --git a/test/test_sparql/test_sparql.py b/test/test_sparql/test_sparql.py index 02406bdf9..28669c604 100644 --- a/test/test_sparql/test_sparql.py +++ b/test/test_sparql/test_sparql.py @@ -16,7 +16,7 @@ from rdflib.plugins.sparql.algebra import translateQuery from rdflib.plugins.sparql.evaluate import evalPart from rdflib.plugins.sparql.evalutils import _eval -from rdflib.plugins.sparql.parser import parseQuery +from 
rdflib.plugins.sparql.parser import expandUnicodeEscapes, parseQuery from rdflib.plugins.sparql.parserutils import prettify_parsetree from rdflib.plugins.sparql.sparql import SPARQLError from rdflib.query import Result, ResultRow @@ -957,3 +957,25 @@ def test_sparql_describe( subjects = {s for s in r.graph.subjects() if not isinstance(s, BNode)} assert subjects == expected_subjects assert len(r.graph) == expected_size + + +@pytest.mark.parametrize( + "arg, expected_result, expected_valid", + [ + ("abc", "abc", True), + ("1234", "1234", True), + (r"1234\u0050", "1234P", True), + (r"1234\u00e3", "1234\u00e3", True), + (r"1234\u00e3\u00e5", "1234ãå", True), + (r"1234\u900000e5", "", False), + (r"1234\u010000e5", "", False), + (r"1234\u001000e5", "1234\U001000e5", True), + ], +) +def test_expandUnicodeEscapes(arg: str, expected_result: str, expected_valid: bool): + if expected_valid: + actual_result = expandUnicodeEscapes(arg) + assert actual_result == expected_result + else: + with pytest.raises(ValueError, match="Invalid unicode code point"): + _ = expandUnicodeEscapes(arg) From c05f07e034e6b7001a1cd40b27b3d797856626c4 Mon Sep 17 00:00:00 2001 From: Jeff Lerman Date: Sun, 12 Mar 2023 05:00:10 -0700 Subject: [PATCH 2/3] eliminate use of file intermediary in translateAlgebra() - moves the functionality into a class, so that objects can maintain state without use of a file --- rdflib/plugins/sparql/algebra.py | 450 ++++++++++++++++++------------- 1 file changed, 260 insertions(+), 190 deletions(-) diff --git a/rdflib/plugins/sparql/algebra.py b/rdflib/plugins/sparql/algebra.py index 5fd9e59bc..a4bfd2270 100644 --- a/rdflib/plugins/sparql/algebra.py +++ b/rdflib/plugins/sparql/algebra.py @@ -955,31 +955,34 @@ class ExpressionNotCoveredException(Exception): # noqa: N818 pass -def translateAlgebra(query_algebra: Query) -> str: - """ +class AlgebraTranslator: + """Translator of a Query's algebra to its equivalent SPARQL (string). - :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree). - :return: The query form generated from the SPARQL 1.1 algebra tree for select queries. + Coded as a class to support storage of state during the translation process, without + use of a file. - """ - import os + Anticipated Usage: translated_query = AlgebraTranslator(query).translateAlgebra() - def overwrite(text: str): - file = open("query.txt", "w+") - file.write(text) - file.close() + An external convenience function which wraps the above call, algebra.translateAlgebra(), + is supplied, so this class does not need to be referenced by client code at all in + normal use. 
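
For orientation, a short sketch of the anticipated usage described above, going through the public wrapper rather than the class itself (the query string is illustrative):

    from rdflib.plugins.sparql.algebra import translateAlgebra, translateQuery
    from rdflib.plugins.sparql.parser import parseQuery

    algebra_tree = translateQuery(parseQuery("SELECT ?s WHERE { ?s ?p ?o }"))
    # The old implementation wrote and re-read an intermediary "query.txt" in the
    # current working directory; the refactored translator keeps this text in memory.
    query_text = translateAlgebra(algebra_tree)
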
+ """ - def replace( - old, - new, + def __init__(self, query_algebra: Query): + self.query_algebra = query_algebra + self.aggr_vars: DefaultDict[ + Identifier, List[Identifier] + ] = collections.defaultdict(list) + self._alg_translation: str = "" + + def _replace( + self, + old: str, + new: str, search_from_match: str = None, search_from_match_occurrence: int = None, count: int = 1, ): - # Read in the file - with open("query.txt", "r") as file: - filedata = file.read() - def find_nth(haystack, needle, n): start = haystack.lower().find(needle) while start >= 0 and n > 1: @@ -989,27 +992,21 @@ def find_nth(haystack, needle, n): if search_from_match and search_from_match_occurrence: position = find_nth( - filedata, search_from_match, search_from_match_occurrence + self._alg_translation, search_from_match, search_from_match_occurrence ) - filedata_pre = filedata[:position] - filedata_post = filedata[position:].replace(old, new, count) - filedata = filedata_pre + filedata_post + filedata_pre = self._alg_translation[:position] + filedata_post = self._alg_translation[position:].replace(old, new, count) + self._alg_translation = filedata_pre + filedata_post else: - filedata = filedata.replace(old, new, count) - - # Write the file out again - with open("query.txt", "w") as file: - file.write(filedata) - - aggr_vars: DefaultDict[Identifier, List[Identifier]] = collections.defaultdict(list) + self._alg_translation = self._alg_translation.replace(old, new, count) def convert_node_arg( - node_arg: typing.Union[Identifier, CompValue, Expr, str] + self, node_arg: typing.Union[Identifier, CompValue, Expr, str] ) -> str: if isinstance(node_arg, Identifier): - if node_arg in aggr_vars.keys(): + if node_arg in self.aggr_vars.keys(): # type error: "Identifier" has no attribute "n3" - grp_var = aggr_vars[node_arg].pop(0).n3() # type: ignore[attr-defined] + grp_var = self.aggr_vars[node_arg].pop(0).n3() # type: ignore[attr-defined] return grp_var else: # type error: "Identifier" has no attribute "n3" @@ -1025,7 +1022,7 @@ def convert_node_arg( "The expression {0} might not be covered yet.".format(node_arg) ) - def sparql_query_text(node): + def sparql_query_text(self, node): """ https://www.w3.org/TR/sparql11-query/#sparqlSyntax @@ -1036,7 +1033,7 @@ def sparql_query_text(node): if isinstance(node, CompValue): # 18.2 Query Forms if node.name == "SelectQuery": - overwrite("-*-SELECT-*- " + "{" + node.p.name + "}") + self._alg_translation = "-*-SELECT-*- " + "{" + node.p.name + "}" # 18.2 Graph Patterns elif node.name == "BGP": @@ -1046,18 +1043,20 @@ def sparql_query_text(node): triple[0].n3() + " " + triple[1].n3() + " " + triple[2].n3() + "." for triple in node.triples ) - replace("{BGP}", triples) + self._replace("{BGP}", triples) # The dummy -*-SELECT-*- is placed during a SelectQuery or Multiset pattern in order to be able # to match extended variables in a specific Select-clause (see "Extend" below) - replace("-*-SELECT-*-", "SELECT", count=-1) + self._replace("-*-SELECT-*-", "SELECT", count=-1) # If there is no "Group By" clause the placeholder will simply be deleted. 
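
As a rough illustration of the in-memory replacement strategy used by _replace() above (replace_after_nth is a hypothetical stand-in, not rdflib code):

    def replace_after_nth(text: str, marker: str, n: int, old: str, new: str) -> str:
        # Locate the n-th occurrence of marker (case-insensitive), then substitute
        # the first occurrence of old after it, mirroring find_nth/search_from_match.
        pos = -1
        for _ in range(n):
            pos = text.lower().find(marker, pos + 1)
            if pos < 0:
                return text.replace(old, new, 1)
        return text[:pos] + text[pos:].replace(old, new, 1)

    state = "-*-SELECT-*- {Project}"   # stands in for self._alg_translation
    state = replace_after_nth(state, "-*-select-*-", 1, "{Project}", "?s {BGP}")
    # state is now "-*-SELECT-*- ?s {BGP}"; later steps fill {BGP} and turn the
    # -*-SELECT-*- dummy back into SELECT.
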
Otherwise there will be # no matching {GroupBy} placeholder because it has already been replaced by "group by variables" - replace("{GroupBy}", "", count=-1) - replace("{Having}", "", count=-1) + self._replace("{GroupBy}", "", count=-1) + self._replace("{Having}", "", count=-1) elif node.name == "Join": - replace("{Join}", "{" + node.p1.name + "}{" + node.p2.name + "}") # + self._replace( + "{Join}", "{" + node.p1.name + "}{" + node.p2.name + "}" + ) # elif node.name == "LeftJoin": - replace( + self._replace( "{LeftJoin}", "{" + node.p1.name + "}OPTIONAL{{" + node.p2.name + "}}", ) @@ -1071,35 +1070,39 @@ def sparql_query_text(node): if node.p: # Filter with p=AggregateJoin = Having if node.p.name == "AggregateJoin": - replace("{Filter}", "{" + node.p.name + "}") - replace("{Having}", "HAVING({" + expr + "})") + self._replace("{Filter}", "{" + node.p.name + "}") + self._replace("{Having}", "HAVING({" + expr + "})") else: - replace( + self._replace( "{Filter}", "FILTER({" + expr + "}) {" + node.p.name + "}" ) else: - replace("{Filter}", "FILTER({" + expr + "})") + self._replace("{Filter}", "FILTER({" + expr + "})") elif node.name == "Union": - replace( + self._replace( "{Union}", "{{" + node.p1.name + "}}UNION{{" + node.p2.name + "}}" ) elif node.name == "Graph": expr = "GRAPH " + node.term.n3() + " {{" + node.p.name + "}}" - replace("{Graph}", expr) + self._replace("{Graph}", expr) elif node.name == "Extend": - query_string = open("query.txt", "r").read().lower() + query_string = self._alg_translation.lower() select_occurrences = query_string.count("-*-select-*-") - replace( + self._replace( node.var.n3(), - "(" + convert_node_arg(node.expr) + " as " + node.var.n3() + ")", + "(" + + self.convert_node_arg(node.expr) + + " as " + + node.var.n3() + + ")", search_from_match="-*-select-*-", search_from_match_occurrence=select_occurrences, ) - replace("{Extend}", "{" + node.p.name + "}") + self._replace("{Extend}", "{" + node.p.name + "}") elif node.name == "Minus": expr = "{" + node.p1.name + "}MINUS{{" + node.p2.name + "}}" - replace("{Minus}", expr) + self._replace("{Minus}", expr) elif node.name == "Group": group_by_vars = [] if node.expr: @@ -1110,12 +1113,14 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." ) - replace("{Group}", "{" + node.p.name + "}") - replace("{GroupBy}", "GROUP BY " + " ".join(group_by_vars) + " ") + self._replace("{Group}", "{" + node.p.name + "}") + self._replace( + "{GroupBy}", "GROUP BY " + " ".join(group_by_vars) + " " + ) else: - replace("{Group}", "{" + node.p.name + "}") + self._replace("{Group}", "{" + node.p.name + "}") elif node.name == "AggregateJoin": - replace("{AggregateJoin}", "{" + node.p.name + "}") + self._replace("{AggregateJoin}", "{" + node.p.name + "}") for agg_func in node.A: if isinstance(agg_func.res, Identifier): identifier = agg_func.res.n3() @@ -1123,14 +1128,14 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." 
) - aggr_vars[agg_func.res].append(agg_func.vars) + self.aggr_vars[agg_func.res].append(agg_func.vars) agg_func_name = agg_func.name.split("_")[1] distinct = "" if agg_func.distinct: distinct = agg_func.distinct + " " if agg_func_name == "GroupConcat": - replace( + self._replace( identifier, "GROUP_CONCAT" + "(" @@ -1141,30 +1146,32 @@ def sparql_query_text(node): + ")", ) else: - replace( + self._replace( identifier, agg_func_name.upper() + "(" + distinct - + convert_node_arg(agg_func.vars) + + self.convert_node_arg(agg_func.vars) + ")", ) # For non-aggregated variables the aggregation function "sample" is automatically assigned. # However, we do not want to have "sample" wrapped around non-aggregated variables. That is # why we replace it. If "sample" is used on purpose it will not be replaced as the alias # must be different from the variable in this case. - replace( - "(SAMPLE({0}) as {0})".format(convert_node_arg(agg_func.vars)), - convert_node_arg(agg_func.vars), + self._replace( + "(SAMPLE({0}) as {0})".format( + self.convert_node_arg(agg_func.vars) + ), + self.convert_node_arg(agg_func.vars), ) elif node.name == "GroupGraphPatternSub": - replace( + self._replace( "GroupGraphPatternSub", - " ".join([convert_node_arg(pattern) for pattern in node.part]), + " ".join([self.convert_node_arg(pattern) for pattern in node.part]), ) elif node.name == "TriplesBlock": print("triplesblock") - replace( + self._replace( "{TriplesBlock}", "".join( triple[0].n3() @@ -1196,8 +1203,8 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." ) - replace("{OrderBy}", "{" + node.p.name + "}") - replace("{OrderConditions}", " ".join(order_conditions) + " ") + self._replace("{OrderBy}", "{" + node.p.name + "}") + self._replace("{OrderConditions}", " ".join(order_conditions) + " ") elif node.name == "Project": project_variables = [] for var in node.PV: @@ -1210,7 +1217,7 @@ def sparql_query_text(node): order_by_pattern = "" if node.p.name == "OrderBy": order_by_pattern = "ORDER BY {OrderConditions}" - replace( + self._replace( "{Project}", " ".join(project_variables) + "{{" @@ -1221,17 +1228,17 @@ def sparql_query_text(node): + "{Having}", ) elif node.name == "Distinct": - replace("{Distinct}", "DISTINCT {" + node.p.name + "}") + self._replace("{Distinct}", "DISTINCT {" + node.p.name + "}") elif node.name == "Reduced": - replace("{Reduced}", "REDUCED {" + node.p.name + "}") + self._replace("{Reduced}", "REDUCED {" + node.p.name + "}") elif node.name == "Slice": slice = "OFFSET " + str(node.start) + " LIMIT " + str(node.length) - replace("{Slice}", "{" + node.p.name + "}" + slice) + self._replace("{Slice}", "{" + node.p.name + "}" + slice) elif node.name == "ToMultiSet": if node.p.name == "values": - replace("{ToMultiSet}", "{{" + node.p.name + "}}") + self._replace("{ToMultiSet}", "{{" + node.p.name + "}}") else: - replace( + self._replace( "{ToMultiSet}", "{-*-SELECT-*- " + "{" + node.p.name + "}" + "}" ) @@ -1240,71 +1247,73 @@ def sparql_query_text(node): # 17 Expressions and Testing Values # # 17.3 Operator Mapping elif node.name == "RelationalExpression": - expr = convert_node_arg(node.expr) + expr = self.convert_node_arg(node.expr) op = node.op if isinstance(list, type(node.other)): other = ( "(" - + ", ".join(convert_node_arg(expr) for expr in node.other) + + ", ".join(self.convert_node_arg(expr) for expr in node.other) + ")" ) else: - other = convert_node_arg(node.other) + other = self.convert_node_arg(node.other) condition = "{left} {operator} 
{right}".format( left=expr, operator=op, right=other ) - replace("{RelationalExpression}", condition) + self._replace("{RelationalExpression}", condition) elif node.name == "ConditionalAndExpression": inner_nodes = " && ".join( - [convert_node_arg(expr) for expr in node.other] + [self.convert_node_arg(expr) for expr in node.other] ) - replace( + self._replace( "{ConditionalAndExpression}", - convert_node_arg(node.expr) + " && " + inner_nodes, + self.convert_node_arg(node.expr) + " && " + inner_nodes, ) elif node.name == "ConditionalOrExpression": inner_nodes = " || ".join( - [convert_node_arg(expr) for expr in node.other] + [self.convert_node_arg(expr) for expr in node.other] ) - replace( + self._replace( "{ConditionalOrExpression}", - "(" + convert_node_arg(node.expr) + " || " + inner_nodes + ")", + "(" + self.convert_node_arg(node.expr) + " || " + inner_nodes + ")", ) elif node.name == "MultiplicativeExpression": - left_side = convert_node_arg(node.expr) + left_side = self.convert_node_arg(node.expr) multiplication = left_side for i, operator in enumerate(node.op): # noqa: F402 multiplication += ( - operator + " " + convert_node_arg(node.other[i]) + " " + operator + " " + self.convert_node_arg(node.other[i]) + " " ) - replace("{MultiplicativeExpression}", multiplication) + self._replace("{MultiplicativeExpression}", multiplication) elif node.name == "AdditiveExpression": - left_side = convert_node_arg(node.expr) + left_side = self.convert_node_arg(node.expr) addition = left_side for i, operator in enumerate(node.op): - addition += operator + " " + convert_node_arg(node.other[i]) + " " - replace("{AdditiveExpression}", addition) + addition += ( + operator + " " + self.convert_node_arg(node.other[i]) + " " + ) + self._replace("{AdditiveExpression}", addition) elif node.name == "UnaryNot": - replace("{UnaryNot}", "!" + convert_node_arg(node.expr)) + self._replace("{UnaryNot}", "!" + self.convert_node_arg(node.expr)) # # 17.4 Function Definitions # # # 17.4.1 Functional Forms elif node.name.endswith("BOUND"): - bound_var = convert_node_arg(node.arg) - replace("{Builtin_BOUND}", "bound(" + bound_var + ")") + bound_var = self.convert_node_arg(node.arg) + self._replace("{Builtin_BOUND}", "bound(" + bound_var + ")") elif node.name.endswith("IF"): - arg2 = convert_node_arg(node.arg2) - arg3 = convert_node_arg(node.arg3) + arg2 = self.convert_node_arg(node.arg2) + arg3 = self.convert_node_arg(node.arg3) if_expression = ( "IF(" + "{" + node.arg1.name + "}, " + arg2 + ", " + arg3 + ")" ) - replace("{Builtin_IF}", if_expression) + self._replace("{Builtin_IF}", if_expression) elif node.name.endswith("COALESCE"): - replace( + self._replace( "{Builtin_COALESCE}", "COALESCE(" - + ", ".join(convert_node_arg(arg) for arg in node.arg) + + ", ".join(self.convert_node_arg(arg) for arg in node.arg) + ")", ) elif node.name.endswith("Builtin_EXISTS"): @@ -1312,8 +1321,10 @@ def sparql_query_text(node): # According to https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rExistsFunc # ExistsFunc can only have a GroupGraphPattern as parameter. 
However, when we print the query algebra # we get a GroupGraphPatternSub - replace("{Builtin_EXISTS}", "EXISTS " + "{{" + node.graph.name + "}}") - traverse(node.graph, visitPre=sparql_query_text) + self._replace( + "{Builtin_EXISTS}", "EXISTS " + "{{" + node.graph.name + "}}" + ) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph elif node.name.endswith("Builtin_NOTEXISTS"): # The node's name which we get with node.graph.name returns "Join" instead of GroupGraphPatternSub @@ -1321,21 +1332,21 @@ def sparql_query_text(node): # NotExistsFunc can only have a GroupGraphPattern as parameter. However, when we print the query algebra # we get a GroupGraphPatternSub print(node.graph.name) - replace( + self._replace( "{Builtin_NOTEXISTS}", "NOT EXISTS " + "{{" + node.graph.name + "}}" ) - traverse(node.graph, visitPre=sparql_query_text) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph # # # # 17.4.1.5 logical-or: Covered in "RelationalExpression" # # # # 17.4.1.6 logical-and: Covered in "RelationalExpression" # # # # 17.4.1.7 RDFterm-equal: Covered in "RelationalExpression" elif node.name.endswith("sameTerm"): - replace( + self._replace( "{Builtin_sameTerm}", "SAMETERM(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) # # # # IN: Covered in "RelationalExpression" @@ -1343,205 +1354,253 @@ def sparql_query_text(node): # # # 17.4.2 Functions on RDF Terms elif node.name.endswith("Builtin_isIRI"): - replace("{Builtin_isIRI}", "isIRI(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_isIRI}", "isIRI(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_isBLANK"): - replace( - "{Builtin_isBLANK}", "isBLANK(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_isBLANK}", + "isBLANK(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_isLITERAL"): - replace( + self._replace( "{Builtin_isLITERAL}", - "isLITERAL(" + convert_node_arg(node.arg) + ")", + "isLITERAL(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_isNUMERIC"): - replace( + self._replace( "{Builtin_isNUMERIC}", - "isNUMERIC(" + convert_node_arg(node.arg) + ")", + "isNUMERIC(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_STR"): - replace("{Builtin_STR}", "STR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_STR}", "STR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_LANG"): - replace("{Builtin_LANG}", "LANG(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_LANG}", "LANG(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_DATATYPE"): - replace( - "{Builtin_DATATYPE}", "DATATYPE(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_DATATYPE}", + "DATATYPE(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_IRI"): - replace("{Builtin_IRI}", "IRI(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_IRI}", "IRI(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_BNODE"): - replace("{Builtin_BNODE}", "BNODE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_BNODE}", "BNODE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("STRDT"): - replace( + self._replace( "{Builtin_STRDT}", "STRDT(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " 
- + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRLANG"): - replace( + self._replace( "{Builtin_STRLANG}", "STRLANG(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_UUID"): - replace("{Builtin_UUID}", "UUID()") + self._replace("{Builtin_UUID}", "UUID()") elif node.name.endswith("Builtin_STRUUID"): - replace("{Builtin_STRUUID}", "STRUUID()") + self._replace("{Builtin_STRUUID}", "STRUUID()") # # # 17.4.3 Functions on Strings elif node.name.endswith("Builtin_STRLEN"): - replace( - "{Builtin_STRLEN}", "STRLEN(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_STRLEN}", + "STRLEN(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_SUBSTR"): - args = [convert_node_arg(node.arg), node.start] + args = [self.convert_node_arg(node.arg), node.start] if node.length: args.append(node.length) expr = "SUBSTR(" + ", ".join(args) + ")" - replace("{Builtin_SUBSTR}", expr) + self._replace("{Builtin_SUBSTR}", expr) elif node.name.endswith("Builtin_UCASE"): - replace("{Builtin_UCASE}", "UCASE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_UCASE}", "UCASE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_LCASE"): - replace("{Builtin_LCASE}", "LCASE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_LCASE}", "LCASE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_STRSTARTS"): - replace( + self._replace( "{Builtin_STRSTARTS}", "STRSTARTS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRENDS"): - replace( + self._replace( "{Builtin_STRENDS}", "STRENDS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_CONTAINS"): - replace( + self._replace( "{Builtin_CONTAINS}", "CONTAINS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRBEFORE"): - replace( + self._replace( "{Builtin_STRBEFORE}", "STRBEFORE(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRAFTER"): - replace( + self._replace( "{Builtin_STRAFTER}", "STRAFTER(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_ENCODE_FOR_URI"): - replace( + self._replace( "{Builtin_ENCODE_FOR_URI}", - "ENCODE_FOR_URI(" + convert_node_arg(node.arg) + ")", + "ENCODE_FOR_URI(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_CONCAT"): expr = "CONCAT({vars})".format( - vars=", ".join(convert_node_arg(elem) for elem in node.arg) + vars=", ".join(self.convert_node_arg(elem) for elem in node.arg) ) - replace("{Builtin_CONCAT}", expr) + self._replace("{Builtin_CONCAT}", expr) elif node.name.endswith("Builtin_LANGMATCHES"): - replace( + self._replace( "{Builtin_LANGMATCHES}", "LANGMATCHES(" - + convert_node_arg(node.arg1) + + 
self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("REGEX"): - args = [convert_node_arg(node.text), convert_node_arg(node.pattern)] + args = [ + self.convert_node_arg(node.text), + self.convert_node_arg(node.pattern), + ] expr = "REGEX(" + ", ".join(args) + ")" - replace("{Builtin_REGEX}", expr) + self._replace("{Builtin_REGEX}", expr) elif node.name.endswith("REPLACE"): - replace( + self._replace( "{Builtin_REPLACE}", "REPLACE(" - + convert_node_arg(node.arg) + + self.convert_node_arg(node.arg) + ", " - + convert_node_arg(node.pattern) + + self.convert_node_arg(node.pattern) + ", " - + convert_node_arg(node.replacement) + + self.convert_node_arg(node.replacement) + ")", ) # # # 17.4.4 Functions on Numerics elif node.name == "Builtin_ABS": - replace("{Builtin_ABS}", "ABS(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_ABS}", "ABS(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_ROUND": - replace("{Builtin_ROUND}", "ROUND(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_ROUND}", "ROUND(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_CEIL": - replace("{Builtin_CEIL}", "CEIL(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_CEIL}", "CEIL(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_FLOOR": - replace("{Builtin_FLOOR}", "FLOOR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_FLOOR}", "FLOOR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_RAND": - replace("{Builtin_RAND}", "RAND()") + self._replace("{Builtin_RAND}", "RAND()") # # # 17.4.5 Functions on Dates and Times elif node.name == "Builtin_NOW": - replace("{Builtin_NOW}", "NOW()") + self._replace("{Builtin_NOW}", "NOW()") elif node.name == "Builtin_YEAR": - replace("{Builtin_YEAR}", "YEAR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_YEAR}", "YEAR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_MONTH": - replace("{Builtin_MONTH}", "MONTH(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_MONTH}", "MONTH(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_DAY": - replace("{Builtin_DAY}", "DAY(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_DAY}", "DAY(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_HOURS": - replace("{Builtin_HOURS}", "HOURS(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_HOURS}", "HOURS(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_MINUTES": - replace( - "{Builtin_MINUTES}", "MINUTES(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_MINUTES}", + "MINUTES(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SECONDS": - replace( - "{Builtin_SECONDS}", "SECONDS(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SECONDS}", + "SECONDS(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_TIMEZONE": - replace( - "{Builtin_TIMEZONE}", "TIMEZONE(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_TIMEZONE}", + "TIMEZONE(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_TZ": - replace("{Builtin_TZ}", "TZ(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_TZ}", "TZ(" + self.convert_node_arg(node.arg) + ")" + ) # # # 17.4.6 Hash functions elif node.name == "Builtin_MD5": - 
replace("{Builtin_MD5}", "MD5(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_MD5}", "MD5(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_SHA1": - replace("{Builtin_SHA1}", "SHA1(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_SHA1}", "SHA1(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_SHA256": - replace( - "{Builtin_SHA256}", "SHA256(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA256}", + "SHA256(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SHA384": - replace( - "{Builtin_SHA384}", "SHA384(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA384}", + "SHA384(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SHA512": - replace( - "{Builtin_SHA512}", "SHA512(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA512}", + "SHA512(" + self.convert_node_arg(node.arg) + ")", ) # Other @@ -1574,25 +1633,36 @@ def sparql_query_text(node): ) rows += "(" + " ".join(row) + ")" - replace("values", values + "{" + rows + "}") + self._replace("values", values + "{" + rows + "}") elif node.name == "ServiceGraphPattern": - replace( + self._replace( "{ServiceGraphPattern}", "SERVICE " - + convert_node_arg(node.term) + + self.convert_node_arg(node.term) + "{" + node.graph.name + "}", ) - traverse(node.graph, visitPre=sparql_query_text) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph # else: # raise ExpressionNotCoveredException("The expression {0} might not be covered yet.".format(node.name)) - traverse(query_algebra.algebra, visitPre=sparql_query_text) - query_from_algebra = open("query.txt", "r").read() - os.remove("query.txt") + def translateAlgebra(self) -> str: + traverse(self.query_algebra.algebra, visitPre=self.sparql_query_text) + return self._alg_translation + + +def translateAlgebra(query_algebra: Query) -> str: + """ + :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree). + :return: The query form generated from the SPARQL 1.1 algebra tree for select queries. + + """ + query_from_algebra = AlgebraTranslator( + query_algebra=query_algebra + ).translateAlgebra() return query_from_algebra From 9e06fb88638d14d4c04dd04dec86153c47a42363 Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Sun, 26 Mar 2023 08:55:57 +0000 Subject: [PATCH 3/3] Rename `AlgebraTranslator` to `_AlgebraTranslator` This is mainly to avoid increasing our public interface. People should be able to use `translateAlgebra` for most of their needs. Also make slight change to the docstrings. --- rdflib/plugins/sparql/algebra.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/rdflib/plugins/sparql/algebra.py b/rdflib/plugins/sparql/algebra.py index a4bfd2270..52aa92a7f 100644 --- a/rdflib/plugins/sparql/algebra.py +++ b/rdflib/plugins/sparql/algebra.py @@ -955,17 +955,22 @@ class ExpressionNotCoveredException(Exception): # noqa: N818 pass -class AlgebraTranslator: - """Translator of a Query's algebra to its equivalent SPARQL (string). +class _AlgebraTranslator: + """ + Translator of a Query's algebra to its equivalent SPARQL (string). + + Coded as a class to support storage of state during the translation process, + without use of a file. - Coded as a class to support storage of state during the translation process, without - use of a file. 
+ Anticipated Usage: - Anticipated Usage: translated_query = AlgebraTranslator(query).translateAlgebra() + .. code-block:: python - An external convenience function which wraps the above call, algebra.translateAlgebra(), - is supplied, so this class does not need to be referenced by client code at all in - normal use. + translated_query = _AlgebraTranslator(query).translateAlgebra() + + An external convenience function which wraps the above call, + `translateAlgebra`, is supplied, so this class does not need to be + referenced by client code at all in normal use. """ def __init__(self, query_algebra: Query): @@ -1655,12 +1660,13 @@ def translateAlgebra(self) -> str: def translateAlgebra(query_algebra: Query) -> str: """ + Translates a SPARQL 1.1 algebra tree into the corresponding query string. - :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree). - :return: The query form generated from the SPARQL 1.1 algebra tree for select queries. - + :param query_algebra: An algebra returned by `translateQuery`. + :return: The query form generated from the SPARQL 1.1 algebra tree for + SELECT queries. """ - query_from_algebra = AlgebraTranslator( + query_from_algebra = _AlgebraTranslator( query_algebra=query_algebra ).translateAlgebra() return query_from_algebra
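
A brief sketch of what the rename means for callers (illustrative query string; the leading underscore marks the class as internal):

    from rdflib.plugins.sparql import algebra
    from rdflib.plugins.sparql.parser import parseQuery

    q = algebra.translateQuery(parseQuery("SELECT ?s WHERE { ?s ?p ?o }"))

    # Client code keeps using the module-level helper, which is unchanged...
    text = algebra.translateAlgebra(q)

    # ...and which now wraps the renamed translator; reaching for the class
    # directly still works but relies on a private name.
    same_text = algebra._AlgebraTranslator(q).translateAlgebra()
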