From 0d19ad4be18a56c75b5267cc6cdbcf5bd43924d1 Mon Sep 17 00:00:00 2001 From: Jeff Lerman Date: Sat, 11 Mar 2023 20:48:27 -0800 Subject: [PATCH 1/3] add type-hints; including minor refactor of prettify_parsetree() to improve type-hinting --- rdflib/plugins/sparql/parser.py | 25 ++++++++++++-------- rdflib/plugins/sparql/parserutils.py | 35 +++++++++++++++++----------- test/test_sparql/test_sparql.py | 24 ++++++++++++++++++- 3 files changed, 60 insertions(+), 24 deletions(-) diff --git a/rdflib/plugins/sparql/parser.py b/rdflib/plugins/sparql/parser.py index 2a897f822..8989cbfac 100644 --- a/rdflib/plugins/sparql/parser.py +++ b/rdflib/plugins/sparql/parser.py @@ -6,6 +6,9 @@ import re import sys +from typing import Any +from typing import Optional as OptionalType +from typing import TextIO, Tuple, Union from pyparsing import CaselessKeyword as Keyword # watch out :) from pyparsing import ( @@ -37,15 +40,15 @@ # ---------------- ACTIONS -def neg(literal): +def neg(literal) -> rdflib.Literal: return rdflib.Literal(-literal, datatype=literal.datatype) -def setLanguage(terms): +def setLanguage(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: return rdflib.Literal(terms[0], lang=terms[1]) -def setDataType(terms): +def setDataType(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: return rdflib.Literal(terms[0], datatype=terms[1]) @@ -1508,25 +1511,27 @@ def expandCollection(terms): UpdateUnit.ignore("#" + restOfLine) -expandUnicodeEscapes_re = re.compile(r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I) +expandUnicodeEscapes_re: re.Pattern = re.compile( + r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I +) -def expandUnicodeEscapes(q): +def expandUnicodeEscapes(q: str) -> str: r""" The syntax of the SPARQL Query Language is expressed over code points in Unicode [UNICODE]. The encoding is always UTF-8 [RFC3629]. 
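
A quick sketch of the escape expansion this newly typed helper performs, in line with the stricter ValueError handling and the parametrized test added to test_sparql.py later in this patch (the literal strings are illustrative only):

    from rdflib.plugins.sparql.parser import expandUnicodeEscapes

    expandUnicodeEscapes(r"1234\u00e3\u00e5")  # -> "1234ãå"               (4-digit \uXXXX form)
    expandUnicodeEscapes(r"1234\u001000e5")    # -> "1234" + chr(0x1000E5)  (8-digit form)

    try:
        expandUnicodeEscapes(r"1234\u900000e5")  # 0x900000E5 is not a valid code point
    except ValueError as err:
        print(err)  # Invalid unicode code point: 900000e5
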
Unicode code points may also be expressed using an \ uXXXX (U+0 to U+FFFF) or \ UXXXXXXXX syntax (for U+10000 onwards) where X is a hexadecimal digit [0-9A-F] """ - def expand(m): + def expand(m: re.Match) -> str: try: return chr(int(m.group(1), 16)) - except: # noqa: E722 - raise Exception("Invalid unicode code point: " + m) + except (ValueError, OverflowError) as e: + raise ValueError("Invalid unicode code point: " + m.group(1)) from e return expandUnicodeEscapes_re.sub(expand, q) -def parseQuery(q): +def parseQuery(q: Union[str, bytes, TextIO]) -> ParseResults: if hasattr(q, "read"): q = q.read() if isinstance(q, bytes): @@ -1536,7 +1541,7 @@ def parseQuery(q): return Query.parseString(q, parseAll=True) -def parseUpdate(q): +def parseUpdate(q: Union[str, bytes, TextIO]): if hasattr(q, "read"): q = q.read() diff --git a/rdflib/plugins/sparql/parserutils.py b/rdflib/plugins/sparql/parserutils.py index 1f2e88eaf..09f19ff8b 100644 --- a/rdflib/plugins/sparql/parserutils.py +++ b/rdflib/plugins/sparql/parserutils.py @@ -1,10 +1,11 @@ from collections import OrderedDict from types import MethodType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, List, Tuple, Union from pyparsing import ParseResults, TokenConverter, originalTextFor from rdflib import BNode, Variable +from rdflib.term import Identifier if TYPE_CHECKING: from rdflib.plugins.sparql.sparql import FrozenBindings @@ -252,26 +253,34 @@ def setEvalFn(self, evalfn): return self -def prettify_parsetree(t, indent="", depth=0): - out = [] - if isinstance(t, ParseResults): - for e in t.asList(): - out.append(prettify_parsetree(e, indent, depth + 1)) - for k, v in sorted(t.items()): - out.append("%s%s- %s:\n" % (indent, " " * depth, k)) - out.append(prettify_parsetree(v, indent, depth + 1)) - elif isinstance(t, CompValue): +def prettify_parsetree(t: ParseResults, indent: str = "", depth: int = 0) -> str: + out: List[str] = [] + for e in t.asList(): + out.append(_prettify_sub_parsetree(e, indent, depth + 1)) + for k, v in sorted(t.items()): + out.append("%s%s- %s:\n" % (indent, " " * depth, k)) + out.append(_prettify_sub_parsetree(v, indent, depth + 1)) + return "".join(out) + + +def _prettify_sub_parsetree( + t: Union[Identifier, CompValue, set, list, dict, Tuple, bool, None], + indent: str = "", + depth: int = 0, +) -> str: + out: List[str] = [] + if isinstance(t, CompValue): out.append("%s%s> %s:\n" % (indent, " " * depth, t.name)) for k, v in t.items(): out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k)) - out.append(prettify_parsetree(v, indent, depth + 2)) + out.append(_prettify_sub_parsetree(v, indent, depth + 2)) elif isinstance(t, dict): for k, v in t.items(): out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k)) - out.append(prettify_parsetree(v, indent, depth + 2)) + out.append(_prettify_sub_parsetree(v, indent, depth + 2)) elif isinstance(t, list): for e in t: - out.append(prettify_parsetree(e, indent, depth + 1)) + out.append(_prettify_sub_parsetree(e, indent, depth + 1)) else: out.append("%s%s- %r\n" % (indent, " " * depth, t)) return "".join(out) diff --git a/test/test_sparql/test_sparql.py b/test/test_sparql/test_sparql.py index 02406bdf9..28669c604 100644 --- a/test/test_sparql/test_sparql.py +++ b/test/test_sparql/test_sparql.py @@ -16,7 +16,7 @@ from rdflib.plugins.sparql.algebra import translateQuery from rdflib.plugins.sparql.evaluate import evalPart from rdflib.plugins.sparql.evalutils import _eval -from rdflib.plugins.sparql.parser import parseQuery +from 
rdflib.plugins.sparql.parser import expandUnicodeEscapes, parseQuery from rdflib.plugins.sparql.parserutils import prettify_parsetree from rdflib.plugins.sparql.sparql import SPARQLError from rdflib.query import Result, ResultRow @@ -957,3 +957,25 @@ def test_sparql_describe( subjects = {s for s in r.graph.subjects() if not isinstance(s, BNode)} assert subjects == expected_subjects assert len(r.graph) == expected_size + + +@pytest.mark.parametrize( + "arg, expected_result, expected_valid", + [ + ("abc", "abc", True), + ("1234", "1234", True), + (r"1234\u0050", "1234P", True), + (r"1234\u00e3", "1234\u00e3", True), + (r"1234\u00e3\u00e5", "1234ãå", True), + (r"1234\u900000e5", "", False), + (r"1234\u010000e5", "", False), + (r"1234\u001000e5", "1234\U001000e5", True), + ], +) +def test_expandUnicodeEscapes(arg: str, expected_result: str, expected_valid: bool): + if expected_valid: + actual_result = expandUnicodeEscapes(arg) + assert actual_result == expected_result + else: + with pytest.raises(ValueError, match="Invalid unicode code point"): + _ = expandUnicodeEscapes(arg) From c05f07e034e6b7001a1cd40b27b3d797856626c4 Mon Sep 17 00:00:00 2001 From: Jeff Lerman Date: Sun, 12 Mar 2023 05:00:10 -0700 Subject: [PATCH 2/3] eliminate use of file intermediary in translateAlgebra() - moves the functionality into a class, so that objects can maintain state without use of a file --- rdflib/plugins/sparql/algebra.py | 450 ++++++++++++++++++------------- 1 file changed, 260 insertions(+), 190 deletions(-) diff --git a/rdflib/plugins/sparql/algebra.py b/rdflib/plugins/sparql/algebra.py index 5fd9e59bc..a4bfd2270 100644 --- a/rdflib/plugins/sparql/algebra.py +++ b/rdflib/plugins/sparql/algebra.py @@ -955,31 +955,34 @@ class ExpressionNotCoveredException(Exception): # noqa: N818 pass -def translateAlgebra(query_algebra: Query) -> str: - """ +class AlgebraTranslator: + """Translator of a Query's algebra to its equivalent SPARQL (string). - :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree). - :return: The query form generated from the SPARQL 1.1 algebra tree for select queries. + Coded as a class to support storage of state during the translation process, without + use of a file. - """ - import os + Anticipated Usage: translated_query = AlgebraTranslator(query).translateAlgebra() - def overwrite(text: str): - file = open("query.txt", "w+") - file.write(text) - file.close() + An external convenience function which wraps the above call, algebra.translateAlgebra(), + is supplied, so this class does not need to be referenced by client code at all in + normal use. 
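
For orientation, a short sketch of the anticipated usage described above, going through the public wrapper rather than the class itself (the query string is illustrative):

    from rdflib.plugins.sparql.algebra import translateAlgebra, translateQuery
    from rdflib.plugins.sparql.parser import parseQuery

    algebra_tree = translateQuery(parseQuery("SELECT ?s WHERE { ?s ?p ?o }"))
    # The old implementation wrote and re-read an intermediary "query.txt" in the
    # current working directory; the refactored translator keeps this text in memory.
    query_text = translateAlgebra(algebra_tree)
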
+ """ - def replace( - old, - new, + def __init__(self, query_algebra: Query): + self.query_algebra = query_algebra + self.aggr_vars: DefaultDict[ + Identifier, List[Identifier] + ] = collections.defaultdict(list) + self._alg_translation: str = "" + + def _replace( + self, + old: str, + new: str, search_from_match: str = None, search_from_match_occurrence: int = None, count: int = 1, ): - # Read in the file - with open("query.txt", "r") as file: - filedata = file.read() - def find_nth(haystack, needle, n): start = haystack.lower().find(needle) while start >= 0 and n > 1: @@ -989,27 +992,21 @@ def find_nth(haystack, needle, n): if search_from_match and search_from_match_occurrence: position = find_nth( - filedata, search_from_match, search_from_match_occurrence + self._alg_translation, search_from_match, search_from_match_occurrence ) - filedata_pre = filedata[:position] - filedata_post = filedata[position:].replace(old, new, count) - filedata = filedata_pre + filedata_post + filedata_pre = self._alg_translation[:position] + filedata_post = self._alg_translation[position:].replace(old, new, count) + self._alg_translation = filedata_pre + filedata_post else: - filedata = filedata.replace(old, new, count) - - # Write the file out again - with open("query.txt", "w") as file: - file.write(filedata) - - aggr_vars: DefaultDict[Identifier, List[Identifier]] = collections.defaultdict(list) + self._alg_translation = self._alg_translation.replace(old, new, count) def convert_node_arg( - node_arg: typing.Union[Identifier, CompValue, Expr, str] + self, node_arg: typing.Union[Identifier, CompValue, Expr, str] ) -> str: if isinstance(node_arg, Identifier): - if node_arg in aggr_vars.keys(): + if node_arg in self.aggr_vars.keys(): # type error: "Identifier" has no attribute "n3" - grp_var = aggr_vars[node_arg].pop(0).n3() # type: ignore[attr-defined] + grp_var = self.aggr_vars[node_arg].pop(0).n3() # type: ignore[attr-defined] return grp_var else: # type error: "Identifier" has no attribute "n3" @@ -1025,7 +1022,7 @@ def convert_node_arg( "The expression {0} might not be covered yet.".format(node_arg) ) - def sparql_query_text(node): + def sparql_query_text(self, node): """ https://www.w3.org/TR/sparql11-query/#sparqlSyntax @@ -1036,7 +1033,7 @@ def sparql_query_text(node): if isinstance(node, CompValue): # 18.2 Query Forms if node.name == "SelectQuery": - overwrite("-*-SELECT-*- " + "{" + node.p.name + "}") + self._alg_translation = "-*-SELECT-*- " + "{" + node.p.name + "}" # 18.2 Graph Patterns elif node.name == "BGP": @@ -1046,18 +1043,20 @@ def sparql_query_text(node): triple[0].n3() + " " + triple[1].n3() + " " + triple[2].n3() + "." for triple in node.triples ) - replace("{BGP}", triples) + self._replace("{BGP}", triples) # The dummy -*-SELECT-*- is placed during a SelectQuery or Multiset pattern in order to be able # to match extended variables in a specific Select-clause (see "Extend" below) - replace("-*-SELECT-*-", "SELECT", count=-1) + self._replace("-*-SELECT-*-", "SELECT", count=-1) # If there is no "Group By" clause the placeholder will simply be deleted. 
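
As a rough illustration of the in-memory replacement strategy used by _replace() above (replace_after_nth is a hypothetical stand-in, not rdflib code):

    def replace_after_nth(text: str, marker: str, n: int, old: str, new: str) -> str:
        # Locate the n-th occurrence of marker (case-insensitive), then substitute
        # the first occurrence of old after it, mirroring find_nth/search_from_match.
        pos = -1
        for _ in range(n):
            pos = text.lower().find(marker, pos + 1)
            if pos < 0:
                return text.replace(old, new, 1)
        return text[:pos] + text[pos:].replace(old, new, 1)

    state = "-*-SELECT-*- {Project}"   # stands in for self._alg_translation
    state = replace_after_nth(state, "-*-select-*-", 1, "{Project}", "?s {BGP}")
    # state is now "-*-SELECT-*- ?s {BGP}"; later steps fill {BGP} and turn the
    # -*-SELECT-*- dummy back into SELECT.
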
Otherwise there will be # no matching {GroupBy} placeholder because it has already been replaced by "group by variables" - replace("{GroupBy}", "", count=-1) - replace("{Having}", "", count=-1) + self._replace("{GroupBy}", "", count=-1) + self._replace("{Having}", "", count=-1) elif node.name == "Join": - replace("{Join}", "{" + node.p1.name + "}{" + node.p2.name + "}") # + self._replace( + "{Join}", "{" + node.p1.name + "}{" + node.p2.name + "}" + ) # elif node.name == "LeftJoin": - replace( + self._replace( "{LeftJoin}", "{" + node.p1.name + "}OPTIONAL{{" + node.p2.name + "}}", ) @@ -1071,35 +1070,39 @@ def sparql_query_text(node): if node.p: # Filter with p=AggregateJoin = Having if node.p.name == "AggregateJoin": - replace("{Filter}", "{" + node.p.name + "}") - replace("{Having}", "HAVING({" + expr + "})") + self._replace("{Filter}", "{" + node.p.name + "}") + self._replace("{Having}", "HAVING({" + expr + "})") else: - replace( + self._replace( "{Filter}", "FILTER({" + expr + "}) {" + node.p.name + "}" ) else: - replace("{Filter}", "FILTER({" + expr + "})") + self._replace("{Filter}", "FILTER({" + expr + "})") elif node.name == "Union": - replace( + self._replace( "{Union}", "{{" + node.p1.name + "}}UNION{{" + node.p2.name + "}}" ) elif node.name == "Graph": expr = "GRAPH " + node.term.n3() + " {{" + node.p.name + "}}" - replace("{Graph}", expr) + self._replace("{Graph}", expr) elif node.name == "Extend": - query_string = open("query.txt", "r").read().lower() + query_string = self._alg_translation.lower() select_occurrences = query_string.count("-*-select-*-") - replace( + self._replace( node.var.n3(), - "(" + convert_node_arg(node.expr) + " as " + node.var.n3() + ")", + "(" + + self.convert_node_arg(node.expr) + + " as " + + node.var.n3() + + ")", search_from_match="-*-select-*-", search_from_match_occurrence=select_occurrences, ) - replace("{Extend}", "{" + node.p.name + "}") + self._replace("{Extend}", "{" + node.p.name + "}") elif node.name == "Minus": expr = "{" + node.p1.name + "}MINUS{{" + node.p2.name + "}}" - replace("{Minus}", expr) + self._replace("{Minus}", expr) elif node.name == "Group": group_by_vars = [] if node.expr: @@ -1110,12 +1113,14 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." ) - replace("{Group}", "{" + node.p.name + "}") - replace("{GroupBy}", "GROUP BY " + " ".join(group_by_vars) + " ") + self._replace("{Group}", "{" + node.p.name + "}") + self._replace( + "{GroupBy}", "GROUP BY " + " ".join(group_by_vars) + " " + ) else: - replace("{Group}", "{" + node.p.name + "}") + self._replace("{Group}", "{" + node.p.name + "}") elif node.name == "AggregateJoin": - replace("{AggregateJoin}", "{" + node.p.name + "}") + self._replace("{AggregateJoin}", "{" + node.p.name + "}") for agg_func in node.A: if isinstance(agg_func.res, Identifier): identifier = agg_func.res.n3() @@ -1123,14 +1128,14 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." 
) - aggr_vars[agg_func.res].append(agg_func.vars) + self.aggr_vars[agg_func.res].append(agg_func.vars) agg_func_name = agg_func.name.split("_")[1] distinct = "" if agg_func.distinct: distinct = agg_func.distinct + " " if agg_func_name == "GroupConcat": - replace( + self._replace( identifier, "GROUP_CONCAT" + "(" @@ -1141,30 +1146,32 @@ def sparql_query_text(node): + ")", ) else: - replace( + self._replace( identifier, agg_func_name.upper() + "(" + distinct - + convert_node_arg(agg_func.vars) + + self.convert_node_arg(agg_func.vars) + ")", ) # For non-aggregated variables the aggregation function "sample" is automatically assigned. # However, we do not want to have "sample" wrapped around non-aggregated variables. That is # why we replace it. If "sample" is used on purpose it will not be replaced as the alias # must be different from the variable in this case. - replace( - "(SAMPLE({0}) as {0})".format(convert_node_arg(agg_func.vars)), - convert_node_arg(agg_func.vars), + self._replace( + "(SAMPLE({0}) as {0})".format( + self.convert_node_arg(agg_func.vars) + ), + self.convert_node_arg(agg_func.vars), ) elif node.name == "GroupGraphPatternSub": - replace( + self._replace( "GroupGraphPatternSub", - " ".join([convert_node_arg(pattern) for pattern in node.part]), + " ".join([self.convert_node_arg(pattern) for pattern in node.part]), ) elif node.name == "TriplesBlock": print("triplesblock") - replace( + self._replace( "{TriplesBlock}", "".join( triple[0].n3() @@ -1196,8 +1203,8 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." ) - replace("{OrderBy}", "{" + node.p.name + "}") - replace("{OrderConditions}", " ".join(order_conditions) + " ") + self._replace("{OrderBy}", "{" + node.p.name + "}") + self._replace("{OrderConditions}", " ".join(order_conditions) + " ") elif node.name == "Project": project_variables = [] for var in node.PV: @@ -1210,7 +1217,7 @@ def sparql_query_text(node): order_by_pattern = "" if node.p.name == "OrderBy": order_by_pattern = "ORDER BY {OrderConditions}" - replace( + self._replace( "{Project}", " ".join(project_variables) + "{{" @@ -1221,17 +1228,17 @@ def sparql_query_text(node): + "{Having}", ) elif node.name == "Distinct": - replace("{Distinct}", "DISTINCT {" + node.p.name + "}") + self._replace("{Distinct}", "DISTINCT {" + node.p.name + "}") elif node.name == "Reduced": - replace("{Reduced}", "REDUCED {" + node.p.name + "}") + self._replace("{Reduced}", "REDUCED {" + node.p.name + "}") elif node.name == "Slice": slice = "OFFSET " + str(node.start) + " LIMIT " + str(node.length) - replace("{Slice}", "{" + node.p.name + "}" + slice) + self._replace("{Slice}", "{" + node.p.name + "}" + slice) elif node.name == "ToMultiSet": if node.p.name == "values": - replace("{ToMultiSet}", "{{" + node.p.name + "}}") + self._replace("{ToMultiSet}", "{{" + node.p.name + "}}") else: - replace( + self._replace( "{ToMultiSet}", "{-*-SELECT-*- " + "{" + node.p.name + "}" + "}" ) @@ -1240,71 +1247,73 @@ def sparql_query_text(node): # 17 Expressions and Testing Values # # 17.3 Operator Mapping elif node.name == "RelationalExpression": - expr = convert_node_arg(node.expr) + expr = self.convert_node_arg(node.expr) op = node.op if isinstance(list, type(node.other)): other = ( "(" - + ", ".join(convert_node_arg(expr) for expr in node.other) + + ", ".join(self.convert_node_arg(expr) for expr in node.other) + ")" ) else: - other = convert_node_arg(node.other) + other = self.convert_node_arg(node.other) condition = "{left} {operator} 
{right}".format( left=expr, operator=op, right=other ) - replace("{RelationalExpression}", condition) + self._replace("{RelationalExpression}", condition) elif node.name == "ConditionalAndExpression": inner_nodes = " && ".join( - [convert_node_arg(expr) for expr in node.other] + [self.convert_node_arg(expr) for expr in node.other] ) - replace( + self._replace( "{ConditionalAndExpression}", - convert_node_arg(node.expr) + " && " + inner_nodes, + self.convert_node_arg(node.expr) + " && " + inner_nodes, ) elif node.name == "ConditionalOrExpression": inner_nodes = " || ".join( - [convert_node_arg(expr) for expr in node.other] + [self.convert_node_arg(expr) for expr in node.other] ) - replace( + self._replace( "{ConditionalOrExpression}", - "(" + convert_node_arg(node.expr) + " || " + inner_nodes + ")", + "(" + self.convert_node_arg(node.expr) + " || " + inner_nodes + ")", ) elif node.name == "MultiplicativeExpression": - left_side = convert_node_arg(node.expr) + left_side = self.convert_node_arg(node.expr) multiplication = left_side for i, operator in enumerate(node.op): # noqa: F402 multiplication += ( - operator + " " + convert_node_arg(node.other[i]) + " " + operator + " " + self.convert_node_arg(node.other[i]) + " " ) - replace("{MultiplicativeExpression}", multiplication) + self._replace("{MultiplicativeExpression}", multiplication) elif node.name == "AdditiveExpression": - left_side = convert_node_arg(node.expr) + left_side = self.convert_node_arg(node.expr) addition = left_side for i, operator in enumerate(node.op): - addition += operator + " " + convert_node_arg(node.other[i]) + " " - replace("{AdditiveExpression}", addition) + addition += ( + operator + " " + self.convert_node_arg(node.other[i]) + " " + ) + self._replace("{AdditiveExpression}", addition) elif node.name == "UnaryNot": - replace("{UnaryNot}", "!" + convert_node_arg(node.expr)) + self._replace("{UnaryNot}", "!" + self.convert_node_arg(node.expr)) # # 17.4 Function Definitions # # # 17.4.1 Functional Forms elif node.name.endswith("BOUND"): - bound_var = convert_node_arg(node.arg) - replace("{Builtin_BOUND}", "bound(" + bound_var + ")") + bound_var = self.convert_node_arg(node.arg) + self._replace("{Builtin_BOUND}", "bound(" + bound_var + ")") elif node.name.endswith("IF"): - arg2 = convert_node_arg(node.arg2) - arg3 = convert_node_arg(node.arg3) + arg2 = self.convert_node_arg(node.arg2) + arg3 = self.convert_node_arg(node.arg3) if_expression = ( "IF(" + "{" + node.arg1.name + "}, " + arg2 + ", " + arg3 + ")" ) - replace("{Builtin_IF}", if_expression) + self._replace("{Builtin_IF}", if_expression) elif node.name.endswith("COALESCE"): - replace( + self._replace( "{Builtin_COALESCE}", "COALESCE(" - + ", ".join(convert_node_arg(arg) for arg in node.arg) + + ", ".join(self.convert_node_arg(arg) for arg in node.arg) + ")", ) elif node.name.endswith("Builtin_EXISTS"): @@ -1312,8 +1321,10 @@ def sparql_query_text(node): # According to https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rExistsFunc # ExistsFunc can only have a GroupGraphPattern as parameter. 
However, when we print the query algebra # we get a GroupGraphPatternSub - replace("{Builtin_EXISTS}", "EXISTS " + "{{" + node.graph.name + "}}") - traverse(node.graph, visitPre=sparql_query_text) + self._replace( + "{Builtin_EXISTS}", "EXISTS " + "{{" + node.graph.name + "}}" + ) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph elif node.name.endswith("Builtin_NOTEXISTS"): # The node's name which we get with node.graph.name returns "Join" instead of GroupGraphPatternSub @@ -1321,21 +1332,21 @@ def sparql_query_text(node): # NotExistsFunc can only have a GroupGraphPattern as parameter. However, when we print the query algebra # we get a GroupGraphPatternSub print(node.graph.name) - replace( + self._replace( "{Builtin_NOTEXISTS}", "NOT EXISTS " + "{{" + node.graph.name + "}}" ) - traverse(node.graph, visitPre=sparql_query_text) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph # # # # 17.4.1.5 logical-or: Covered in "RelationalExpression" # # # # 17.4.1.6 logical-and: Covered in "RelationalExpression" # # # # 17.4.1.7 RDFterm-equal: Covered in "RelationalExpression" elif node.name.endswith("sameTerm"): - replace( + self._replace( "{Builtin_sameTerm}", "SAMETERM(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) # # # # IN: Covered in "RelationalExpression" @@ -1343,205 +1354,253 @@ def sparql_query_text(node): # # # 17.4.2 Functions on RDF Terms elif node.name.endswith("Builtin_isIRI"): - replace("{Builtin_isIRI}", "isIRI(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_isIRI}", "isIRI(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_isBLANK"): - replace( - "{Builtin_isBLANK}", "isBLANK(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_isBLANK}", + "isBLANK(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_isLITERAL"): - replace( + self._replace( "{Builtin_isLITERAL}", - "isLITERAL(" + convert_node_arg(node.arg) + ")", + "isLITERAL(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_isNUMERIC"): - replace( + self._replace( "{Builtin_isNUMERIC}", - "isNUMERIC(" + convert_node_arg(node.arg) + ")", + "isNUMERIC(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_STR"): - replace("{Builtin_STR}", "STR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_STR}", "STR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_LANG"): - replace("{Builtin_LANG}", "LANG(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_LANG}", "LANG(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_DATATYPE"): - replace( - "{Builtin_DATATYPE}", "DATATYPE(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_DATATYPE}", + "DATATYPE(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_IRI"): - replace("{Builtin_IRI}", "IRI(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_IRI}", "IRI(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_BNODE"): - replace("{Builtin_BNODE}", "BNODE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_BNODE}", "BNODE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("STRDT"): - replace( + self._replace( "{Builtin_STRDT}", "STRDT(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " 
- + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRLANG"): - replace( + self._replace( "{Builtin_STRLANG}", "STRLANG(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_UUID"): - replace("{Builtin_UUID}", "UUID()") + self._replace("{Builtin_UUID}", "UUID()") elif node.name.endswith("Builtin_STRUUID"): - replace("{Builtin_STRUUID}", "STRUUID()") + self._replace("{Builtin_STRUUID}", "STRUUID()") # # # 17.4.3 Functions on Strings elif node.name.endswith("Builtin_STRLEN"): - replace( - "{Builtin_STRLEN}", "STRLEN(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_STRLEN}", + "STRLEN(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_SUBSTR"): - args = [convert_node_arg(node.arg), node.start] + args = [self.convert_node_arg(node.arg), node.start] if node.length: args.append(node.length) expr = "SUBSTR(" + ", ".join(args) + ")" - replace("{Builtin_SUBSTR}", expr) + self._replace("{Builtin_SUBSTR}", expr) elif node.name.endswith("Builtin_UCASE"): - replace("{Builtin_UCASE}", "UCASE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_UCASE}", "UCASE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_LCASE"): - replace("{Builtin_LCASE}", "LCASE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_LCASE}", "LCASE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_STRSTARTS"): - replace( + self._replace( "{Builtin_STRSTARTS}", "STRSTARTS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRENDS"): - replace( + self._replace( "{Builtin_STRENDS}", "STRENDS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_CONTAINS"): - replace( + self._replace( "{Builtin_CONTAINS}", "CONTAINS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRBEFORE"): - replace( + self._replace( "{Builtin_STRBEFORE}", "STRBEFORE(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRAFTER"): - replace( + self._replace( "{Builtin_STRAFTER}", "STRAFTER(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_ENCODE_FOR_URI"): - replace( + self._replace( "{Builtin_ENCODE_FOR_URI}", - "ENCODE_FOR_URI(" + convert_node_arg(node.arg) + ")", + "ENCODE_FOR_URI(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_CONCAT"): expr = "CONCAT({vars})".format( - vars=", ".join(convert_node_arg(elem) for elem in node.arg) + vars=", ".join(self.convert_node_arg(elem) for elem in node.arg) ) - replace("{Builtin_CONCAT}", expr) + self._replace("{Builtin_CONCAT}", expr) elif node.name.endswith("Builtin_LANGMATCHES"): - replace( + self._replace( "{Builtin_LANGMATCHES}", "LANGMATCHES(" - + convert_node_arg(node.arg1) + + 
self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("REGEX"): - args = [convert_node_arg(node.text), convert_node_arg(node.pattern)] + args = [ + self.convert_node_arg(node.text), + self.convert_node_arg(node.pattern), + ] expr = "REGEX(" + ", ".join(args) + ")" - replace("{Builtin_REGEX}", expr) + self._replace("{Builtin_REGEX}", expr) elif node.name.endswith("REPLACE"): - replace( + self._replace( "{Builtin_REPLACE}", "REPLACE(" - + convert_node_arg(node.arg) + + self.convert_node_arg(node.arg) + ", " - + convert_node_arg(node.pattern) + + self.convert_node_arg(node.pattern) + ", " - + convert_node_arg(node.replacement) + + self.convert_node_arg(node.replacement) + ")", ) # # # 17.4.4 Functions on Numerics elif node.name == "Builtin_ABS": - replace("{Builtin_ABS}", "ABS(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_ABS}", "ABS(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_ROUND": - replace("{Builtin_ROUND}", "ROUND(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_ROUND}", "ROUND(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_CEIL": - replace("{Builtin_CEIL}", "CEIL(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_CEIL}", "CEIL(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_FLOOR": - replace("{Builtin_FLOOR}", "FLOOR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_FLOOR}", "FLOOR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_RAND": - replace("{Builtin_RAND}", "RAND()") + self._replace("{Builtin_RAND}", "RAND()") # # # 17.4.5 Functions on Dates and Times elif node.name == "Builtin_NOW": - replace("{Builtin_NOW}", "NOW()") + self._replace("{Builtin_NOW}", "NOW()") elif node.name == "Builtin_YEAR": - replace("{Builtin_YEAR}", "YEAR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_YEAR}", "YEAR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_MONTH": - replace("{Builtin_MONTH}", "MONTH(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_MONTH}", "MONTH(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_DAY": - replace("{Builtin_DAY}", "DAY(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_DAY}", "DAY(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_HOURS": - replace("{Builtin_HOURS}", "HOURS(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_HOURS}", "HOURS(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_MINUTES": - replace( - "{Builtin_MINUTES}", "MINUTES(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_MINUTES}", + "MINUTES(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SECONDS": - replace( - "{Builtin_SECONDS}", "SECONDS(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SECONDS}", + "SECONDS(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_TIMEZONE": - replace( - "{Builtin_TIMEZONE}", "TIMEZONE(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_TIMEZONE}", + "TIMEZONE(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_TZ": - replace("{Builtin_TZ}", "TZ(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_TZ}", "TZ(" + self.convert_node_arg(node.arg) + ")" + ) # # # 17.4.6 Hash functions elif node.name == "Builtin_MD5": - 
replace("{Builtin_MD5}", "MD5(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_MD5}", "MD5(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_SHA1": - replace("{Builtin_SHA1}", "SHA1(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_SHA1}", "SHA1(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_SHA256": - replace( - "{Builtin_SHA256}", "SHA256(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA256}", + "SHA256(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SHA384": - replace( - "{Builtin_SHA384}", "SHA384(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA384}", + "SHA384(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SHA512": - replace( - "{Builtin_SHA512}", "SHA512(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA512}", + "SHA512(" + self.convert_node_arg(node.arg) + ")", ) # Other @@ -1574,25 +1633,36 @@ def sparql_query_text(node): ) rows += "(" + " ".join(row) + ")" - replace("values", values + "{" + rows + "}") + self._replace("values", values + "{" + rows + "}") elif node.name == "ServiceGraphPattern": - replace( + self._replace( "{ServiceGraphPattern}", "SERVICE " - + convert_node_arg(node.term) + + self.convert_node_arg(node.term) + "{" + node.graph.name + "}", ) - traverse(node.graph, visitPre=sparql_query_text) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph # else: # raise ExpressionNotCoveredException("The expression {0} might not be covered yet.".format(node.name)) - traverse(query_algebra.algebra, visitPre=sparql_query_text) - query_from_algebra = open("query.txt", "r").read() - os.remove("query.txt") + def translateAlgebra(self) -> str: + traverse(self.query_algebra.algebra, visitPre=self.sparql_query_text) + return self._alg_translation + + +def translateAlgebra(query_algebra: Query) -> str: + """ + :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree). + :return: The query form generated from the SPARQL 1.1 algebra tree for select queries. + + """ + query_from_algebra = AlgebraTranslator( + query_algebra=query_algebra + ).translateAlgebra() return query_from_algebra From 9e06fb88638d14d4c04dd04dec86153c47a42363 Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Sun, 26 Mar 2023 08:55:57 +0000 Subject: [PATCH 3/3] Rename `AlgebraTranslator` to `_AlgebraTranslator` This is mainly to avoid increasing our public interface. People should be able to use `translateAlgebra` for most of their needs. Also make slight change to the docstrings. --- rdflib/plugins/sparql/algebra.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/rdflib/plugins/sparql/algebra.py b/rdflib/plugins/sparql/algebra.py index a4bfd2270..52aa92a7f 100644 --- a/rdflib/plugins/sparql/algebra.py +++ b/rdflib/plugins/sparql/algebra.py @@ -955,17 +955,22 @@ class ExpressionNotCoveredException(Exception): # noqa: N818 pass -class AlgebraTranslator: - """Translator of a Query's algebra to its equivalent SPARQL (string). +class _AlgebraTranslator: + """ + Translator of a Query's algebra to its equivalent SPARQL (string). + + Coded as a class to support storage of state during the translation process, + without use of a file. - Coded as a class to support storage of state during the translation process, without - use of a file. 
+ Anticipated Usage: - Anticipated Usage: translated_query = AlgebraTranslator(query).translateAlgebra() + .. code-block:: python - An external convenience function which wraps the above call, algebra.translateAlgebra(), - is supplied, so this class does not need to be referenced by client code at all in - normal use. + translated_query = _AlgebraTranslator(query).translateAlgebra() + + An external convenience function which wraps the above call, + `translateAlgebra`, is supplied, so this class does not need to be + referenced by client code at all in normal use. """ def __init__(self, query_algebra: Query): @@ -1655,12 +1660,13 @@ def translateAlgebra(self) -> str: def translateAlgebra(query_algebra: Query) -> str: """ + Translates a SPARQL 1.1 algebra tree into the corresponding query string. - :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree). - :return: The query form generated from the SPARQL 1.1 algebra tree for select queries. - + :param query_algebra: An algebra returned by `translateQuery`. + :return: The query form generated from the SPARQL 1.1 algebra tree for + SELECT queries. """ - query_from_algebra = AlgebraTranslator( + query_from_algebra = _AlgebraTranslator( query_algebra=query_algebra ).translateAlgebra() return query_from_algebra
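
A brief sketch of what the rename means for callers (illustrative query string; the leading underscore marks the class as internal):

    from rdflib.plugins.sparql import algebra
    from rdflib.plugins.sparql.parser import parseQuery

    q = algebra.translateQuery(parseQuery("SELECT ?s WHERE { ?s ?p ?o }"))

    # Client code keeps using the module-level helper, which is unchanged...
    text = algebra.translateAlgebra(q)

    # ...and which now wraps the renamed translator; reaching for the class
    # directly still works but relies on a private name.
    same_text = algebra._AlgebraTranslator(q).translateAlgebra()
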