Skip to content

Commit a44bd99

Browse files
jclerman and aucampia authored
fix: add more type-hinting for SPARQL plugin (#2265)
Here, adding type-hints to some of the SPARQL parser plugin code. Includes a couple of small consequent changes: 1. Minor refactor of `prettify_parsetree()`, separating the public-facing callable from the internal code that does not need to be public-facing. That allows the public-facing callable to have more informative and restrictive type-hints for its arguments. 2. Added some test-coverage for `expandUnicodeEscapes()` - initially for my own understanding, but seems useful to leave it in place since I didn't see test-coverage for that function. There should be no backwards-incompatible changes in this PR - at least, not intentionally. --------- Co-authored-by: Iwan Aucamp <[email protected]>
1 parent 8c48549 commit a44bd99

File tree

3 files changed

+60
-24
lines changed

3 files changed

+60
-24
lines changed

rdflib/plugins/sparql/parser.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
import re
88
import sys
9+
from typing import Any, BinaryIO
10+
from typing import Optional as OptionalType
11+
from typing import TextIO, Tuple, Union
912

1013
from pyparsing import CaselessKeyword as Keyword # watch out :)
1114
from pyparsing import (
@@ -37,15 +40,15 @@
3740
# ---------------- ACTIONS
3841

3942

40-
def neg(literal):
43+
def neg(literal) -> rdflib.Literal:
4144
return rdflib.Literal(-literal, datatype=literal.datatype)
4245

4346

44-
def setLanguage(terms):
47+
def setLanguage(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal:
4548
return rdflib.Literal(terms[0], lang=terms[1])
4649

4750

48-
def setDataType(terms):
51+
def setDataType(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal:
4952
return rdflib.Literal(terms[0], datatype=terms[1])
5053

5154

@@ -1508,25 +1511,27 @@ def expandCollection(terms):
15081511
UpdateUnit.ignore("#" + restOfLine)
15091512

15101513

1511-
expandUnicodeEscapes_re = re.compile(r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I)
1514+
expandUnicodeEscapes_re: re.Pattern = re.compile(
1515+
r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I
1516+
)
15121517

15131518

1514-
def expandUnicodeEscapes(q):
1519+
def expandUnicodeEscapes(q: str) -> str:
15151520
r"""
15161521
The syntax of the SPARQL Query Language is expressed over code points in Unicode [UNICODE]. The encoding is always UTF-8 [RFC3629].
15171522
Unicode code points may also be expressed using an \ uXXXX (U+0 to U+FFFF) or \ UXXXXXXXX syntax (for U+10000 onwards) where X is a hexadecimal digit [0-9A-F]
15181523
"""
15191524

1520-
def expand(m):
1525+
def expand(m: re.Match) -> str:
15211526
try:
15221527
return chr(int(m.group(1), 16))
1523-
except: # noqa: E722
1524-
raise Exception("Invalid unicode code point: " + m)
1528+
except (ValueError, OverflowError) as e:
1529+
raise ValueError("Invalid unicode code point: " + m.group(1)) from e
15251530

15261531
return expandUnicodeEscapes_re.sub(expand, q)
15271532

15281533

1529-
def parseQuery(q):
1534+
def parseQuery(q: Union[str, bytes, TextIO, BinaryIO]) -> ParseResults:
15301535
if hasattr(q, "read"):
15311536
q = q.read()
15321537
if isinstance(q, bytes):
@@ -1536,7 +1541,7 @@ def parseQuery(q):
15361541
return Query.parseString(q, parseAll=True)
15371542

15381543

1539-
def parseUpdate(q):
1544+
def parseUpdate(q: Union[str, bytes, TextIO, BinaryIO]):
15401545
if hasattr(q, "read"):
15411546
q = q.read()
15421547

rdflib/plugins/sparql/parserutils.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from collections import OrderedDict
22
from types import MethodType
3-
from typing import TYPE_CHECKING, Any
3+
from typing import TYPE_CHECKING, Any, List, Tuple, Union
44

55
from pyparsing import ParseResults, TokenConverter, originalTextFor
66

77
from rdflib import BNode, Variable
8+
from rdflib.term import Identifier
89

910
if TYPE_CHECKING:
1011
from rdflib.plugins.sparql.sparql import FrozenBindings
@@ -252,26 +253,34 @@ def setEvalFn(self, evalfn):
252253
return self
253254

254255

255-
def prettify_parsetree(t, indent="", depth=0):
256-
out = []
257-
if isinstance(t, ParseResults):
258-
for e in t.asList():
259-
out.append(prettify_parsetree(e, indent, depth + 1))
260-
for k, v in sorted(t.items()):
261-
out.append("%s%s- %s:\n" % (indent, " " * depth, k))
262-
out.append(prettify_parsetree(v, indent, depth + 1))
263-
elif isinstance(t, CompValue):
256+
def prettify_parsetree(t: ParseResults, indent: str = "", depth: int = 0) -> str:
257+
out: List[str] = []
258+
for e in t.asList():
259+
out.append(_prettify_sub_parsetree(e, indent, depth + 1))
260+
for k, v in sorted(t.items()):
261+
out.append("%s%s- %s:\n" % (indent, " " * depth, k))
262+
out.append(_prettify_sub_parsetree(v, indent, depth + 1))
263+
return "".join(out)
264+
265+
266+
def _prettify_sub_parsetree(
267+
t: Union[Identifier, CompValue, set, list, dict, Tuple, bool, None],
268+
indent: str = "",
269+
depth: int = 0,
270+
) -> str:
271+
out: List[str] = []
272+
if isinstance(t, CompValue):
264273
out.append("%s%s> %s:\n" % (indent, " " * depth, t.name))
265274
for k, v in t.items():
266275
out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k))
267-
out.append(prettify_parsetree(v, indent, depth + 2))
276+
out.append(_prettify_sub_parsetree(v, indent, depth + 2))
268277
elif isinstance(t, dict):
269278
for k, v in t.items():
270279
out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k))
271-
out.append(prettify_parsetree(v, indent, depth + 2))
280+
out.append(_prettify_sub_parsetree(v, indent, depth + 2))
272281
elif isinstance(t, list):
273282
for e in t:
274-
out.append(prettify_parsetree(e, indent, depth + 1))
283+
out.append(_prettify_sub_parsetree(e, indent, depth + 1))
275284
else:
276285
out.append("%s%s- %r\n" % (indent, " " * depth, t))
277286
return "".join(out)

test/test_sparql/test_sparql.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from rdflib.plugins.sparql.algebra import translateQuery
1717
from rdflib.plugins.sparql.evaluate import evalPart
1818
from rdflib.plugins.sparql.evalutils import _eval
19-
from rdflib.plugins.sparql.parser import parseQuery
19+
from rdflib.plugins.sparql.parser import expandUnicodeEscapes, parseQuery
2020
from rdflib.plugins.sparql.parserutils import prettify_parsetree
2121
from rdflib.plugins.sparql.sparql import SPARQLError
2222
from rdflib.query import Result, ResultRow
@@ -957,3 +957,25 @@ def test_sparql_describe(
957957
subjects = {s for s in r.graph.subjects() if not isinstance(s, BNode)}
958958
assert subjects == expected_subjects
959959
assert len(r.graph) == expected_size
960+
961+
962+
@pytest.mark.parametrize(
963+
"arg, expected_result, expected_valid",
964+
[
965+
("abc", "abc", True),
966+
("1234", "1234", True),
967+
(r"1234\u0050", "1234P", True),
968+
(r"1234\u00e3", "1234\u00e3", True),
969+
(r"1234\u00e3\u00e5", "1234ãå", True),
970+
(r"1234\u900000e5", "", False),
971+
(r"1234\u010000e5", "", False),
972+
(r"1234\u001000e5", "1234\U001000e5", True),
973+
],
974+
)
975+
def test_expand_unicode_escapes(arg: str, expected_result: str, expected_valid: bool):
976+
if expected_valid:
977+
actual_result = expandUnicodeEscapes(arg)
978+
assert actual_result == expected_result
979+
else:
980+
with pytest.raises(ValueError, match="Invalid unicode code point"):
981+
_ = expandUnicodeEscapes(arg)

0 commit comments

Comments (0)