summary | refs | log | tree | commit | diff
path: root/rdflib/util.py
diff options
context:
space:
mode:
author: Iwan Aucamp <aucampia@gmail.com> 2021-06-26 11:36:29 +0200
committer: Iwan Aucamp <aucampia@gmail.com> 2021-06-26 11:36:29 +0200
commit: 50c3112a6f6c91efac8bdae265c4b9823d9a8a75 (patch)
tree: 64a10e7188cc3c9ed42b4d2cae39fd72fb68bcce /rdflib/util.py
parent: a32f48b5e18b374d1be457e87d6441c19583575c (diff)
download: rdflib-50c3112a6f6c91efac8bdae265c4b9823d9a8a75.tar.gz
Prevent `from_n3` from unescaping `\xhh`
This is a fairly pragmatic fix to a problem which should be solved by changing `from_n3` to do the same as the actual n3/turtle parser. There are still many issues with this function, some of which I added tests for.
Diffstat (limited to 'rdflib/util.py')
-rw-r--r-- rdflib/util.py 5
1 file changed, 4 insertions, 1 deletion
diff --git a/rdflib/util.py b/rdflib/util.py
index 592e896e..4f4b8296 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -137,7 +137,7 @@ def to_term(s, default=None):
raise Exception(msg)
-def from_n3(s, default=None, backend=None, nsm=None):
+def from_n3(s: str, default=None, backend=None, nsm=None):
r'''
Creates the Identifier corresponding to the given n3 string.
@@ -193,6 +193,9 @@ def from_n3(s, default=None, backend=None, nsm=None):
language = rest[1:] # strip leading at sign
value = value.replace(r"\"", '"')
+ # unicode-escape interprets \xhh as an escape sequence,
+ # but n3 does not define it as such.
+ value = value.replace(r"\x", r"\\x")
# Hack: this should correctly handle strings with either native unicode
# characters, or \u1234 unicode escapes.
value = value.encode("raw-unicode-escape").decode("unicode-escape")