diff options
author | Iwan Aucamp <aucampia@gmail.com> | 2021-06-26 11:36:29 +0200 |
---|---|---|
committer | Iwan Aucamp <aucampia@gmail.com> | 2021-06-26 11:36:29 +0200 |
commit | 50c3112a6f6c91efac8bdae265c4b9823d9a8a75 (patch) | |
tree | 64a10e7188cc3c9ed42b4d2cae39fd72fb68bcce /rdflib/util.py | |
parent | a32f48b5e18b374d1be457e87d6441c19583575c (diff) | |
download | rdflib-50c3112a6f6c91efac8bdae265c4b9823d9a8a75.tar.gz |
Prevent `from_n3` from unescaping `\xhh`
This is a fairly pragmatic fix for a problem that should really be solved
by making `from_n3` behave the same as the actual N3/Turtle parser.
This function still has many other issues; I added tests for some of
them.
Diffstat (limited to 'rdflib/util.py')
-rw-r--r-- | rdflib/util.py | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/rdflib/util.py b/rdflib/util.py index 592e896e..4f4b8296 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -137,7 +137,7 @@ def to_term(s, default=None): raise Exception(msg) -def from_n3(s, default=None, backend=None, nsm=None): +def from_n3(s: str, default=None, backend=None, nsm=None): r''' Creates the Identifier corresponding to the given n3 string. @@ -193,6 +193,9 @@ def from_n3(s, default=None, backend=None, nsm=None): language = rest[1:] # strip leading at sign value = value.replace(r"\"", '"') + # unicode-escape interprets \xhh as an escape sequence, + # but n3 does not define it as such. + value = value.replace(r"\x", r"\\x") # Hack: this should correctly handle strings with either native unicode # characters, or \u1234 unicode escapes. value = value.encode("raw-unicode-escape").decode("unicode-escape") |