summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIwan Aucamp <aucampia@gmail.com>2022-01-18 01:12:53 +0100
committerGitHub <noreply@github.com>2022-01-18 01:12:53 +0100
commitb654142ed16e1a30e941772990afd24975de6910 (patch)
tree139a1361fdae65d5fd4e22307c31eaa6e29bacfe
parentc0bd5eaaa983461445b9469d731be4ae0e0cfc54 (diff)
parent9e678c1a233eea7011d9f515190e7b74df747356 (diff)
downloadrdflib-b654142ed16e1a30e941772990afd24975de6910.tar.gz
Merge pull request #1663 from aucampia/iwana-20220112T2104-fix_nt_unquote
Merging without second review as this is extensively tested against crafted expectations and the turtle parser which uses different code for unquoting.
-rw-r--r--.editorconfig6
-rw-r--r--CHANGELOG.md30
-rw-r--r--rdflib/compat.py60
-rw-r--r--rdflib/plugins/parsers/ntriples.py13
-rw-r--r--test/conftest.py5
-rw-r--r--test/test_issue247.py23
-rw-r--r--test/test_roundtrip.py81
-rw-r--r--test/test_testutils.py142
-rw-r--r--test/test_turtle_quoting.py148
-rw-r--r--test/test_variants.py182
-rw-r--r--test/testutils.py170
-rw-r--r--test/variants/README.md21
-rw-r--r--test/variants/rdf_prefix.jsonld17
-rw-r--r--test/variants/rdf_prefix.ttl5
-rw-r--r--test/variants/schema_only_base-asserts.json3
-rw-r--r--test/variants/schema_only_base.hext4
-rw-r--r--test/variants/schema_only_base.n38
-rw-r--r--test/variants/schema_only_base.nt4
-rw-r--r--test/variants/schema_only_base.ttl8
-rw-r--r--test/variants/special_chars-asserts.json3
-rw-r--r--test/variants/special_chars.hext7
-rw-r--r--test/variants/special_chars.nt7
-rw-r--r--test/variants/special_chars.ttl7
-rw-r--r--test/variants/xml_literal-asserts.json3
-rw-r--r--test/variants/xml_literal-variant-control.ttl5
-rw-r--r--test/variants/xml_literal.hext1
-rw-r--r--test/variants/xml_literal.nt2
-rw-r--r--test/variants/xml_literal.rdf12
-rw-r--r--test/variants/xml_literal.ttl7
29 files changed, 899 insertions, 85 deletions
diff --git a/.editorconfig b/.editorconfig
index 945fbabb..cc636124 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -11,16 +11,16 @@ insert_final_newline = true
trim_trailing_whitespace = true
# Leave line endings as-is in Markdown and ReStructuredText files
-[*.{md, rst}]
+[*.{md,rst}]
charset = utf-8
trim_trailing_whitespace = false
# Matches multiple files with brace expansion notation
# Set default charset
-[*.{js, py, pyi, toml, yml, yaml}]
+[*.{js,py,pyi,toml,yml,yaml}]
charset = utf-8
-[*.{yaml, yml}]
+[*.{yaml,yml,json}]
indent_style = space
indent_size = 2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e9c5ea4c..7f905aa9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,33 @@
+RELEASE TODO
+========================
+
+Changes:
+--------
+
+### Fixed the handling of escape sequences in the ntriples and nquads parsers
+
+These parsers will now correctly handle strings like `"\\r"`.
+
+The time it takes for these parsers to parse strings with escape sequences will increase, and the increase is correlated with the number of escape sequences that occur in a string.
+
+For strings with many escape sequences the parsing speed seems to be almost 4 times slower.
+
+Fixes [issue #1655](https://github.com/RDFLib/rdflib/issues/1655).
+
+### Deprecated Functions
+
+Marked the following functions as deprecated:
+
+- `rdflib.compat.decodeStringEscape`: This function is not used anywhere in
+ rdflib anymore and the utility that it does provide is not implemented
+ correctly. It will be removed in RDFLib 7.0.0.
+
+PRs merged since last release:
+------------------------------
+
+* TODO
+
+
2021-12-20 RELEASE 6.1.1
========================
Better testing and tidier code.
diff --git a/rdflib/compat.py b/rdflib/compat.py
index 139f2428..28017324 100644
--- a/rdflib/compat.py
+++ b/rdflib/compat.py
@@ -5,9 +5,11 @@ and different versions of support libraries.
import re
import codecs
-import typing as t
+import warnings
+from typing import TYPE_CHECKING, Match
-if t.TYPE_CHECKING:
+
+if TYPE_CHECKING:
import xml.etree.ElementTree as etree
else:
try:
@@ -59,6 +61,14 @@ def _unicodeExpand(s):
def decodeStringEscape(s):
+ warnings.warn(
+ DeprecationWarning(
+ "rdflib.compat.decodeStringEscape() is deprecated, "
+ "it will be removed in rdflib 7.0.0. "
+ "This function is not used anywhere in rdflib anymore "
+ "and the utility that it does provide is not implemented correctly."
+ )
+ )
r"""
s is byte-string - replace \ escapes in string
"""
@@ -76,28 +86,36 @@ def decodeStringEscape(s):
# return _unicodeExpand(s) # hmm - string escape doesn't do unicode escaping
-def decodeUnicodeEscape(s):
- """
- s is a unicode string
- replace ``\\n`` and ``\\u00AC`` unicode escapes
- """
- if "\\" not in s:
- # Most of times, there are no backslashes in strings.
- # In the general case, it could use maketrans and translate.
- return s
+_string_escape_map = {
+ "t": "\t",
+ "b": "\b",
+ "n": "\n",
+ "r": "\r",
+ "f": "\f",
+ '"': '"',
+ "'": "'",
+ "\\": "\\",
+}
- s = s.replace("\\t", "\t")
- s = s.replace("\\n", "\n")
- s = s.replace("\\r", "\r")
- s = s.replace("\\b", "\b")
- s = s.replace("\\f", "\f")
- s = s.replace('\\"', '"')
- s = s.replace("\\'", "'")
- s = s.replace("\\\\", "\\")
- s = _unicodeExpand(s) # hmm - string escape doesn't do unicode escaping
+def _turtle_escape_subber(match: Match[str]) -> str:
+ smatch, umatch = match.groups()
+ if smatch is not None:
+ return _string_escape_map[smatch]
+ else:
+ return chr(int(umatch[1:], 16))
- return s
+
+_turtle_escape_pattern = re.compile(
+ r"""\\(?:([tbnrf"'\\])|(u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8}))""",
+)
+
+
+def decodeUnicodeEscape(escaped: str) -> str:
+ if "\\" not in escaped:
+ # Most of times, there are no backslashes in strings.
+ return escaped
+ return _turtle_escape_pattern.sub(_turtle_escape_subber, escaped)
# Migration to abc in Python 3.8
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 1640f061..267e5c9d 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -13,7 +13,7 @@ from typing import IO, TYPE_CHECKING, Optional, Pattern, TextIO, Union
from rdflib.term import Node, URIRef as URI
from rdflib.term import BNode as bNode
from rdflib.term import Literal
-from rdflib.compat import decodeUnicodeEscape
+from rdflib.compat import decodeUnicodeEscape, _string_escape_map
from rdflib.exceptions import ParserError as ParseError
from rdflib.parser import InputSource, Parser
@@ -49,19 +49,18 @@ class DummySink(object):
print(s, p, o)
-quot = {"t": "\t", "n": "\n", "r": "\r", '"': '"', "\\": "\\"}
r_safe = re.compile(r"([\x20\x21\x23-\x5B\x5D-\x7E]+)")
-r_quot = re.compile(r'\\(t|n|r|"|\\)')
-r_uniquot = re.compile(r"\\u([0-9A-F]{4})|\\U([0-9A-F]{8})")
+r_quot = re.compile(r"""\\([tbnrf"'\\])""")
+r_uniquot = re.compile(r"\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8})")
-def unquote(s):
+def unquote(s: str) -> str:
"""Unquote an N-Triples string."""
if not validate:
if isinstance(s, str): # nquads
s = decodeUnicodeEscape(s)
else:
- s = s.decode("unicode-escape")
+ s = s.decode("unicode-escape") # type: ignore[unreachable]
return s
else:
@@ -76,7 +75,7 @@ def unquote(s):
m = r_quot.match(s)
if m:
s = s[2:]
- result.append(quot[m.group(1)])
+ result.append(_string_escape_map[m.group(1)])
continue
m = r_uniquot.match(s)
diff --git a/test/conftest.py b/test/conftest.py
index d34aeb05..4d6d23e4 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1,3 +1,8 @@
from .earl import EarlReporter
+import pytest
pytest_plugins = [EarlReporter.__module__]
+
+# This is here so that asserts from these modules are formatted for human
+# readability.
+pytest.register_assert_rewrite("test.testutils")
diff --git a/test/test_issue247.py b/test/test_issue247.py
index 7a51dd24..1a8c08e2 100644
--- a/test/test_issue247.py
+++ b/test/test_issue247.py
@@ -15,31 +15,8 @@ failxml = """\
</rdf:RDF>"""
-passxml = """\
-<rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:dc="http://purl.org/dc/elements/1.1/"
->
-
-<rdf:Description rdf:about="http://example.org/">
- <dc:description rdf:parseType="Literal">
- <p xmlns="http://www.w3.org/1999/xhtml"></p>
- </dc:description>
-</rdf:Description>
-
-</rdf:RDF>"""
-
class TestXMLLiteralwithLangAttr(unittest.TestCase):
- def test_successful_parse_of_literal_without_xmllang_attr(self):
- """
- Test parse of Literal without xmllang attr passes
- Parsing an RDF/XML document fails with a KeyError when
- it contains a XML Literal with a xml:lang attribute:
- """
- g = rdflib.Graph()
- g.parse(data=passxml, format="xml")
-
def test_failing_parse_of_literal_with_xmllang_attr(self):
"""
Show parse of Literal with xmllang attr fails
diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py
index 4a7b7acd..f65bd4e2 100644
--- a/test/test_roundtrip.py
+++ b/test/test_roundtrip.py
@@ -1,7 +1,7 @@
+from json.decoder import JSONDecodeError
import logging
import os.path
from pathlib import Path
-from test.testutils import GraphHelper
from typing import Callable, Collection, Iterable, List, Optional, Set, Tuple, Union
from xml.sax import SAXParseException
@@ -10,8 +10,12 @@ from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet
import rdflib
import rdflib.compare
+from rdflib.plugins.parsers.notation3 import BadSyntax
from rdflib.util import guess_format
from rdflib.namespace import XSD
+from test.testutils import GraphHelper
+
+logger = logging.getLogger(__name__)
"""
Test round-tripping by all serializers/parser that are registered.
@@ -35,7 +39,8 @@ but provides some roundtrip test functions of its own (see test_parser_hext.py
"""
-NT_DATA_DIR = Path(__file__).parent / "nt"
+TEST_DIR = Path(__file__).parent
+NT_DATA_DIR = TEST_DIR / "nt"
INVALID_NT_FILES = {
# illegal literal as subject
"literals-01.nt",
@@ -125,6 +130,30 @@ XFAILS = {
reason='HexTuples conflates "" and ""^^xsd:string strings',
raises=AssertionError,
),
+ ("xml", "special_chars.nt"): pytest.mark.xfail(
+ reason="missing escaping: PCDATA invalid Char value 12 and 8",
+ raises=SAXParseException,
+ ),
+ ("trix", "special_chars.nt"): pytest.mark.xfail(
+ reason="missing escaping: PCDATA invalid Char value 12 and 8",
+ raises=SAXParseException,
+ ),
+ ("n3", "rdf_prefix.jsonld"): pytest.mark.xfail(
+ reason="missing 'rdf:' prefix",
+ raises=BadSyntax,
+ ),
+ ("ttl", "rdf_prefix.jsonld"): pytest.mark.xfail(
+ reason="missing 'rdf:' prefix",
+ raises=BadSyntax,
+ ),
+ ("trig", "rdf_prefix.jsonld"): pytest.mark.xfail(
+ reason="missing 'rdf:' prefix",
+ raises=BadSyntax,
+ ),
+ ("turtle", "rdf_prefix.jsonld"): pytest.mark.xfail(
+ reason="missing 'rdf:' prefix",
+ raises=BadSyntax,
+ ),
}
# This is for files which can only be represented properly in one format
@@ -149,17 +178,15 @@ def collect_files(
return result
-def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) -> None:
-
+def roundtrip(infmt: str, testfmt: str, source: Path) -> None:
g1 = rdflib.ConjunctiveGraph()
g1.parse(source, format=infmt)
s = g1.serialize(format=testfmt)
- if verbose:
- print("S:")
- print(s, flush=True)
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug("serailized = \n%s", s)
g2 = rdflib.ConjunctiveGraph()
g2.parse(data=s, format=testfmt)
@@ -176,23 +203,16 @@ def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) ->
c.remove((s, p, o))
c.add((s, p, rdflib.Literal(str(o))))
- if verbose:
+ if logger.isEnabledFor(logging.DEBUG):
both, first, second = rdflib.compare.graph_diff(g1, g2)
- print("Diff:")
- print("%d triples in both" % len(both))
- print("G1 Only:")
- for t in sorted(first):
- print(t)
+ logger.debug("Items in both:\n%s", GraphHelper.format_graph_set(both))
+ logger.debug("Items in G1 Only:\n%s", GraphHelper.format_graph_set(first))
+ logger.debug("Items in G2 Only:\n%s", GraphHelper.format_graph_set(second))
- print("--------------------")
- print("G2 Only")
- for t in sorted(second):
- print(t)
+ GraphHelper.assert_isomorphic(g1, g2)
- assert rdflib.compare.isomorphic(g1, g2)
-
- if verbose:
- print("Ok!")
+ if logger.isEnabledFor(logging.DEBUG):
+ logger.debug("OK")
_formats: Optional[Set[str]] = None
@@ -211,7 +231,9 @@ def get_formats() -> Set[str]:
return _formats
-def make_cases(files: Collection[Tuple[Path, str]]) -> Iterable[ParameterSet]:
+def make_cases(
+ files: Collection[Tuple[Path, str]], hext_okay: bool = False
+) -> Iterable[ParameterSet]:
formats = get_formats()
for testfmt in formats:
# if testfmt == "hext":
@@ -251,3 +273,18 @@ def test_nt(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Pat
@pytest.mark.parametrize("checker, args", make_cases(collect_files(N3_DATA_DIR)))
def test_n3(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]):
checker(*args)
+
+
+EXTRA_FILES = [
+ (TEST_DIR / "variants" / "special_chars.nt", "ntriples"),
+ (TEST_DIR / "variants" / "xml_literal.rdf", "xml"),
+ (TEST_DIR / "variants" / "rdf_prefix.jsonld", "json-ld"),
+]
+
+
+@pytest.mark.parametrize("checker, args", make_cases(EXTRA_FILES, hext_okay=True))
+def test_extra(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]):
+ """
+ Round tripping works correctly for selected extra files.
+ """
+ checker(*args)
diff --git a/test/test_testutils.py b/test/test_testutils.py
index 7fe5ce62..f416da2a 100644
--- a/test/test_testutils.py
+++ b/test/test_testutils.py
@@ -1,7 +1,11 @@
+from dataclasses import dataclass
import os
from pathlib import PurePosixPath, PureWindowsPath
from typing import Optional
-from .testutils import file_uri_to_path
+
+from rdflib.graph import ConjunctiveGraph, Graph
+from rdflib.term import URIRef
+from .testutils import GraphHelper, file_uri_to_path
import pytest
@@ -89,3 +93,139 @@ def test_paths(
expected_posix_path: Optional[str],
) -> None:
check(file_uri, expected_windows_path, expected_posix_path)
+
+
+@dataclass
+class SetsEqualTestCase:
+ equal: bool
+ format: str
+ ignore_blanks: bool
+ lhs: str
+ rhs: str
+
+
+@pytest.mark.parametrize(
+ "test_case",
+ [
+ SetsEqualTestCase(
+ equal=False,
+ format="turtle",
+ ignore_blanks=False,
+ lhs="""
+ @prefix eg: <ex:> .
+ _:a _:b _:c .
+ eg:o0 eg:p0 eg:s0 .
+ eg:o1 eg:p1 eg:s1 .
+ """,
+ rhs="""
+ @prefix eg: <ex:> .
+ eg:o0 eg:p0 eg:s0 .
+ eg:o1 eg:p1 eg:s1 .
+ """,
+ ),
+ SetsEqualTestCase(
+ equal=True,
+ format="turtle",
+ ignore_blanks=True,
+ lhs="""
+ @prefix eg: <ex:> .
+ _:a _:b _:c .
+ eg:o0 eg:p0 eg:s0 .
+ eg:o1 eg:p1 eg:s1 .
+ """,
+ rhs="""
+ @prefix eg: <ex:> .
+ eg:o0 eg:p0 eg:s0 .
+ eg:o1 eg:p1 eg:s1 .
+ """,
+ ),
+ SetsEqualTestCase(
+ equal=True,
+ format="turtle",
+ ignore_blanks=False,
+ lhs="""
+ <ex:o0> <ex:p0> <ex:s0> .
+ <ex:o1> <ex:p1> <ex:s1> .
+ """,
+ rhs="""
+ @prefix eg: <ex:> .
+ eg:o0 eg:p0 eg:s0 .
+ eg:o1 eg:p1 eg:s1 .
+ """,
+ ),
+ SetsEqualTestCase(
+ equal=False,
+ format="turtle",
+ ignore_blanks=False,
+ lhs="""
+ <ex:o0> <ex:p0> <ex:s0> .
+ <ex:o1> <ex:p1> <ex:s1> .
+ <ex:o2> <ex:p2> <ex:s2> .
+ """,
+ rhs="""
+ @prefix eg: <ex:> .
+ eg:o0 eg:p0 eg:s0 .
+ eg:o1 eg:p1 eg:s1 .
+ """,
+ ),
+ ],
+)
+def test_assert_sets_equal(test_case: SetsEqualTestCase):
+ """
+ GraphHelper.sets_equals and related functions work correctly in both
+ positive and negative cases.
+ """
+ lhs_graph: Graph = Graph().parse(data=test_case.lhs, format=test_case.format)
+ rhs_graph: Graph = Graph().parse(data=test_case.rhs, format=test_case.format)
+
+ public_id = URIRef("example:graph")
+ lhs_cgraph: ConjunctiveGraph = ConjunctiveGraph()
+ lhs_cgraph.parse(data=test_case.lhs, format=test_case.format, publicID=public_id)
+
+ rhs_cgraph: ConjunctiveGraph = ConjunctiveGraph()
+ rhs_cgraph.parse(data=test_case.rhs, format=test_case.format, publicID=public_id)
+
+ assert isinstance(lhs_cgraph, ConjunctiveGraph)
+ assert isinstance(rhs_cgraph, ConjunctiveGraph)
+ graph: Graph
+ cgraph: ConjunctiveGraph
+ for graph, cgraph in ((lhs_graph, lhs_cgraph), (rhs_graph, rhs_cgraph)):
+ GraphHelper.assert_sets_equals(graph, graph, True)
+ GraphHelper.assert_sets_equals(cgraph, cgraph, True)
+ GraphHelper.assert_triple_sets_equals(graph, graph, True)
+ GraphHelper.assert_triple_sets_equals(cgraph, cgraph, True)
+ GraphHelper.assert_quad_sets_equals(cgraph, cgraph, True)
+
+ if not test_case.equal:
+ with pytest.raises(AssertionError):
+ GraphHelper.assert_sets_equals(
+ lhs_graph, rhs_graph, test_case.ignore_blanks
+ )
+ with pytest.raises(AssertionError):
+ GraphHelper.assert_sets_equals(
+ lhs_cgraph, rhs_cgraph, test_case.ignore_blanks
+ )
+ with pytest.raises(AssertionError):
+ GraphHelper.assert_triple_sets_equals(
+ lhs_graph, rhs_graph, test_case.ignore_blanks
+ )
+ with pytest.raises(AssertionError):
+ GraphHelper.assert_triple_sets_equals(
+ lhs_cgraph, rhs_cgraph, test_case.ignore_blanks
+ )
+ with pytest.raises(AssertionError):
+ GraphHelper.assert_quad_sets_equals(
+ lhs_cgraph, rhs_cgraph, test_case.ignore_blanks
+ )
+ else:
+ GraphHelper.assert_sets_equals(lhs_graph, rhs_graph, test_case.ignore_blanks)
+ GraphHelper.assert_sets_equals(lhs_cgraph, rhs_cgraph, test_case.ignore_blanks)
+ GraphHelper.assert_triple_sets_equals(
+ lhs_graph, rhs_graph, test_case.ignore_blanks
+ )
+ GraphHelper.assert_triple_sets_equals(
+ lhs_cgraph, rhs_cgraph, test_case.ignore_blanks
+ )
+ GraphHelper.assert_quad_sets_equals(
+ lhs_cgraph, rhs_cgraph, test_case.ignore_blanks
+ )
diff --git a/test/test_turtle_quoting.py b/test/test_turtle_quoting.py
new file mode 100644
index 00000000..dfcf4507
--- /dev/null
+++ b/test/test_turtle_quoting.py
@@ -0,0 +1,148 @@
+"""
+This module is intended for tests related to quoting/escaping and
+unquoting/unescaping in various formats that are related to turtle, such as
+ntriples, nquads, trig and n3.
+"""
+
+from typing import Callable, Dict, Iterable, List, Tuple
+import pytest
+from rdflib.graph import ConjunctiveGraph
+
+from rdflib.plugins.parsers import ntriples
+from rdflib.term import Literal
+
+string_escape_map = {
+ "t": "\t",
+ "b": "\b",
+ "n": "\n",
+ "r": "\r",
+ "f": "\f",
+ '"': '"',
+ "'": "'",
+ "\\": "\\",
+}
+
+
+def make_correctness_pairs() -> List[Tuple[str, str]]:
+ """
+ Creates pairs of quoted and unquoted strings.
+ """
+ result = []
+
+ def add_pair(escape: str, unescaped: str) -> None:
+ result.append((f"\\{escape}", unescaped))
+ result.append((f"\\\\{escape}", f"\\{escape}"))
+ result.append((f"\\\\\\{escape}", f"\\{unescaped}"))
+
+ chars = "A1a\\\nøæå"
+ for char in chars:
+ code_point = ord(char)
+ add_pair(f"u{code_point:04x}", char)
+ add_pair(f"u{code_point:04X}", char)
+ add_pair(f"U{code_point:08x}", char)
+ add_pair(f"U{code_point:08X}", char)
+
+ string_escapes = "tbnrf'"
+ for char in string_escapes:
+ add_pair(f"{char}", string_escape_map[char])
+
+ # special handling because «"» should not appear in string, and add_pair
+ # will add it.
+ result.append(('\\"', '"'))
+ result.append(('\\\\\\"', '\\"'))
+
+ # special handling because «\» should not appear in string, and add_pair
+ # will add it.
+ result.append(("\\\\", "\\"))
+ result.append(("\\\\\\\\", "\\\\"))
+
+ return result
+
+
+CORRECTNESS_PAIRS = make_correctness_pairs()
+
+
+def ntriples_unquote_validate(input: str) -> str:
+ """
+ This function wraps `ntriples.unquote` in a way that ensures that `ntriples.validate` is always ``True`` when it runs.
+ """
+ old_validate = ntriples.validate
+ try:
+ ntriples.validate = True
+ return ntriples.unquote(input)
+ finally:
+ ntriples.validate = old_validate
+
+
+def ntriples_unquote(input: str) -> str:
+ """
+ This function wraps `ntriples.unquote` in a way that ensures that `ntriples.validate` is always ``False`` when it runs.
+ """
+ old_validate = ntriples.validate
+ try:
+ ntriples.validate = False
+ return ntriples.unquote(input)
+ finally:
+ ntriples.validate = old_validate
+
+
+unquoters: Dict[str, Callable[[str], str]] = {
+ "ntriples_unquote": ntriples_unquote,
+ "ntriples_unquote_validate": ntriples_unquote_validate,
+}
+
+
+def make_correctness_tests(
+ selectors: Iterable[str],
+) -> Iterable[Tuple[str, str, str]]:
+ """
+ This function creates a cartesian product of the selectors and
+ `CORRECTNESS_PAIRS` that is suitable for use as pytest parameters.
+ """
+ for selector in selectors:
+ for quoted, unquoted in CORRECTNESS_PAIRS:
+ yield selector, quoted, unquoted
+
+
+@pytest.mark.parametrize(
+ "unquoter_key, quoted, unquoted", make_correctness_tests(unquoters.keys())
+)
+def test_unquote_correctness(
+ unquoter_key: str,
+ quoted: str,
+ unquoted: str,
+) -> None:
+ """
+ Various unquote functions work correctly.
+ """
+ unquoter = unquoters[unquoter_key]
+ assert unquoted == unquoter(quoted)
+
+
+QUAD_FORMATS = {"nquads"}
+
+
+@pytest.mark.parametrize(
+ "format, quoted, unquoted",
+ make_correctness_tests(["turtle", "ntriples", "nquads"]),
+)
+def test_parse_correctness(
+ format: str,
+ quoted: str,
+ unquoted: str,
+) -> None:
+ """
+ Quoted strings parse correctly
+ """
+ if format in QUAD_FORMATS:
+ data = f'<example:Subject> <example:Predicate> "{quoted}" <example:Graph>.'
+ else:
+ data = f'<example:Subject> <example:Predicate> "{quoted}".'
+ graph = ConjunctiveGraph()
+ graph.parse(data=data, format=format)
+ objs = list(graph.objects())
+ assert len(objs) == 1
+ obj = objs[0]
+ assert isinstance(obj, Literal)
+ assert isinstance(obj.value, str)
+ assert obj.value == unquoted
diff --git a/test/test_variants.py b/test/test_variants.py
new file mode 100644
index 00000000..b7d8ece4
--- /dev/null
+++ b/test/test_variants.py
@@ -0,0 +1,182 @@
+import json
+import logging
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path, PurePath
+from test.testutils import GraphHelper
+from typing import (
+ ClassVar,
+ Collection,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Pattern,
+ Tuple,
+ Union,
+ cast,
+)
+
+import pytest
+from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet
+
+import rdflib.compare
+import rdflib.util
+from rdflib.graph import ConjunctiveGraph, Graph
+from rdflib.namespace import XSD
+from rdflib.term import URIRef
+from rdflib.util import guess_format
+
+TEST_DIR = Path(__file__).parent.absolute()
+VARIANTS_DIR = TEST_DIR / "variants"
+
+# Put files from other directories in here.
+EXTRA_FILES: List[Path] = []
+
+SUFFIX_FORMAT_MAP = {**rdflib.util.SUFFIX_FORMAT_MAP, "hext": "hext"}
+
+
+@dataclass
+class GraphAsserts:
+ """
+ A specification of asserts that must be checked against a graph. This is
+ read in from a JSON dict.
+ """
+
+ quad_count: Optional[int] = None
+
+ def check(self, graph: ConjunctiveGraph) -> None:
+ if self.quad_count is not None:
+ assert self.quad_count == len(list(graph.quads()))
+
+
+@dataclass(order=True)
+class GraphVariants:
+ """
+ Represents a graph with multiple variants in different files.
+ """
+
+ key: str
+ variants: Dict[str, Path] = field(default_factory=dict)
+ asserts: GraphAsserts = field(default_factory=lambda: GraphAsserts())
+
+ _variant_regex: ClassVar[Pattern[str]] = re.compile(
+ r"^(.*?)(|[-]variant-[^/]+|[-]asserts)$"
+ )
+
+ def pytest_param(
+ self,
+ marks: Optional[
+ Union[MarkDecorator, Collection[Union[MarkDecorator, Mark]]]
+ ] = None,
+ ) -> ParameterSet:
+ if marks is None:
+ marks = cast(Tuple[MarkDecorator], tuple())
+ logging.debug("self = %s", self)
+ return pytest.param(self, id=self.key, marks=marks)
+
+ @classmethod
+ def _decompose_path(cls, file_path: Path, basedir: Optional[Path]):
+ if basedir:
+ file_path = file_path.absolute().resolve().relative_to(basedir)
+ name_noext, ext = os.path.splitext(file_path)
+ name_noext_path = PurePath(name_noext)
+ match = cls._variant_regex.match("/".join(name_noext_path.parts))
+ if match is None:
+ raise RuntimeError(f"{cls._variant_regex!r} did not match {name_noext}")
+ file_key = match.group(1)
+ variant_key = f"{match.group(2)}{ext}"
+ return (file_key, variant_key)
+
+ @classmethod
+ def for_files(
+ cls, file_paths: Iterable[Path], basedir: Optional[Path] = None
+ ) -> Dict[str, "GraphVariants"]:
+ graph_varaint_dict: Dict[str, GraphVariants] = {}
+ for file_path in file_paths:
+ logging.debug("file_path = %s", file_path)
+ file_key, variant_key = cls._decompose_path(file_path, basedir)
+ # file_key = f"{file_path.parent / stem}"
+ if file_key not in graph_varaint_dict:
+ graph_variant = graph_varaint_dict[file_key] = GraphVariants(file_key)
+ else:
+ graph_variant = graph_varaint_dict[file_key]
+ if variant_key.endswith("-asserts.json"):
+ graph_variant.asserts = GraphAsserts(
+ **json.loads(file_path.read_text())
+ )
+ else:
+ graph_variant.variants[variant_key] = file_path
+ return graph_varaint_dict
+
+ @classmethod
+ def for_directory(
+ cls, directory: Path, basedir: Optional[Path] = None
+ ) -> Dict[str, "GraphVariants"]:
+ file_paths = []
+ for file_path in directory.glob("**/*"):
+ if not file_path.is_file():
+ continue
+ if file_path.name.endswith(".md"):
+ continue
+ file_paths.append(file_path)
+ logging.debug("file_paths = %s", file_paths)
+ return cls.for_files(file_paths, basedir)
+
+
+GRAPH_VARIANT_DICT = {
+ **GraphVariants.for_directory(VARIANTS_DIR, TEST_DIR),
+ **GraphVariants.for_files(EXTRA_FILES, TEST_DIR),
+}
+
+EXPECTED_FAILURES = {
+ ("variants/schema_only_base"): pytest.mark.xfail(
+ reason="Some issue with handling base URI that does not end with a slash",
+ raises=ValueError,
+ ),
+}
+
+
+def tests_found() -> None:
+ logging.debug("VARIANTS_DIR = %s", VARIANTS_DIR)
+ logging.debug("EXTRA_FILES = %s", EXTRA_FILES)
+ assert len(GRAPH_VARIANT_DICT) >= 1
+ logging.debug("ALL_VARIANT_GRAPHS = %s", GRAPH_VARIANT_DICT)
+ xml_literal = GRAPH_VARIANT_DICT.get("variants/xml_literal")
+ assert xml_literal is not None
+ assert len(xml_literal.variants) >= 5
+ assert xml_literal.asserts.quad_count == 1
+
+
+@pytest.mark.parametrize(
+ "graph_variant",
+ [
+ graph_variant.pytest_param(EXPECTED_FAILURES.get(graph_variant.key))
+ for graph_variant in GRAPH_VARIANT_DICT.values()
+ ],
+)
+def test_variants(graph_variant: GraphVariants) -> None:
+ """
+ All variants of a graph are isomorphic with the first variant, and thus
+ each other.
+ """
+ logging.debug("graph_variant = %s", graph_variant)
+ public_id = URIRef(f"example:{graph_variant.key}")
+ assert len(graph_variant.variants) > 0
+ first_graph: Optional[Graph] = None
+ for variant_key, variant_path in graph_variant.variants.items():
+ logging.debug("variant_path = %s", variant_path)
+ format = guess_format(variant_path.name, fmap=SUFFIX_FORMAT_MAP)
+ assert format is not None, f"could not determine format for {variant_path.name}"
+ graph = ConjunctiveGraph()
+ graph.parse(variant_path, format=format, publicID=public_id)
+ # Stripping data types as different parsers (e.g. hext) have different
+ # opinions of when a bare string is of datatype XSD.string or not.
+ # Probably something that needs more investigation.
+ GraphHelper.strip_literal_datatypes(graph, {XSD.string})
+ graph_variant.asserts.check(graph)
+ if first_graph is None:
+ first_graph = graph
+ else:
+ GraphHelper.assert_isomorphic(first_graph, graph)
diff --git a/test/testutils.py b/test/testutils.py
index 90b73c11..2d450070 100644
--- a/test/testutils.py
+++ b/test/testutils.py
@@ -21,6 +21,7 @@ from typing import (
Dict,
Any,
TypeVar,
+ Union,
cast,
NamedTuple,
)
@@ -32,12 +33,13 @@ import email.message
import unittest
from rdflib import BNode, Graph, ConjunctiveGraph
-from rdflib.term import Node
+from rdflib.term import Identifier, Literal, Node, URIRef
from unittest.mock import MagicMock, Mock
from urllib.error import HTTPError
from urllib.request import urlopen
from pathlib import PurePath, PureWindowsPath
from nturl2path import url2pathname as nt_url2pathname
+import rdflib.compare
if TYPE_CHECKING:
import typing_extensions as te
@@ -65,21 +67,173 @@ def ctx_http_server(
server_thread.join()
+IdentifierTriple = Tuple[Identifier, Identifier, Identifier]
+IdentifierTripleSet = Set[IdentifierTriple]
+IdentifierQuad = Tuple[Identifier, Identifier, Identifier, Identifier]
+IdentifierQuadSet = Set[IdentifierQuad]
+
+
class GraphHelper:
+ """
+ Provides methods which are useful for working with graphs.
+ """
+
+ @classmethod
+ def identifier(self, node: Node) -> Identifier:
+ """
+ Return the identifier of the provided node.
+ """
+ if isinstance(node, Graph):
+ return node.identifier
+ else:
+ return cast(Identifier, node)
+
@classmethod
- def triple_set(cls, graph: Graph) -> Set[Tuple[Node, Node, Node]]:
- return set(graph.triples((None, None, None)))
+ def identifiers(cls, nodes: Tuple[Node, ...]) -> Tuple[Identifier, ...]:
+ """
+ Return the identifiers of the provided nodes.
+ """
+ result = []
+ for node in nodes:
+ result.append(cls.identifier(node))
+ return tuple(result)
@classmethod
- def triple_sets(cls, graphs: Iterable[Graph]) -> List[Set[Tuple[Node, Node, Node]]]:
- result: List[Set[Tuple[Node, Node, Node]]] = []
+ def triple_set(
+ cls, graph: Graph, exclude_blanks: bool = False
+ ) -> IdentifierTripleSet:
+ result = set()
+ for sn, pn, on in graph.triples((None, None, None)):
+ s, p, o = cls.identifiers((sn, pn, on))
+ if exclude_blanks and (
+ isinstance(s, BNode) or isinstance(p, BNode) or isinstance(o, BNode)
+ ):
+ continue
+ result.add((s, p, o))
+ return result
+
+ @classmethod
+ def triple_sets(
+ cls, graphs: Iterable[Graph], exclude_blanks: bool = False
+ ) -> List[IdentifierTripleSet]:
+ """
+ Extracts the set of all triples from each of the supplied graphs.
+ """
+ result: List[IdentifierTripleSet] = []
for graph in graphs:
- result.append(cls.triple_set(graph))
+ result.append(cls.triple_set(graph, exclude_blanks))
return result
@classmethod
- def equals(cls, lhs: Graph, rhs: Graph) -> bool:
- return cls.triple_set(lhs) == cls.triple_set(rhs)
+ def quad_set(
+ cls, graph: ConjunctiveGraph, exclude_blanks: bool = False
+ ) -> IdentifierQuadSet:
+ """
+ Extracts the set of all quads from the supplied ConjunctiveGraph.
+ """
+ result = set()
+ for sn, pn, on, gn in graph.quads((None, None, None, None)):
+ s, p, o, g = cls.identifiers((sn, pn, on, gn))
+ if exclude_blanks and (
+ isinstance(s, BNode)
+ or isinstance(p, BNode)
+ or isinstance(o, BNode)
+ or isinstance(g, BNode)
+ ):
+ continue
+ result.add((s, p, o, g))
+ return result
+
+ @classmethod
+ def triple_or_quad_set(
+ cls, graph: Graph, exclude_blanks: bool = False
+ ) -> Union[IdentifierQuadSet, IdentifierTripleSet]:
+ """
+ Extracts quad or triple sets depending on whether or not the graph is
+ ConjunctiveGraph or a normal Graph.
+ """
+ if isinstance(graph, ConjunctiveGraph):
+ return cls.quad_set(graph, exclude_blanks)
+ return cls.triple_set(graph, exclude_blanks)
+
+ @classmethod
+ def assert_triple_sets_equals(
+ cls, lhs: Graph, rhs: Graph, exclude_blanks: bool = False
+ ) -> None:
+ """
+ Asserts that the triple sets in the two graphs are equal.
+ """
+ lhs_set = cls.triple_set(lhs, exclude_blanks)
+ rhs_set = cls.triple_set(rhs, exclude_blanks)
+ assert lhs_set == rhs_set
+
+ @classmethod
+ def assert_quad_sets_equals(
+ cls, lhs: ConjunctiveGraph, rhs: ConjunctiveGraph, exclude_blanks: bool = False
+ ) -> None:
+ """
+ Asserts that the quads sets in the two graphs are equal.
+ """
+ lhs_set = cls.quad_set(lhs, exclude_blanks)
+ rhs_set = cls.quad_set(rhs, exclude_blanks)
+ assert lhs_set == rhs_set
+
+ @classmethod
+ def assert_sets_equals(
+ cls, lhs: Graph, rhs: Graph, exclude_blanks: bool = False
+ ) -> None:
+ """
+ Asserts that the quad or triple sets from the two graphs are equal.
+ """
+ lhs_set = cls.triple_or_quad_set(lhs, exclude_blanks)
+ rhs_set = cls.triple_or_quad_set(rhs, exclude_blanks)
+ assert lhs_set == rhs_set
+
+ @classmethod
+ def format_set(
+ cls, item_set: Union[IdentifierQuadSet, IdentifierTripleSet], prefix: str = " "
+ ) -> str:
+ items = []
+ for item in item_set:
+ items.append(f"{prefix}{item}")
+ return "\n".join(items)
+
+ @classmethod
+ def format_graph_set(cls, graph: Graph, prefix: str = " ") -> str:
+ return cls.format_set(cls.triple_or_quad_set(graph), prefix)
+
+ @classmethod
+ def assert_isomorphic(cls, lhs: Graph, rhs: Graph) -> None:
+ """
+ This asserts that the two graphs are isomorphic, providing a nicely
+ formatted error message if they are not.
+ """
+
+ def format_report() -> str:
+ in_both, in_lhs, in_rhs = rdflib.compare.graph_diff(lhs, rhs)
+ return (
+ "in both:\n"
+ f"{cls.format_graph_set(in_both)}"
+ "\nonly in first:\n"
+ f"{cls.format_graph_set(in_lhs)}"
+ "\nonly in second:\n"
+ f"{cls.format_graph_set(in_rhs)}"
+ )
+
+ assert rdflib.compare.isomorphic(lhs, rhs), format_report()
+
+ @classmethod
+ def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None:
+ """
+ Strips datatypes in the provided set from literals in the graph.
+ """
+ for object in graph.objects():
+ if not isinstance(object, Literal):
+ continue
+ if object.datatype is None:
+ continue
+ if object.datatype in datatypes:
+ object._datatype = None
GenericT = TypeVar("GenericT", bound=Any)
diff --git a/test/variants/README.md b/test/variants/README.md
new file mode 100644
index 00000000..dce13c0e
--- /dev/null
+++ b/test/variants/README.md
@@ -0,0 +1,21 @@
+# multi variant graphs
+
+This directory contains variants of the same graph encoded in different
+formats, or differently in the same format.
+
+The graph that a specific file is a variant of is determined by its filename.
+Files that differ only in file extension but have the same basename are
+considered variants of the same graph. Additionally, any suffix that matches
+`-variant-[^/]*` is excluded when determining the graph key, so the following
+files are all considered variants of the same graph:
+
+```
+test/variants/literal_with_lang-variant-control.ttl
+test/variants/literal_with_lang.nt
+test/variants/literal_with_lang.rdf
+test/variants/literal_with_lang.ttl
+```
+
+Some additional assertions on graphs can be specified in file names that end
+with `-asserts.json`, for details on supported asserts see
+`test/test_variants.py`.
diff --git a/test/variants/rdf_prefix.jsonld b/test/variants/rdf_prefix.jsonld
new file mode 100644
index 00000000..c0033d8b
--- /dev/null
+++ b/test/variants/rdf_prefix.jsonld
@@ -0,0 +1,17 @@
+{
+ "slots": [
+ {
+ "name": "type",
+ "slot_uri": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+ }
+ ],
+ "@context": [
+ {
+ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+ "@vocab": "https://w3id.org/linkml/",
+ "slot_uri": {
+ "@type": "@id"
+ }
+ }
+ ]
+}
diff --git a/test/variants/rdf_prefix.ttl b/test/variants/rdf_prefix.ttl
new file mode 100644
index 00000000..38048f9c
--- /dev/null
+++ b/test/variants/rdf_prefix.ttl
@@ -0,0 +1,5 @@
+@prefix : <https://w3id.org/linkml/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+
+[] :slots [ :name "type" ;
+ :slot_uri rdf:type ] .
diff --git a/test/variants/schema_only_base-asserts.json b/test/variants/schema_only_base-asserts.json
new file mode 100644
index 00000000..6e4a630d
--- /dev/null
+++ b/test/variants/schema_only_base-asserts.json
@@ -0,0 +1,3 @@
+{
+ "quad_count": 4
+}
diff --git a/test/variants/schema_only_base.hext b/test/variants/schema_only_base.hext
new file mode 100644
index 00000000..12154366
--- /dev/null
+++ b/test/variants/schema_only_base.hext
@@ -0,0 +1,4 @@
+["example:/class_to_class", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2000/01/rdf-schema#Class", "globalId", "", ""]
+["example:/class_to_class", "example:/priority", "4", "http://www.w3.org/2001/XMLSchema#integer", "", ""]
+["example:/class_to_class", "example:/color", "blue", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+["example:/class_to_class", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "example:/Category", "globalId", "", ""]
diff --git a/test/variants/schema_only_base.n3 b/test/variants/schema_only_base.n3
new file mode 100644
index 00000000..bdae1e0d
--- /dev/null
+++ b/test/variants/schema_only_base.n3
@@ -0,0 +1,8 @@
+@base <example:> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+<class_to_class>
+ a
+ rdfs:Class ,
+ <Category> ;
+ <color> "blue" ;
+ <priority> 4 .
diff --git a/test/variants/schema_only_base.nt b/test/variants/schema_only_base.nt
new file mode 100644
index 00000000..7a89f2aa
--- /dev/null
+++ b/test/variants/schema_only_base.nt
@@ -0,0 +1,4 @@
+<example:/class_to_class> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <example:/Category> .
+<example:/class_to_class> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2000/01/rdf-schema#Class> .
+<example:/class_to_class> <example:/color> "blue" .
+<example:/class_to_class> <example:/priority> "4"^^<http://www.w3.org/2001/XMLSchema#integer> .
diff --git a/test/variants/schema_only_base.ttl b/test/variants/schema_only_base.ttl
new file mode 100644
index 00000000..a23d5d08
--- /dev/null
+++ b/test/variants/schema_only_base.ttl
@@ -0,0 +1,8 @@
+@base <example:> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+
+<class_to_class> a <Category>,
+ rdfs:Class ;
+ <color> "blue" ;
+ <priority> 4 .
diff --git a/test/variants/special_chars-asserts.json b/test/variants/special_chars-asserts.json
new file mode 100644
index 00000000..4a62d909
--- /dev/null
+++ b/test/variants/special_chars-asserts.json
@@ -0,0 +1,3 @@
+{
+ "quad_count": 7
+}
diff --git a/test/variants/special_chars.hext b/test/variants/special_chars.hext
new file mode 100644
index 00000000..cef4cb82
--- /dev/null
+++ b/test/variants/special_chars.hext
@@ -0,0 +1,7 @@
+["example:special", "example:newline", "\n", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+["example:special", "example:form_feed", "\f", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+["example:special", "example:backspace", "\b", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+["example:special", "example:carriage_return", "\r", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+["example:special", "example:backslash", "\\", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+["example:special", "example:string-000", "\\r", "http://www.w3.org/2001/XMLSchema#string", "", ""]
+["example:special", "example:string-001", "\\\r", "http://www.w3.org/2001/XMLSchema#string", "", ""]
diff --git a/test/variants/special_chars.nt b/test/variants/special_chars.nt
new file mode 100644
index 00000000..6620d909
--- /dev/null
+++ b/test/variants/special_chars.nt
@@ -0,0 +1,7 @@
+<example:special> <example:newline> "\n" .
+<example:special> <example:form_feed> "\f" .
+<example:special> <example:backspace> "\b" .
+<example:special> <example:carriage_return> "\r" .
+<example:special> <example:backslash> "\\" .
+<example:special> <example:string-000> "\\r" .
+<example:special> <example:string-001> "\\\r" .
diff --git a/test/variants/special_chars.ttl b/test/variants/special_chars.ttl
new file mode 100644
index 00000000..6620d909
--- /dev/null
+++ b/test/variants/special_chars.ttl
@@ -0,0 +1,7 @@
+<example:special> <example:newline> "\n" .
+<example:special> <example:form_feed> "\f" .
+<example:special> <example:backspace> "\b" .
+<example:special> <example:carriage_return> "\r" .
+<example:special> <example:backslash> "\\" .
+<example:special> <example:string-000> "\\r" .
+<example:special> <example:string-001> "\\\r" .
diff --git a/test/variants/xml_literal-asserts.json b/test/variants/xml_literal-asserts.json
new file mode 100644
index 00000000..be09e348
--- /dev/null
+++ b/test/variants/xml_literal-asserts.json
@@ -0,0 +1,3 @@
+{
+ "quad_count": 1
+}
diff --git a/test/variants/xml_literal-variant-control.ttl b/test/variants/xml_literal-variant-control.ttl
new file mode 100644
index 00000000..79a4c054
--- /dev/null
+++ b/test/variants/xml_literal-variant-control.ttl
@@ -0,0 +1,5 @@
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+
+<http://example.org/> dc:description """\n <p xmlns="http://www.w3.org/1999/xhtml"/>\n """^^rdf:XMLLiteral .
+
diff --git a/test/variants/xml_literal.hext b/test/variants/xml_literal.hext
new file mode 100644
index 00000000..1b5922cd
--- /dev/null
+++ b/test/variants/xml_literal.hext
@@ -0,0 +1 @@
+["http://example.org/", "http://purl.org/dc/elements/1.1/description", "\n <p xmlns=\"http://www.w3.org/1999/xhtml\"/>\n ", "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral", "", ""]
diff --git a/test/variants/xml_literal.nt b/test/variants/xml_literal.nt
new file mode 100644
index 00000000..1060bb6e
--- /dev/null
+++ b/test/variants/xml_literal.nt
@@ -0,0 +1,2 @@
+<http://example.org/> <http://purl.org/dc/elements/1.1/description> "\n <p xmlns=\"http://www.w3.org/1999/xhtml\"/>\n "^^<http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral> .
+
diff --git a/test/variants/xml_literal.rdf b/test/variants/xml_literal.rdf
new file mode 100644
index 00000000..74a02957
--- /dev/null
+++ b/test/variants/xml_literal.rdf
@@ -0,0 +1,12 @@
+<rdf:RDF
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+>
+
+<rdf:Description rdf:about="http://example.org/">
+ <dc:description rdf:parseType="Literal">
+ <p xmlns="http://www.w3.org/1999/xhtml"></p>
+ </dc:description>
+</rdf:Description>
+
+</rdf:RDF>
diff --git a/test/variants/xml_literal.ttl b/test/variants/xml_literal.ttl
new file mode 100644
index 00000000..eadae6f8
--- /dev/null
+++ b/test/variants/xml_literal.ttl
@@ -0,0 +1,7 @@
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+
+<http://example.org/> dc:description """
+ <p xmlns="http://www.w3.org/1999/xhtml"/>
+ """^^rdf:XMLLiteral .
+