summaryrefslogtreecommitdiff
path: root/test/test_w3c_spec/test_sparql_w3c.py
diff options
context:
space:
mode:
Diffstat (limited to 'test/test_w3c_spec/test_sparql_w3c.py')
-rw-r--r--test/test_w3c_spec/test_sparql_w3c.py568
1 files changed, 0 insertions, 568 deletions
diff --git a/test/test_w3c_spec/test_sparql_w3c.py b/test/test_w3c_spec/test_sparql_w3c.py
deleted file mode 100644
index 1f5792b7..00000000
--- a/test/test_w3c_spec/test_sparql_w3c.py
+++ /dev/null
@@ -1,568 +0,0 @@
-from __future__ import print_function
-
-import sys
-from io import TextIOWrapper
-from pathlib import PurePath
-
-# Needed to pass
-# http://www.w3.org/2009/sparql/docs/tests/data-sparql11/
-# syntax-update-2/manifest#syntax-update-other-01
-from test.utils import file_uri_to_path
-from test.utils.manifest import RDFTest, ResultType, read_manifest
-from test.utils.namespace import MF, UT
-
-import pytest
-
-sys.setrecursionlimit(6000) # default is 1000
-
-
-import typing
-from collections import Counter
-from io import BytesIO
-from typing import Callable, Dict, List, Optional, Tuple, cast
-from urllib.parse import urljoin
-
-from rdflib import BNode, Dataset, Graph, URIRef
-from rdflib.compare import isomorphic
-from rdflib.compat import bopen, decodeStringEscape
-from rdflib.plugins import sparql as rdflib_sparql_module
-from rdflib.plugins.sparql.algebra import pprintAlgebra, translateQuery, translateUpdate
-from rdflib.plugins.sparql.parser import parseQuery, parseUpdate
-from rdflib.plugins.sparql.results.rdfresults import RDFResultParser
-from rdflib.plugins.sparql.update import evalUpdate
-from rdflib.query import Result
-from rdflib.term import Identifier, Node
-
-
-def eq(a, b, msg):
- # return eq_(a, b, msg + ": (%r!=%r)" % (a, b))
- assert a == b, msg + ": (%r!=%r)" % (a, b)
-
-
-def setFlags():
- import rdflib
-
- # Several tests rely on lexical form of literals being kept!
- rdflib.NORMALIZE_LITERALS = False
-
- # we need an explicit default graph
- rdflib_sparql_module.SPARQL_DEFAULT_GRAPH_UNION = False
-
- # we obviously need this
- rdflib.DAWG_LITERAL_COLLATION = True
-
-
-def resetFlags():
- import rdflib
-
- # Several tests rely on lexical form of literals being kept!
- rdflib.NORMALIZE_LITERALS = True
-
- # we need an explicit default graph
- rdflib_sparql_module.SPARQL_DEFAULT_GRAPH_UNION = True
-
- # we obviously need this
- rdflib.DAWG_LITERAL_COLLATION = False
-
-
-DEBUG_FAIL = True
-DEBUG_FAIL = False
-
-DEBUG_ERROR = True
-DEBUG_ERROR = False
-
-SPARQL10Tests = True
-# SPARQL10Tests = False
-
-SPARQL11Tests = True
-# SPARQL11Tests=False
-
-RDFLibTests = True
-
-DETAILEDASSERT = True
-# DETAILEDASSERT=False
-
-
-NAME = None
-
-fails: typing.Counter[str] = Counter()
-errors: typing.Counter[str] = Counter()
-
-failed_tests = []
-error_tests = []
-
-
-def bopen_read_close(fn):
- with bopen(fn) as f:
- return f.read()
-
-
-from test.test_w3c_spec.test_sparql_w3c_skipped import skiptests
-
-
-def _fmt(f):
- if f.endswith(".rdf"):
- return "xml"
- return "turtle"
-
-
-def bindingsCompatible(a, b):
- """
-
- Are two binding-sets compatible.
-
- From the spec: http://www.w3.org/2009/sparql/docs/tests/#queryevaltests
-
- A SPARQL implementation passes a query evaluation test if the
- graph produced by evaluating the query against the RDF dataset
- (and encoding in the DAWG result set vocabulary, if necessary) is
- equivalent [RDF-CONCEPTS] to the graph named in the result (after
- encoding in the DAWG result set vocabulary, if necessary). Note
- that, solution order only is considered relevant, if the result is
- expressed in the test suite in the DAWG result set vocabulary,
- with explicit rs:index triples; otherwise solution order is
- considered irrelevant for passing. Equivalence can be tested by
- checking that the graphs are isomorphic and have identical IRI and
- literal nodes. Note that testing whether two result sets are
- isomorphic is simpler than full graph isomorphism. Iterating over
- rows in one set, finding a match with the other set, removing this
- pair, then making sure all rows are accounted for, achieves the
- same effect.
- """
-
- def rowCompatible(x, y):
- m = {}
- y = y.asdict()
- for v1, b1 in x.asdict().items():
- if v1 not in y:
- return False
- if isinstance(b1, BNode):
- if b1 in m:
- if y[v1] != m[b1]:
- return False
- else:
- m[b1] = y[v1]
- else:
- # if y[v1]!=b1:
- # return False
- try:
- if y[v1].neq(b1):
- return False
- except TypeError:
- return False
- return True
-
- if not a:
- if b:
- return False
- return True
-
- x = next(iter(a))
-
- for y in b:
- if rowCompatible(x, y):
- if bindingsCompatible(a - set((x,)), b - set((y,))):
- return True
-
- return False
-
-
-def pp_binding(solutions):
- """
- Pretty print a single binding - for less eye-strain when debugging
- """
- return (
- "\n["
- + ",\n\t".join(
- "{" + ", ".join("%s:%s" % (x[0], x[1].n3()) for x in bindings.items()) + "}"
- for bindings in solutions
- )
- + "]\n"
- )
-
-
-def update_test(t: RDFTest):
-
- # the update-eval tests refer to graphs on http://example.org
- rdflib_sparql_module.SPARQL_LOAD_GRAPHS = False
-
- uri, name, comment, data, graphdata, query, res, syntax = t
- # These casts are here because the RDFTest type is not sufficently
- # expressive to capture the two different flavors of tests.
- res = cast(Optional[ResultType], res)
- graphdata = cast(Optional[List[Tuple[Identifier, Identifier]]], graphdata)
-
- query_path: PurePath = file_uri_to_path(query)
-
- if uri in skiptests:
- pytest.xfail()
-
- try:
- g = Dataset()
-
- if not res:
- if syntax:
- with bopen(query_path) as f:
- translateUpdate(parseUpdate(f))
- else:
- try:
- with bopen(query_path) as f:
- translateUpdate(parseUpdate(f))
- raise AssertionError("Query shouldn't have parsed!")
- except:
- pass # negative syntax test
- return
-
- res = cast(ResultType, res)
- resdata: Identifier
- resgraphdata: List[Tuple[Identifier, Identifier]]
- resdata, resgraphdata = res # type: ignore[assignment]
-
- # read input graphs
- if data:
- g.default_context.parse(data, format=_fmt(data))
-
- if graphdata:
- for x, l in graphdata:
- g.parse(x, publicID=URIRef(l), format=_fmt(x))
-
- with bopen(query_path) as f:
- req = translateUpdate(parseUpdate(f))
- evalUpdate(g, req)
-
- # read expected results
- resg = Dataset()
- if resdata:
- resg.default_context.parse(resdata, format=_fmt(resdata))
-
- if resgraphdata:
- for x, l in resgraphdata:
- resg.parse(x, publicID=URIRef(l), format=_fmt(x))
-
- eq(
- set(ctx.identifier for ctx in g.contexts() if ctx != g.default_context),
- set(
- ctx.identifier for ctx in resg.contexts() if ctx != resg.default_context
- ),
- "named graphs in datasets do not match",
- )
- assert isomorphic(
- g.default_context, resg.default_context
- ), "Default graphs are not isomorphic"
-
- for ctx in g.contexts():
- if ctx == g.default_context:
- continue
- assert isomorphic(ctx, resg.get_context(ctx.identifier)), (
- "Graphs with ID %s are not isomorphic" % ctx.identifier
- )
-
- except Exception as e:
-
- if isinstance(e, AssertionError):
- failed_tests.append(uri)
- fails[str(e)] += 1
- else:
- error_tests.append(uri)
- errors[str(e)] += 1
-
- if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
- print("======================================")
- print(uri)
- print(name)
- print(comment)
-
- if not res:
- if syntax:
- print("Positive syntax test")
- else:
- print("Negative syntax test")
-
- if data:
- print("----------------- DATA --------------------")
- print(">>>", data)
- data_path: PurePath = file_uri_to_path(data)
- print(bopen_read_close(data_path))
- if graphdata:
- print("----------------- GRAPHDATA --------------------")
- for x, l in graphdata:
- print(">>>", x, l)
- x_path: PurePath = file_uri_to_path(x)
- print(bopen_read_close(x_path))
-
- print("----------------- Request -------------------")
- print(">>>", query)
- print(bopen_read_close(query_path))
-
- if res:
- if resdata:
- print("----------------- RES DATA --------------------")
- print(">>>", resdata)
- resdata_path: PurePath = file_uri_to_path(resdata)
- print(bopen_read_close(resdata_path))
- if resgraphdata:
- print("----------------- RES GRAPHDATA -------------------")
- for x, l in resgraphdata:
- print(">>>", x, l)
- x_path = file_uri_to_path(x)
- print(bopen_read_close(x_path))
-
- print("------------- MY RESULT ----------")
- print(g.serialize(format="trig"))
-
- try:
- pq = translateUpdate(parseUpdate(bopen_read_close(query_path)))
- print("----------------- Parsed ------------------")
- pprintAlgebra(pq)
- # print pq
- except:
- print("(parser error)")
-
- print(decodeStringEscape(str(e)))
-
- import pdb
-
- pdb.post_mortem(sys.exc_info()[2])
- raise
-
-
-def query_test(t: RDFTest):
- uri, name, comment, data, graphdata, query, resfile, syntax = t
-
- # These casts are here because the RDFTest type is not sufficently
- # expressive to capture the two different flavors of tests.
- graphdata = cast(Optional[List[Identifier]], graphdata)
- resfile = cast(Optional[Identifier], resfile)
-
- # the query-eval tests refer to graphs to load by resolvable filenames
- rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True
-
- query_path: PurePath = file_uri_to_path(query)
-
- resfile_path = file_uri_to_path(resfile) if resfile else None
-
- if uri in skiptests:
- pytest.xfail()
-
- def skip(reason="(none)"):
- print("Skipping %s from now on." % uri)
- with bopen("skiptests.list", "a") as f:
- f.write("%s\t%s\n" % (uri, reason))
-
- try:
- g = Dataset()
- if data:
- g.default_context.parse(data, format=_fmt(data))
-
- if graphdata:
- for x in graphdata:
- g.parse(x, format=_fmt(x))
-
- if not resfile:
- # no result - syntax test
-
- if syntax:
- translateQuery(
- parseQuery(bopen_read_close(query_path)), base=urljoin(query, ".")
- )
- else:
- # negative syntax test
- try:
- translateQuery(
- parseQuery(bopen_read_close(query_path)),
- base=urljoin(query, "."),
- )
-
- assert False, "Query should not have parsed!"
- except:
- pass # it's fine - the query should not parse
- return
-
- # eval test - carry out query
- res2 = g.query(bopen_read_close(query_path), base=urljoin(query, "."))
-
- if resfile.endswith("ttl"):
- resg = Graph()
- resg.parse(resfile, format="turtle", publicID=resfile)
- res = RDFResultParser().parse(resg)
- elif resfile.endswith("rdf"):
- resg = Graph()
- resg.parse(resfile, publicID=resfile)
- res = RDFResultParser().parse(resg)
- else:
- with bopen(resfile_path) as f:
- if resfile.endswith("srj"):
- res = Result.parse(f, format="json")
- elif resfile.endswith("tsv"):
- res = Result.parse(TextIOWrapper(f), format="tsv")
-
- elif resfile.endswith("csv"):
- res = Result.parse(f, format="csv")
-
- # CSV is lossy, round-trip our own resultset to
- # lose the same info :)
-
- # write bytes, read strings...
- s = BytesIO()
- res2.serialize(s, format="csv")
- s.seek(0)
- res2 = Result.parse(s, format="csv")
- s.close()
-
- else:
- res = Result.parse(f, format="xml")
-
- if not DETAILEDASSERT:
- eq(res.type, res2.type, "Types do not match")
- if res.type == "SELECT":
- assert res2.vars is not None
- eq(set(res.vars), set(res2.vars), "Vars do not match")
- comp = bindingsCompatible(set(res), set(res2))
- assert comp, "Bindings do not match"
- elif res.type == "ASK":
- eq(res.askAnswer, res2.askAnswer, "Ask answer does not match")
- elif res.type in ("DESCRIBE", "CONSTRUCT"):
- assert isomorphic(res.graph, res2.graph), "graphs are not isomorphic!"
- else:
- raise Exception("Unknown result type: %s" % res.type)
- else:
- eq(
- res.type,
- res2.type,
- "Types do not match: %r != %r" % (res.type, res2.type),
- )
- if res.type == "SELECT":
- assert res2.vars is not None
- eq(
- set(res.vars),
- set(res2.vars),
- "Vars do not match: %r != %r" % (set(res.vars), set(res2.vars)),
- )
- assert bindingsCompatible(
- set(res), set(res2)
- ), "Bindings do not match: \nexpected:\n%r\n!=\ngot:\n%r" % (
- res.serialize(format="txt", namespace_manager=g.namespace_manager),
- res2.serialize(format="txt", namespace_manager=g.namespace_manager),
- )
- elif res.type == "ASK":
- eq(
- res.askAnswer,
- res2.askAnswer,
- "Ask answer does not match: %r != %r"
- % (res.askAnswer, res2.askAnswer),
- )
- elif res.type in ("DESCRIBE", "CONSTRUCT"):
- assert isomorphic(res.graph, res2.graph), "graphs are not isomorphic!"
- else:
- raise Exception("Unknown result type: %s" % res.type)
-
- except Exception as e:
-
- if isinstance(e, AssertionError):
- failed_tests.append(uri)
- fails[str(e)] += 1
- else:
- error_tests.append(uri)
- errors[str(e)] += 1
-
- if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
- print("======================================")
- print(uri)
- print(name)
- print(comment)
-
- if not resfile:
- if syntax:
- print("Positive syntax test")
- else:
- print("Negative syntax test")
-
- if data:
- print("----------------- DATA --------------------")
- print(">>>", data)
- data_path: PurePath = file_uri_to_path(data)
- print(bopen_read_close(data_path))
- if graphdata:
- print("----------------- GRAPHDATA --------------------")
- for x in graphdata:
- print(">>>", x)
- x_path: PurePath = file_uri_to_path(x)
- print(bopen_read_close(x_path))
-
- print("----------------- Query -------------------")
- print(">>>", query)
- print(bopen_read_close(query_path))
- if resfile:
- print("----------------- Res -------------------")
- print(">>>", resfile)
- print(bopen_read_close(resfile_path))
-
- try:
- pq = parseQuery(bopen_read_close(query_path))
- print("----------------- Parsed ------------------")
- pprintAlgebra(translateQuery(pq, base=urljoin(query, ".")))
- except:
- print("(parser error)")
-
- print(decodeStringEscape(str(e)))
-
- import pdb
-
- pdb.post_mortem(sys.exc_info()[2])
- raise
-
-
-testers: Dict[Node, Callable[[RDFTest], None]] = {
- UT.UpdateEvaluationTest: update_test,
- MF.UpdateEvaluationTest: update_test,
- MF.PositiveUpdateSyntaxTest11: update_test,
- MF.NegativeUpdateSyntaxTest11: update_test,
- MF.QueryEvaluationTest: query_test,
- MF.NegativeSyntaxTest11: query_test,
- MF.PositiveSyntaxTest11: query_test,
- MF.CSVResultFormatTest: query_test,
-}
-
-
-@pytest.fixture(scope="module", autouse=True)
-def handle_flags():
- setFlags()
- yield
- resetFlags()
-
-
-@pytest.mark.parametrize(
- "rdf_test_uri, type, rdf_test",
- read_manifest("test/data/suites/w3c/dawg-data-r2/manifest-evaluation.ttl"),
-)
-def test_dawg_data_sparql10(rdf_test_uri: URIRef, type: Node, rdf_test: RDFTest):
- testers[type](rdf_test)
-
-
-@pytest.mark.parametrize(
- "rdf_test_uri, type, rdf_test",
- read_manifest("test/data/suites/w3c/sparql11/manifest-all.ttl"),
-)
-def test_dawg_data_sparql11(rdf_test_uri: URIRef, type: Node, rdf_test: RDFTest):
- testers[type](rdf_test)
-
-
-EXPECTED_FAILURES: Dict[str, str] = {}
-
-for test in [
- "test-codepoint-escape-02",
- "test-codepoint-escape-03",
- "test-codepoint-escape-04",
-]:
- EXPECTED_FAILURES[test] = "known codepoint escape issue"
-
-
-@pytest.mark.parametrize(
- "rdf_test_uri, type, rdf_test",
- read_manifest("test/data/suites/rdflib/sparql/manifest.ttl"),
-)
-def test_dawg_rdflib(rdf_test_uri: URIRef, type: Node, rdf_test: RDFTest):
- suffix = rdf_test_uri.split("#")[1]
- if suffix in EXPECTED_FAILURES:
- pytest.xfail(EXPECTED_FAILURES[suffix])
- testers[type](rdf_test)