diff options
author | Nicholas Car <nick@kurrawong.net> | 2020-03-23 12:56:21 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-23 12:56:21 +1000 |
commit | c3857e6539ebd2681b9ced7eadd2dc818fc24ac2 (patch) | |
tree | dadfa6be8c8fd01cb97730d9e39743eba47880b4 | |
parent | b53dec4bf68df6f0f75d0dfe116fa21fb593b252 (diff) | |
parent | 31fb70710bcfe87a9c3284b24c9633b54c839148 (diff) | |
download | rdflib-c3857e6539ebd2681b9ced7eadd2dc818fc24ac2.tar.gz |
Merge branch 'master' into pr_451_redux
29 files changed, 501 insertions, 58 deletions
diff --git a/rdflib/extras/infixowl.py b/rdflib/extras/infixowl.py index f3ce40d3..fb033198 100644 --- a/rdflib/extras/infixowl.py +++ b/rdflib/extras/infixowl.py @@ -318,7 +318,7 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False): for val in store.objects(subject=thing, predicate=OWL_NS.hasValue): return u'( %s VALUE %s )' % ( propString, - manchesterSyntax(val.encode('utf-8', 'ignore'), store)) + manchesterSyntax(val, store)) for someClass in store.objects( subject=thing, predicate=OWL_NS.someValuesFrom): return u'( %s SOME %s )' % ( @@ -329,7 +329,7 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False): for s, p, o in store.triples_choices( (thing, list(cardLookup.keys()), None)): return u'( %s %s %s )' % ( - propString, cardLookup[p], o.encode('utf-8', 'ignore')) + propString, cardLookup[p], o) compl = list(store.objects(subject=thing, predicate=OWL_NS.complementOf)) if compl: return '( NOT %s )' % (manchesterSyntax(compl[0], store)) @@ -357,9 +357,9 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False): return '[]' # +thing._id.encode('utf-8')+'</em>' label = first(Class(thing, graph=store).label) if label: - return label.encode('utf-8', 'ignore') + return label else: - return qname.encode('utf-8', 'ignore') + return qname def GetIdentifiedClasses(graph): diff --git a/rdflib/namespace.py b/rdflib/namespace.py index 4ca1b09a..bea99905 100644 --- a/rdflib/namespace.py +++ b/rdflib/namespace.py @@ -353,7 +353,12 @@ class NamespaceManager(object): def __init__(self, graph): self.graph = graph self.__cache = {} + self.__cache_strict = {} self.__log = None + self.__strie = {} + self.__trie = {} + for p, n in self.namespaces(): # self.bind is not always called + insert_trie(self.__trie, str(n)) self.bind("xml", "http://www.w3.org/XML/1998/namespace") self.bind("rdf", RDF) self.bind("rdfs", RDFS) @@ -361,6 +366,10 @@ class NamespaceManager(object): def reset(self): self.__cache = {} + self.__strie = {} + self.__trie = {} + for p, n in self.namespaces(): # repopulate the trie + insert_trie(self.__trie, str(n)) def __get_store(self): return self.graph.store @@ -373,6 +382,13 @@ class NamespaceManager(object): else: return ":".join((prefix, name)) + def qname_strict(self, uri): + prefix, namespace, name = self.compute_qname_strict(uri) + if prefix == '': + return name + else: + return ':'.join((prefix, name)) + def normalizeUri(self, rdfTerm): """ Takes an RDF Term and 'normalizes' it into a QName (using the @@ -381,6 +397,8 @@ class NamespaceManager(object): """ try: namespace, name = split_uri(rdfTerm) + if namespace not in self.__strie: + insert_strie(self.__strie, self.__trie, str(namespace)) namespace = URIRef(text_type(namespace)) except: if isinstance(rdfTerm, Variable): @@ -404,9 +422,25 @@ class NamespaceManager(object): ) if uri not in self.__cache: - namespace, name = split_uri(uri) + try: + namespace, name = split_uri(uri) + except ValueError as e: + namespace = URIRef(uri) + prefix = self.store.prefix(namespace) + if not prefix: + raise e + if namespace not in self.__strie: + insert_strie(self.__strie, self.__trie, namespace) + + if self.__strie[namespace]: + pl_namespace = get_longest_namespace(self.__strie[namespace], uri) + if pl_namespace is not None: + namespace = pl_namespace + name = uri[len(namespace):] + namespace = URIRef(namespace) - prefix = self.store.prefix(namespace) + prefix = self.store.prefix(namespace) # warning multiple prefixes problem + if prefix is None: if not generate: raise KeyError( @@ -422,6 +456,56 @@ class NamespaceManager(object): self.__cache[uri] = (prefix, namespace, name) return self.__cache[uri] + def compute_qname_strict(self, uri, generate=True): + # code repeated to avoid branching on strict every time + # if output needs to be strict (e.g. for xml) then + # only the strict output should bear the overhead + prefix, namespace, name = self.compute_qname(uri) + if is_ncname(text_type(name)): + return prefix, namespace, name + else: + if uri not in self.__cache_strict: + try: + namespace, name = split_uri(uri, NAME_START_CATEGORIES) + except ValueError as e: + message = ('This graph cannot be serialized to a strict format ' + 'because there is no valid way to shorten {}'.format(uri)) + raise ValueError(message) + # omitted for strict since NCNames cannot be empty + #namespace = URIRef(uri) + #prefix = self.store.prefix(namespace) + #if not prefix: + #raise e + + if namespace not in self.__strie: + insert_strie(self.__strie, self.__trie, namespace) + + # omitted for strict + #if self.__strie[namespace]: + #pl_namespace = get_longest_namespace(self.__strie[namespace], uri) + #if pl_namespace is not None: + #namespace = pl_namespace + #name = uri[len(namespace):] + + namespace = URIRef(namespace) + prefix = self.store.prefix(namespace) # warning multiple prefixes problem + + if prefix is None: + if not generate: + raise KeyError( + "No known prefix for {} and generate=False".format(namespace) + ) + num = 1 + while 1: + prefix = "ns%s" % num + if not self.store.namespace(prefix): + break + num += 1 + self.bind(prefix, namespace) + self.__cache_strict[uri] = (prefix, namespace, name) + + return self.__cache_strict[uri] + def bind(self, prefix, namespace, override=True, replace=False): """bind a given namespace to the prefix @@ -447,6 +531,7 @@ class NamespaceManager(object): if replace: self.store.bind(prefix, namespace) + insert_trie(self.__trie, str(namespace)) return # prefix already in use for different namespace @@ -476,6 +561,7 @@ class NamespaceManager(object): else: if override or bound_prefix.startswith("_"): # or a generated prefix self.store.bind(prefix, namespace) + insert_trie(self.__trie, str(namespace)) def namespaces(self): for prefix, namespace in self.store.namespaces(): @@ -527,6 +613,7 @@ class NamespaceManager(object): NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"] +SPLIT_START_CATEGORIES = NAME_START_CATEGORIES + ['Nd'] NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"] ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"] @@ -539,27 +626,28 @@ ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"] def is_ncname(name): - first = name[0] - if first == "_" or category(first) in NAME_START_CATEGORIES: - for i in range(1, len(name)): - c = name[i] - if not category(c) in NAME_CATEGORIES: - if c != ':' and c in ALLOWED_NAME_CHARS: - continue - return 0 - # if in compatibility area - # if decomposition(c)!='': - # return 0 - - return 1 - else: - return 0 + if name: + first = name[0] + if first == "_" or category(first) in NAME_START_CATEGORIES: + for i in range(1, len(name)): + c = name[i] + if not category(c) in NAME_CATEGORIES: + if c != ':' and c in ALLOWED_NAME_CHARS: + continue + return 0 + # if in compatibility area + # if decomposition(c)!='': + # return 0 + + return 1 + + return 0 XMLNS = "http://www.w3.org/XML/1998/namespace" -def split_uri(uri): +def split_uri(uri, split_start=SPLIT_START_CATEGORIES): if uri.startswith(XMLNS): return (XMLNS, uri.split(XMLNS)[1]) length = len(uri) @@ -569,7 +657,8 @@ def split_uri(uri): if c in ALLOWED_NAME_CHARS: continue for j in range(-1 - i, length): - if category(uri[j]) in NAME_START_CATEGORIES or uri[j] == "_": + if category(uri[j]) in split_start or uri[j] == "_": + # _ prevents early split, roundtrip not generate ns = uri[:j] if not ns: break @@ -577,3 +666,37 @@ def split_uri(uri): return (ns, ln) break raise ValueError("Can't split '{}'".format(uri)) + +def insert_trie(trie, value): # aka get_subtrie_or_insert + """ Insert a value into the trie if it is not already contained in the trie. + Return the subtree for the value regardless of whether it is a new value + or not. """ + if value in trie: + return trie[value] + multi_check = False + for key in tuple(trie.keys()): + if len(value) > len(key) and value.startswith(key): + return insert_trie(trie[key], value) + elif key.startswith(value): # we know the value is not in the trie + if not multi_check: + trie[value] = {} + multi_check = True # there can be multiple longer existing prefixes + dict_ = trie.pop(key) # does not break strie since key<->dict_ remains unchanged + trie[value][key] = dict_ + if value not in trie: + trie[value] = {} + return trie[value] + +def insert_strie(strie, trie, value): + if value not in strie: + strie[value] = insert_trie(trie, value) + +def get_longest_namespace(trie, value): + for key in trie: + if value.startswith(key): + out = get_longest_namespace(trie[key], value) + if out is None: + return key + else: + return out + return None diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 99b966b1..67dbe9d7 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -28,7 +28,7 @@ from six import unichr __all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser'] -uriref = r'<([^:]+:[^\s"<>]+)>' +uriref = r'<([^:]+:[^\s"<>]*)>' literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"' litinfo = r'(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)|\^\^' + uriref + r')?' diff --git a/rdflib/plugins/serializers/rdfxml.py b/rdflib/plugins/serializers/rdfxml.py index d5ca78b9..631c8fe0 100644 --- a/rdflib/plugins/serializers/rdfxml.py +++ b/rdflib/plugins/serializers/rdfxml.py @@ -32,7 +32,7 @@ class XMLSerializer(Serializer): bindings = {} for predicate in set(store.predicates()): - prefix, namespace, name = nm.compute_qname(predicate) + prefix, namespace, name = nm.compute_qname_strict(predicate) bindings[prefix] = URIRef(namespace) RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#") @@ -116,7 +116,7 @@ class XMLSerializer(Serializer): def predicate(self, predicate, object, depth=1): write = self.write indent = " " * depth - qname = self.store.namespace_manager.qname(predicate) + qname = self.store.namespace_manager.qname_strict(predicate) if isinstance(object, Literal): attributes = "" @@ -175,7 +175,7 @@ class PrettyXMLSerializer(Serializer): store.objects(None, RDF.type)) for predicate in possible: - prefix, namespace, local = nm.compute_qname(predicate) + prefix, namespace, local = nm.compute_qname_strict(predicate) namespaces[prefix] = namespace namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" diff --git a/rdflib/plugins/serializers/turtle.py b/rdflib/plugins/serializers/turtle.py index 3a1e5a61..1c58ba1b 100644 --- a/rdflib/plugins/serializers/turtle.py +++ b/rdflib/plugins/serializers/turtle.py @@ -43,6 +43,7 @@ class RecursiveSerializer(Serializer): predicateOrder = [RDF.type, RDFS.label] maxDepth = 10 indentString = u" " + roundtrip_prefixes = tuple() def __init__(self, store): @@ -110,6 +111,15 @@ class RecursiveSerializer(Serializer): self._subjects = {} self._topLevels = {} + if self.roundtrip_prefixes: + if hasattr(self.roundtrip_prefixes, '__iter__'): + for prefix, ns in self.store.namespaces(): + if prefix in self.roundtrip_prefixes: + self.addNamespace(prefix, ns) + else: + for prefix, ns in self.store.namespaces(): + self.addNamespace(prefix, ns) + def buildPredicateHash(self, subject): """ Build a hash key by predicate to a list of objects for the given diff --git a/rdflib/plugins/serializers/xmlwriter.py b/rdflib/plugins/serializers/xmlwriter.py index 1b67b8a7..de720e8c 100644 --- a/rdflib/plugins/serializers/xmlwriter.py +++ b/rdflib/plugins/serializers/xmlwriter.py @@ -107,4 +107,4 @@ class XMLWriter(object): else: return uri[len(ns):] - return self.nm.qname(uri) + return self.nm.qname_strict(uri) diff --git a/rdflib/query.py b/rdflib/query.py index 25d82b2e..d04440fb 100644 --- a/rdflib/query.py +++ b/rdflib/query.py @@ -156,7 +156,7 @@ class Result(object): There is a bit of magic here that makes this appear like different Python objects, depending on the type of result. - If the type is "SELECT", iterating will yield lists of QueryRow objects + If the type is "SELECT", iterating will yield lists of ResultRow objects If the type is "ASK", iterating will yield a single bool (or bool(result) will return the same bool) @@ -200,7 +200,15 @@ class Result(object): @staticmethod def parse(source=None, format=None, content_type=None, **kwargs): from rdflib import plugin - parser = plugin.get(format or content_type or 'xml', ResultParser)() + + if format: + plugin_key = format + elif content_type: + plugin_key = content_type.split(";", 1)[0] + else: + plugin_key = 'xml' + + parser = plugin.get(plugin_key, ResultParser)() return parser.parse(source, content_type=content_type, **kwargs) diff --git a/rdflib/term.py b/rdflib/term.py index d4f784ef..acdb0b43 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -51,8 +51,10 @@ import xml.dom.minidom from datetime import date, time, datetime, timedelta from re import sub, compile from collections import defaultdict +from unicodedata import category from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat +from binascii import hexlify, unhexlify import rdflib from six import PY2 @@ -74,10 +76,7 @@ _invalid_uri_chars = '<>" {}|\\^`' def _is_valid_uri(uri): - for c in _invalid_uri_chars: - if c in uri: - return False - return True + return all(map(lambda c: ord(c) > 256 or not c in _invalid_uri_chars, uri)) _lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$') @@ -561,20 +560,20 @@ class Literal(Identifier): datatype = lexical_or_value.datatype value = lexical_or_value.value - elif isinstance(lexical_or_value, string_types): + elif isinstance(lexical_or_value, string_types) or (PY3 and isinstance(lexical_or_value, bytes)): # passed a string # try parsing lexical form of datatyped literal value = _castLexicalToPython(lexical_or_value, datatype) if value is not None and normalize: - _value, _datatype = _castPythonToLiteral(value) + _value, _datatype = _castPythonToLiteral(value, datatype) if _value is not None and _is_valid_unicode(_value): lexical_or_value = _value else: # passed some python object value = lexical_or_value - _value, _datatype = _castPythonToLiteral(lexical_or_value) + _value, _datatype = _castPythonToLiteral(lexical_or_value, datatype) datatype = datatype or _datatype if _value is not None: @@ -1378,6 +1377,12 @@ def _writeXML(xmlnode): return s +def _unhexlify(value): + # In Python 3.2, unhexlify does not support str (only bytes) + if PY3 and isinstance(value, str): + value = value.encode() + return unhexlify(value) + # Cannot import Namespace/XSD because of circular dependencies _XSD_PFX = 'http://www.w3.org/2001/XMLSchema#' _RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' @@ -1401,6 +1406,7 @@ _XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration') _XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration') _OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational') +_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary') # TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth _NUMERIC_LITERAL_TYPES = ( @@ -1468,19 +1474,19 @@ _STRING_LITERAL_TYPES = ( def _castPythonToLiteral(obj): """ - Casts a python datatype to a tuple of the lexical value and a + Casts a tuple of a python type and a special datatype URI to a tuple of the lexical value and a datatype URI (or None) """ - for pType, (castFunc, dType) in _PythonToXSD: + for (pType, dType), castFunc in _SpecificPythonToXSDRules: + if isinstance(obj, pType) and dType == datatype: + return _py2literal(obj, pType, castFunc, dType) + + for pType, (castFunc, dType) in _GenericPythonToXSDRules: if isinstance(obj, pType): - if castFunc: - return castFunc(obj), dType - elif dType: - return obj, dType - else: - return obj, None + return _py2literal(obj, pType, castFunc, dType) return obj, None # TODO: is this right for the fall through case? + from decimal import Decimal # Mappings from Python types to XSD datatypes and back (borrowed from sparta) @@ -1494,7 +1500,7 @@ from decimal import Decimal # python longs have no limit # both map to the abstract integer type, # rather than some concrete bit-limited datatype -_PythonToXSD = [ +_GenericPythonToXSDRules = [ (string_types, (None, None)), (float, (None, _XSD_DOUBLE)), (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)), @@ -1515,6 +1521,12 @@ _PythonToXSD = [ (Fraction, (None, _OWL_RATIONAL)) ] +_SpecificPythonToXSDRules = [ + ((string_types, _XSD_HEXBINARY), hexlify), +] +if PY3: + _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify)) + XSDToPython = { None: None, # plain literals map directly to value space URIRef(_XSD_PFX + 'time'): parse_time, @@ -1525,6 +1537,7 @@ XSDToPython = { URIRef(_XSD_PFX + 'duration'): parse_duration, URIRef(_XSD_PFX + 'dayTimeDuration'): parse_duration, URIRef(_XSD_PFX + 'yearMonthDuration'): parse_duration, + URIRef(_XSD_PFX + 'hexBinary'): _unhexlify, URIRef(_XSD_PFX + 'string'): None, URIRef(_XSD_PFX + 'normalizedString'): None, URIRef(_XSD_PFX + 'token'): None, @@ -1580,7 +1593,7 @@ def _castLexicalToPython(lexical, datatype): return None -def bind(datatype, pythontype, constructor=None, lexicalizer=None): +def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False): """ register a new datatype<->pythontype binding @@ -1588,10 +1601,16 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None): into a Python instances, if not given the pythontype is used directly - :param lexicalizer: an optinoal function for converting python objects to + :param lexicalizer: an optional function for converting python objects to lexical form, if not given object.__str__ is used + :param datatype_specific: makes the lexicalizer function be accessible + from the pair (pythontype, datatype) if set to True + or from the pythontype otherwise. False by default """ + if datatype_specific and datatype is None: + raise Exception("No datatype given for a datatype-specific binding") + if datatype in _toPythonMapping: logger.warning("datatype '%s' was already bound. Rebinding." % datatype) @@ -1599,7 +1618,10 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None): if constructor is None: constructor = pythontype _toPythonMapping[datatype] = constructor - _PythonToXSD.append((pythontype, (lexicalizer, datatype))) + if datatype_specific: + _SpecificPythonToXSDRules.append(((pythontype, datatype), lexicalizer)) + else: + _GenericPythonToXSDRules.append((pythontype, (lexicalizer, datatype))) class Variable(Identifier): diff --git a/rdflib/util.py b/rdflib/util.py index f0c6207d..1789aa70 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -156,7 +156,9 @@ def from_n3(s, default=None, backend=None, nsm=None): if not s: return default if s.startswith('<'): - return URIRef(s[1:-1]) + # Hack: this should correctly handle strings with either native unicode + # characters, or \u1234 unicode escapes. + return URIRef(s[1:-1].encode("raw-unicode-escape").decode("unicode-escape")) elif s.startswith('"'): if s.startswith('"""'): quotes = '"""' @@ -8,7 +8,7 @@ kwargs = {} kwargs['install_requires'] = [ 'six', 'isodate', 'pyparsing'] kwargs['tests_require'] = ['html5lib', 'networkx'] kwargs['test_suite'] = "nose.collector" -kwargs['extras_require'] = {'html': ['html5lib']} +kwargs['extras_require'] = {'html': ['html5lib'], 'sparql': ['requests']} def find_version(filename): _version_re = re.compile(r'__version__ = "(.*)"') diff --git a/test/n3/n3-writer-test-30.n3 b/test/n3/n3-writer-test-30.n3 new file mode 100644 index 00000000..88cf2210 --- /dev/null +++ b/test/n3/n3-writer-test-30.n3 @@ -0,0 +1,22 @@ +# Test full length qnames + +@prefix : <http://example.org/here#> . +@prefix full: <http://example.org/full> . +@prefix pref: <http://example.org/prefix/> . +@prefix more: <http://example.org/prefix/more> . + +# Test namespace generation + +full: :x :y . +:x full: :y . +:x :y full: . + +full: full: full: . + +# Test existing namespace + +more: :x :y . +:x more: :y . +:x :y more: . + +more: more: more: . diff --git a/test/n3/n3-writer-test-31.n3 b/test/n3/n3-writer-test-31.n3 new file mode 100644 index 00000000..1c4494f1 --- /dev/null +++ b/test/n3/n3-writer-test-31.n3 @@ -0,0 +1,15 @@ +# Test unshortenable strict qnames no predicates for xml sanity check + +@prefix : <http://example.org/here#> . +@prefix evil1: <http://example.org/1> . +@prefix evil2: <http://example.org/prefix/1#> . + +# Test namespace generation + +evil1: :x :y . +:x :y evil1: . + +# Test existing namespace + +evil2:1 :x :y . +:x :y evil2:1 . diff --git a/test/test_dawg.py b/test/test_dawg.py index c6370e89..77c4e419 100644 --- a/test/test_dawg.py +++ b/test/test_dawg.py @@ -613,7 +613,7 @@ if __name__ == '__main__': now, i, success, f_sum, e_sum, skip, 100. * success / i) ) - earl_report = 'test_reports/rdflib_sparql-%s.ttl' % now + earl_report = 'test_reports/rdflib_sparql-%s.ttl' % now.replace(":", "") report.serialize(earl_report, format='n3') report.serialize('test_reports/rdflib_sparql-latest.ttl', format='n3') diff --git a/test/test_hex_binary.py b/test/test_hex_binary.py new file mode 100644 index 00000000..419a47e2 --- /dev/null +++ b/test/test_hex_binary.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +import unittest +import binascii +from rdflib import Literal, XSD +import six + + +class HexBinaryTestCase(unittest.TestCase): + + def test_int(self): + self._test_integer(5) + self._test_integer(3452) + self._test_integer(4886) + + def _test_integer(self, i): + hex_i = format(i, "x") + # Make it has a even-length (Byte) + len_hex_i = len(hex_i) + hex_i = hex_i.zfill(len_hex_i + len_hex_i % 2) + + l = Literal(hex_i, datatype=XSD.hexBinary) + bin_i = l.toPython() + self.assertEquals(int(binascii.hexlify(bin_i), 16), i) + + if six.PY2: + self.assertEquals(unicode(l), hex_i) + else: + self.assertEquals(str(l), hex_i) + self.assertEquals(int(hex_i, 16), i) + if six.PY2: + self.assertEquals(int(unicode(l), 16), i) + else: + self.assertEquals(int(l, 16), i) + self.assertEquals(int(str(l), 16), i) + + def test_unicode(self): + str1 = u"Test utf-8 string éàë" + # u hexstring + hex_str1 = binascii.hexlify(str1.encode('utf-8')).decode() + l1 = Literal(hex_str1, datatype=XSD.hexBinary) + b_str1 = l1.toPython() + self.assertEquals(b_str1.decode('utf-8'), str1) + if six.PY2: + self.assertEquals(unicode(l1), hex_str1) + else: + self.assertEquals(str(l1), hex_str1) + + # b hexstring + hex_str1b = binascii.hexlify(str1.encode('utf-8')) + l1b = Literal(hex_str1b, datatype=XSD.hexBinary) + b_str1b = l1b.toPython() + self.assertEquals(b_str1, b_str1b) + self.assertEquals(b_str1b.decode('utf-8'), str1) + if six.PY2: + self.assertEquals(unicode(l1b), hex_str1) + else: + self.assertEquals(str(l1b), hex_str1) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_issue920.py b/test/test_issue920.py new file mode 100644 index 00000000..eb12edc4 --- /dev/null +++ b/test/test_issue920.py @@ -0,0 +1,36 @@ +""" +Issue 920 - NTriples fails to parse URIs with only a scheme + +from rdflib import Graph +g=Graph() +g.parse(data='<a:> <b:> <c:> .', format='nt') # nquads also fails + +N3, by contrast, succeeds: + +g.parse(data='<a:> <b:> <c:> .', format='n3') +""" +from rdflib import Graph +import unittest + + +class TestIssue920(unittest.TestCase): + + def test_issue_920(self): + g = Graph() + # NT tests + g.parse(data='<a:> <b:> <c:> .', format='nt') + g.parse(data='<http://a> <http://b> <http://c> .', format='nt') + g.parse(data='<https://a> <http://> <http://c> .', format='nt') + + # related parser tests + g.parse(data='<a:> <b:> <c:> .', format='turtle') + g.parse(data='<http://a> <http://b> <http://c> .', format='turtle') + g.parse(data='<https://a> <http://> <http://c> .', format='turtle') + + g.parse(data='<a:> <b:> <c:> .', format='n3') + g.parse(data='<http://a> <http://b> <http://c> .', format='n3') + g.parse(data='<https://a> <http://> <http://c> .', format='n3') + + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_issue923.py b/test/test_issue923.py new file mode 100644 index 00000000..3becb6f8 --- /dev/null +++ b/test/test_issue923.py @@ -0,0 +1,35 @@ +""" +Issue 923: split charset off of Content-Type before looking up Result-parsing plugin. +""" +from io import StringIO + +from rdflib.query import Result + +RESULT_SOURCE = u"""\ +{ + "head" : { + "vars" : [ "subject", "predicate", "object", "context" ] + }, + "results" : { + "bindings" : [ { + "subject" : { + "type" : "bnode", + "value" : "service" + }, + "predicate" : { + "type" : "uri", + "value" : "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + }, + "object" : { + "type" : "uri", + "value" : "http://www.w3.org/ns/sparql-service-description#Service" + } + }] + } +} +""" + + +def test_issue_923(): + with StringIO(RESULT_SOURCE) as result_source: + Result.parse(source=result_source, content_type="application/sparql-results+json;charset=utf-8") diff --git a/test/test_literal.py b/test/test_literal.py index feb9d72d..dae2d187 100644 --- a/test/test_literal.py +++ b/test/test_literal.py @@ -2,7 +2,7 @@ import unittest import rdflib # needed for eval(repr(...)) below from rdflib.term import Literal, URIRef, _XSD_DOUBLE, bind -from six import integer_types, PY3 +from six import integer_types, PY3, string_types def uformat(s): @@ -139,6 +139,30 @@ class TestBindings(unittest.TestCase): self.assertEqual(lb.value, vb) self.assertEqual(lb.datatype, dtB) + def testSpecificBinding(self): + + def lexify(s): + return "--%s--" % s + + def unlexify(s): + return s[2:-2] + + datatype = rdflib.URIRef('urn:dt:mystring') + + #Datatype-specific rule + bind(datatype, string_types, unlexify, lexify, datatype_specific=True) + + s = "Hello" + normal_l = Literal(s) + self.assertEqual(str(normal_l), s) + self.assertEqual(normal_l.toPython(), s) + self.assertEqual(normal_l.datatype, None) + + specific_l = Literal("--%s--" % s, datatype=datatype) + self.assertEqual(str(specific_l), lexify(s)) + self.assertEqual(specific_l.toPython(), s) + self.assertEqual(specific_l.datatype, datatype) + if __name__ == "__main__": unittest.main() diff --git a/test/test_n3_suite.py b/test/test_n3_suite.py index f6efe34a..21e6bcba 100644 --- a/test/test_n3_suite.py +++ b/test/test_n3_suite.py @@ -1,5 +1,8 @@ import os import sys +import logging + +log = logging.getLogger(__name__) try: from .testutils import check_serialize_parse @@ -18,6 +21,15 @@ def _get_test_files_formats(): elif f.endswith('.n3'): yield fpath, 'n3' +def all_n3_files(): + skiptests = [ + 'test/n3/example-lots_of_graphs.n3', # only n3 can serialize QuotedGraph, no point in testing roundtrip + ] + for fpath, fmt in _get_test_files_formats(): + if fpath in skiptests: + log.debug("Skipping %s, known issue" % fpath) + else: + yield fpath, fmt def test_n3_writing(): for fpath, fmt in _get_test_files_formats(): diff --git a/test/test_namespace.py b/test/test_namespace.py index 9de7ff29..4041433e 100644 --- a/test/test_namespace.py +++ b/test/test_namespace.py @@ -22,6 +22,33 @@ class NamespacePrefixTest(unittest.TestCase): self.assertEqual(g.compute_qname(URIRef("http://blip/blop")), ("ns4", URIRef("http://blip/"), "blop")) + # should return empty qnames correctly + self.assertEqual(g.compute_qname(URIRef("http://foo/bar/")), + ("ns1", URIRef("http://foo/bar/"), "")) + + def test_reset(self): + data = ('@prefix a: <http://example.org/a> .\n' + 'a: <http://example.org/b> <http://example.org/c> .') + graph = Graph().parse(data=data, format='turtle') + for p, n in tuple(graph.namespaces()): + graph.store._IOMemory__namespace.pop(p) + graph.store._IOMemory__prefix.pop(n) + graph.namespace_manager.reset() + self.assertFalse(tuple(graph.namespaces())) + u = URIRef('http://example.org/a') + prefix, namespace, name = graph.namespace_manager.compute_qname(u, generate=True) + self.assertNotEqual(namespace, u) + + def test_reset_preserve_prefixes(self): + data = ('@prefix a: <http://example.org/a> .\n' + 'a: <http://example.org/b> <http://example.org/c> .') + graph = Graph().parse(data=data, format='turtle') + graph.namespace_manager.reset() + self.assertTrue(tuple(graph.namespaces())) + u = URIRef('http://example.org/a') + prefix, namespace, name = graph.namespace_manager.compute_qname(u, generate=True) + self.assertEqual(namespace, u) + def test_n3(self): g = Graph() g.add((URIRef("http://example.com/foo"), diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index 819c944a..9dfed952 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -5,8 +5,11 @@ import rdflib.compare try: from .test_nt_suite import all_nt_files assert all_nt_files + from .test_n3_suite import all_n3_files + assert all_n3_files except: from test.test_nt_suite import all_nt_files + from test.test_n3_suite import all_n3_files """ Test round-tripping by all serializers/parser that are registerd. @@ -25,9 +28,10 @@ tests roundtripping through rdf/xml with only the literals-02 file SKIP = [ + ('xml', 'test/n3/n3-writer-test-29.n3'), # has predicates that cannot be shortened to strict qnames ('xml', 'test/nt/qname-02.nt'), # uses a property that cannot be qname'd - # uses a property that cannot be qname'd - ('application/rdf+xml', 'test/nt/qname-02.nt'), + ('trix', 'test/n3/strquot.n3'), # contains charachters forbidden by the xml spec + ('xml', 'test/n3/strquot.n3'), # contains charachters forbidden by the xml spec ] @@ -43,6 +47,7 @@ def roundtrip(e, verbose=False): if verbose: print("S:") print(s) + print(s.decode()) g2 = rdflib.ConjunctiveGraph() g2.parse(data=s, format=testfmt) @@ -52,12 +57,12 @@ def roundtrip(e, verbose=False): print("Diff:") print("%d triples in both" % len(both)) print("G1 Only:") - for t in first: + for t in sorted(first): print(t) print("--------------------") print("G2 Only") - for t in second: + for t in sorted(second): print(t) assert rdflib.compare.isomorphic(g1, g2) @@ -88,6 +93,24 @@ def test_cases(): yield roundtrip, (infmt, testfmt, f) +def test_n3(): + global formats + if not formats: + serializers = set( + x.name for x in rdflib.plugin.plugins( + None, rdflib.plugin.Serializer)) + parsers = set( + x.name for x in rdflib.plugin.plugins( + None, rdflib.plugin.Parser)) + formats = parsers.intersection(serializers) + + for testfmt in formats: + if "/" in testfmt: continue # skip double testing + for f, infmt in all_n3_files(): + if (testfmt, f) not in SKIP: + yield roundtrip, (infmt, testfmt, f) + + if __name__ == "__main__": import nose if len(sys.argv) == 1: diff --git a/test/test_turtle_serialize.py b/test/test_turtle_serialize.py index 0b602b34..81f57847 100644 --- a/test/test_turtle_serialize.py +++ b/test/test_turtle_serialize.py @@ -70,6 +70,24 @@ def test_turtle_valid_list(): assert turtle_serializer.isValidList(o) +def test_turtle_namespace(): + graph = Graph() + graph.bind('OBO', 'http://purl.obolibrary.org/obo/') + graph.bind('GENO', 'http://purl.obolibrary.org/obo/GENO_') + graph.bind('RO', 'http://purl.obolibrary.org/obo/RO_') + graph.bind('RO_has_phenotype', + 'http://purl.obolibrary.org/obo/RO_0002200') + graph.add((URIRef('http://example.org'), + URIRef('http://purl.obolibrary.org/obo/RO_0002200'), + URIRef('http://purl.obolibrary.org/obo/GENO_0000385'))) + output = [val for val in + graph.serialize(format='turtle').decode().splitlines() + if not val.startswith('@prefix')] + output = ' '.join(output) + assert 'RO_has_phenotype:' in output + assert 'GENO:0000385' in output + + if __name__ == "__main__": import nose import sys diff --git a/test/test_util.py b/test/test_util.py index 61e5b33c..4184b659 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import unittest import time from rdflib.graph import Graph @@ -236,6 +238,8 @@ class TestUtilTermConvert(unittest.TestCase): def test_util_from_n3_expectpartialidempotencewithn3(self): for n3 in ('<http://ex.com/foo>', '"foo"@de', + u'<http://ex.com/漢字>', + u'<http://ex.com/a#あ>', # '"\\""', # exception as '\\"' --> '"' by orig parser as well '"""multi\n"line"\nstring"""@en'): self.assertEqual(util.from_n3(n3).n3(), n3, diff --git a/test/testutils.py b/test/testutils.py index 02bc4b23..20b060d3 100644 --- a/test/testutils.py +++ b/test/testutils.py @@ -96,7 +96,7 @@ def nose_tst_earl_report(generator, earl_report_name=None): print("Ran %d tests, %d skipped, %d failed. "%(tests, skip, tests-skip-success)) if earl_report_name: now = isodate.datetime_isoformat(datetime.datetime.utcnow()) - earl_report = 'test_reports/%s-%s.ttl' % (earl_report_name, now) + earl_report = 'test_reports/%s-%s.ttl' % (earl_report_name, now.replace(":", "")) report.serialize(earl_report, format='n3') report.serialize('test_reports/%s-latest.ttl'%earl_report_name, format='n3') diff --git a/test_reports/rdflib_nquads-2013-12-22T19:22:34.ttl b/test_reports/rdflib_nquads-2013-12-22T192234.ttl index 344e1861..344e1861 100644 --- a/test_reports/rdflib_nquads-2013-12-22T19:22:34.ttl +++ b/test_reports/rdflib_nquads-2013-12-22T192234.ttl diff --git a/test_reports/rdflib_nt-2013-12-22T19:12:25.ttl b/test_reports/rdflib_nt-2013-12-22T191225.ttl index 24dc06a4..24dc06a4 100644 --- a/test_reports/rdflib_nt-2013-12-22T19:12:25.ttl +++ b/test_reports/rdflib_nt-2013-12-22T191225.ttl diff --git a/test_reports/rdflib_sparql-2013-12-22T19:36:48.ttl b/test_reports/rdflib_sparql-2013-12-22T193648.ttl index 1e2b4f9b..1e2b4f9b 100644 --- a/test_reports/rdflib_sparql-2013-12-22T19:36:48.ttl +++ b/test_reports/rdflib_sparql-2013-12-22T193648.ttl diff --git a/test_reports/rdflib_trig-2013-12-22T19:31:52.ttl b/test_reports/rdflib_trig-2013-12-22T193152.ttl index 325759f1..325759f1 100644 --- a/test_reports/rdflib_trig-2013-12-22T19:31:52.ttl +++ b/test_reports/rdflib_trig-2013-12-22T193152.ttl diff --git a/test_reports/rdflib_trig-2013-12-30T15:56:57.ttl b/test_reports/rdflib_trig-2013-12-30T155657.ttl index 84bbbc06..84bbbc06 100644 --- a/test_reports/rdflib_trig-2013-12-30T15:56:57.ttl +++ b/test_reports/rdflib_trig-2013-12-30T155657.ttl diff --git a/test_reports/rdflib_turtle-2013-12-22T19:13:51.ttl b/test_reports/rdflib_turtle-2013-12-22T191351.ttl index 22bc440b..22bc440b 100644 --- a/test_reports/rdflib_turtle-2013-12-22T19:13:51.ttl +++ b/test_reports/rdflib_turtle-2013-12-22T191351.ttl |