Merge remote-tracking branch 'origin/master' into pr_451_redux

# Conflicts: # rdflib/term.py
author: Nicholas Car <nicholas.car@csiro.au> 2020-03-23 13:11:49 +1000
committer: Nicholas Car <nicholas.car@csiro.au> 2020-03-23 13:11:49 +1000
commit: 7a1dc28314996d48727ff0cd022ac2b5df60c464 (patch)
tree: e7770df246ebc4d3fd04e9738ce4e4cd55b682ec
parent: b53dec4bf68df6f0f75d0dfe116fa21fb593b252 (diff)
parent: 31fb70710bcfe87a9c3284b24c9633b54c839148 (diff)
download: rdflib-7a1dc28314996d48727ff0cd022ac2b5df60c464.tar.gz
29 files changed, 512 insertions, 59 deletions
diff --git a/rdflib/extras/infixowl.py b/rdflib/extras/infixowl.py
index f3ce40d3..fb033198 100644
--- a/rdflib/extras/infixowl.py
+++ b/rdflib/extras/infixowl.py
@@ -318,7 +318,7 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False):
         for val in store.objects(subject=thing, predicate=OWL_NS.hasValue):
             return u'( %s VALUE %s )' % (
                 propString,
-                manchesterSyntax(val.encode('utf-8', 'ignore'), store))
+                manchesterSyntax(val, store))
         for someClass in store.objects(
                 subject=thing, predicate=OWL_NS.someValuesFrom):
             return u'( %s SOME %s )' % (
@@ -329,7 +329,7 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False):
         for s, p, o in store.triples_choices(
                 (thing, list(cardLookup.keys()), None)):
             return u'( %s %s %s )' % (
-                propString, cardLookup[p], o.encode('utf-8', 'ignore'))
+                propString, cardLookup[p], o)
     compl = list(store.objects(subject=thing, predicate=OWL_NS.complementOf))
     if compl:
         return '( NOT %s )' % (manchesterSyntax(compl[0], store))
@@ -357,9 +357,9 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False):
             return '[]'  # +thing._id.encode('utf-8')+'</em>'
         label = first(Class(thing, graph=store).label)
         if label:
-            return label.encode('utf-8', 'ignore')
+            return label
         else:
-            return qname.encode('utf-8', 'ignore')
+            return qname
 
 
 def GetIdentifiedClasses(graph):
diff --git a/rdflib/namespace.py b/rdflib/namespace.py
index 4ca1b09a..bea99905 100644
--- a/rdflib/namespace.py
+++ b/rdflib/namespace.py
@@ -353,7 +353,12 @@ class NamespaceManager(object):
     def __init__(self, graph):
         self.graph = graph
         self.__cache = {}
+        self.__cache_strict = {}
         self.__log = None
+        self.__strie = {}
+        self.__trie = {}
+        for p, n in self.namespaces():  # self.bind is not always called
+            insert_trie(self.__trie, str(n))
         self.bind("xml", "http://www.w3.org/XML/1998/namespace")
         self.bind("rdf", RDF)
         self.bind("rdfs", RDFS)
@@ -361,6 +366,10 @@ class NamespaceManager(object):
 
     def reset(self):
         self.__cache = {}
+        self.__strie = {}
+        self.__trie = {}
+        for p, n in self.namespaces():  # repopulate the trie
+            insert_trie(self.__trie, str(n))
 
     def __get_store(self):
         return self.graph.store
@@ -373,6 +382,13 @@ class NamespaceManager(object):
         else:
             return ":".join((prefix, name))
 
+    def qname_strict(self, uri):
+        prefix, namespace, name = self.compute_qname_strict(uri)
+        if prefix == '':
+            return name
+        else:
+            return ':'.join((prefix, name))
+
     def normalizeUri(self, rdfTerm):
         """
         Takes an RDF Term and 'normalizes' it into a QName (using the
@@ -381,6 +397,8 @@ class NamespaceManager(object):
         """
         try:
             namespace, name = split_uri(rdfTerm)
+            if namespace not in self.__strie:
+                insert_strie(self.__strie, self.__trie, str(namespace))
             namespace = URIRef(text_type(namespace))
         except:
             if isinstance(rdfTerm, Variable):
@@ -404,9 +422,25 @@ class NamespaceManager(object):
             )
 
         if uri not in self.__cache:
-            namespace, name = split_uri(uri)
+            try:
+                namespace, name = split_uri(uri)
+            except ValueError as e:
+                namespace = URIRef(uri)
+                prefix = self.store.prefix(namespace)
+                if not prefix:
+                    raise e
+            if namespace not in self.__strie:
+                insert_strie(self.__strie, self.__trie, namespace)
+
+            if self.__strie[namespace]:
+                pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
+                if pl_namespace is not None:
+                    namespace = pl_namespace
+                    name = uri[len(namespace):]
+
             namespace = URIRef(namespace)
-            prefix = self.store.prefix(namespace)
+            prefix = self.store.prefix(namespace)  # warning multiple prefixes problem
+
             if prefix is None:
                 if not generate:
                     raise KeyError(
@@ -422,6 +456,56 @@ class NamespaceManager(object):
             self.__cache[uri] = (prefix, namespace, name)
         return self.__cache[uri]
 
+    def compute_qname_strict(self, uri, generate=True):
+        # code repeated to avoid branching on strict every time
+        # if output needs to be strict (e.g. for xml) then
+        # only the strict output should bear the overhead
+        prefix, namespace, name = self.compute_qname(uri)
+        if is_ncname(text_type(name)):
+            return prefix, namespace, name
+        else:
+            if uri not in self.__cache_strict:
+                try:
+                    namespace, name = split_uri(uri, NAME_START_CATEGORIES)
+                except ValueError as e:
+                    message = ('This graph cannot be serialized to a strict format '
+                               'because there is no valid way to shorten {}'.format(uri))
+                    raise ValueError(message)
+                    # omitted for strict since NCNames cannot be empty
+                    #namespace = URIRef(uri)
+                    #prefix = self.store.prefix(namespace)
+                    #if not prefix:
+                        #raise e
+
+                if namespace not in self.__strie:
+                    insert_strie(self.__strie, self.__trie, namespace)
+
+                # omitted for strict
+                #if self.__strie[namespace]:
+                    #pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
+                    #if pl_namespace is not None:
+                        #namespace = pl_namespace
+                        #name = uri[len(namespace):]
+
+                namespace = URIRef(namespace)
+                prefix = self.store.prefix(namespace)  # warning multiple prefixes problem
+
+                if prefix is None:
+                    if not generate:
+                        raise KeyError(
+                            "No known prefix for {} and generate=False".format(namespace)
+                        )
+                    num = 1
+                    while 1:
+                        prefix = "ns%s" % num
+                        if not self.store.namespace(prefix):
+                            break
+                        num += 1
+                    self.bind(prefix, namespace)
+                self.__cache_strict[uri] = (prefix, namespace, name)
+
+            return self.__cache_strict[uri]
+
     def bind(self, prefix, namespace, override=True, replace=False):
         """bind a given namespace to the prefix
 
@@ -447,6 +531,7 @@ class NamespaceManager(object):
 
             if replace:
                 self.store.bind(prefix, namespace)
+                insert_trie(self.__trie, str(namespace))
                 return
 
             # prefix already in use for different namespace
@@ -476,6 +561,7 @@ class NamespaceManager(object):
             else:
                 if override or bound_prefix.startswith("_"):  # or a generated prefix
                     self.store.bind(prefix, namespace)
+        insert_trie(self.__trie, str(namespace))
 
     def namespaces(self):
         for prefix, namespace in self.store.namespaces():
@@ -527,6 +613,7 @@ class NamespaceManager(object):
 
 
 NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"]
+SPLIT_START_CATEGORIES = NAME_START_CATEGORIES + ['Nd']
 NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"]
 ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"]
 
@@ -539,27 +626,28 @@ ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"]
 
 
 def is_ncname(name):
-    first = name[0]
-    if first == "_" or category(first) in NAME_START_CATEGORIES:
-        for i in range(1, len(name)):
-            c = name[i]
-            if not category(c) in NAME_CATEGORIES:
-                if c != ':' and c in ALLOWED_NAME_CHARS:
-                    continue
-                return 0
-            # if in compatibility area
-            # if decomposition(c)!='':
-            #    return 0
-
-        return 1
-    else:
-        return 0
+    if name:
+        first = name[0]
+        if first == "_" or category(first) in NAME_START_CATEGORIES:
+            for i in range(1, len(name)):
+                c = name[i]
+                if not category(c) in NAME_CATEGORIES:
+                    if c != ':' and c in ALLOWED_NAME_CHARS:
+                        continue
+                    return 0
+                # if in compatibility area
+                # if decomposition(c)!='':
+                #    return 0
+
+            return 1
+
+    return 0
 
 
 XMLNS = "http://www.w3.org/XML/1998/namespace"
 
 
-def split_uri(uri):
+def split_uri(uri, split_start=SPLIT_START_CATEGORIES):
     if uri.startswith(XMLNS):
         return (XMLNS, uri.split(XMLNS)[1])
     length = len(uri)
@@ -569,7 +657,8 @@ def split_uri(uri):
             if c in ALLOWED_NAME_CHARS:
                 continue
             for j in range(-1 - i, length):
-                if category(uri[j]) in NAME_START_CATEGORIES or uri[j] == "_":
+                if category(uri[j]) in split_start or uri[j] == "_":
+                    # _ prevents early split, roundtrip not generate
                     ns = uri[:j]
                     if not ns:
                         break
@@ -577,3 +666,37 @@ def split_uri(uri):
                     return (ns, ln)
             break
     raise ValueError("Can't split '{}'".format(uri))
+
+def insert_trie(trie, value):  # aka get_subtrie_or_insert
+    """ Insert a value into the trie if it is not already contained in the trie.
+        Return the subtree for the value regardless of whether it is a new value
+        or not. """
+    if value in trie:
+        return trie[value]
+    multi_check = False
+    for key in tuple(trie.keys()):
+        if len(value) > len(key) and value.startswith(key):
+            return insert_trie(trie[key], value)
+        elif key.startswith(value):  # we know the value is not in the trie
+            if not multi_check:
+                trie[value] = {}
+                multi_check = True  # there can be multiple longer existing prefixes
+            dict_ = trie.pop(key)  # does not break strie since key<->dict_ remains unchanged
+            trie[value][key] = dict_
+    if value not in trie:
+        trie[value] = {}
+    return trie[value]
+
+def insert_strie(strie, trie, value):
+    if value not in strie:
+        strie[value] = insert_trie(trie, value)
+
+def get_longest_namespace(trie, value):
+    for key in trie:
+        if value.startswith(key):
+            out = get_longest_namespace(trie[key], value)
+            if out is None:
+                return key
+            else:
+                return out
+    return None
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 99b966b1..67dbe9d7 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -28,7 +28,7 @@ from six import unichr
 
 __all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser']
 
-uriref = r'<([^:]+:[^\s"<>]+)>'
+uriref = r'<([^:]+:[^\s"<>]*)>'
 literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
 litinfo = r'(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)|\^\^' + uriref + r')?'
 
diff --git a/rdflib/plugins/serializers/rdfxml.py b/rdflib/plugins/serializers/rdfxml.py
index d5ca78b9..631c8fe0 100644
--- a/rdflib/plugins/serializers/rdfxml.py
+++ b/rdflib/plugins/serializers/rdfxml.py
@@ -32,7 +32,7 @@ class XMLSerializer(Serializer):
         bindings = {}
 
         for predicate in set(store.predicates()):
-            prefix, namespace, name = nm.compute_qname(predicate)
+            prefix, namespace, name = nm.compute_qname_strict(predicate)
             bindings[prefix] = URIRef(namespace)
 
         RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
@@ -116,7 +116,7 @@ class XMLSerializer(Serializer):
     def predicate(self, predicate, object, depth=1):
         write = self.write
         indent = "  " * depth
-        qname = self.store.namespace_manager.qname(predicate)
+        qname = self.store.namespace_manager.qname_strict(predicate)
 
         if isinstance(object, Literal):
             attributes = ""
@@ -175,7 +175,7 @@ class PrettyXMLSerializer(Serializer):
             store.objects(None, RDF.type))
 
         for predicate in possible:
-            prefix, namespace, local = nm.compute_qname(predicate)
+            prefix, namespace, local = nm.compute_qname_strict(predicate)
             namespaces[prefix] = namespace
 
         namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
diff --git a/rdflib/plugins/serializers/turtle.py b/rdflib/plugins/serializers/turtle.py
index 3a1e5a61..1c58ba1b 100644
--- a/rdflib/plugins/serializers/turtle.py
+++ b/rdflib/plugins/serializers/turtle.py
@@ -43,6 +43,7 @@ class RecursiveSerializer(Serializer):
     predicateOrder = [RDF.type, RDFS.label]
     maxDepth = 10
     indentString = u"  "
+    roundtrip_prefixes = tuple()
 
     def __init__(self, store):
 
@@ -110,6 +111,15 @@ class RecursiveSerializer(Serializer):
         self._subjects = {}
         self._topLevels = {}
 
+        if self.roundtrip_prefixes:
+            if hasattr(self.roundtrip_prefixes, '__iter__'):
+                for prefix, ns in self.store.namespaces():
+                    if prefix in self.roundtrip_prefixes:
+                        self.addNamespace(prefix, ns)
+            else:
+                for prefix, ns in self.store.namespaces():
+                    self.addNamespace(prefix, ns)
+
     def buildPredicateHash(self, subject):
         """
         Build a hash key by predicate to a list of objects for the given
diff --git a/rdflib/plugins/serializers/xmlwriter.py b/rdflib/plugins/serializers/xmlwriter.py
index 1b67b8a7..de720e8c 100644
--- a/rdflib/plugins/serializers/xmlwriter.py
+++ b/rdflib/plugins/serializers/xmlwriter.py
@@ -107,4 +107,4 @@ class XMLWriter(object):
                 else:
                     return uri[len(ns):]
 
-        return self.nm.qname(uri)
+        return self.nm.qname_strict(uri)
diff --git a/rdflib/query.py b/rdflib/query.py
index 25d82b2e..d04440fb 100644
--- a/rdflib/query.py
+++ b/rdflib/query.py
@@ -156,7 +156,7 @@ class Result(object):
     There is a bit of magic here that makes this appear like different
     Python objects, depending on the type of result.
 
-    If the type is "SELECT", iterating will yield lists of QueryRow objects
+    If the type is "SELECT", iterating will yield lists of ResultRow objects
 
     If the type is "ASK", iterating will yield a single bool (or
     bool(result) will return the same bool)
@@ -200,7 +200,15 @@ class Result(object):
     @staticmethod
     def parse(source=None, format=None, content_type=None, **kwargs):
         from rdflib import plugin
-        parser = plugin.get(format or content_type or 'xml', ResultParser)()
+
+        if format:
+            plugin_key = format
+        elif content_type:
+            plugin_key = content_type.split(";", 1)[0]
+        else:
+            plugin_key = 'xml'
+
+        parser = plugin.get(plugin_key, ResultParser)()
 
         return parser.parse(source, content_type=content_type, **kwargs)
 
diff --git a/rdflib/term.py b/rdflib/term.py
index d4f784ef..3d290258 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -51,8 +51,10 @@ import xml.dom.minidom
 from datetime import date, time, datetime, timedelta
 from re import sub, compile
 from collections import defaultdict
+from unicodedata import category
 
 from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat
+from binascii import hexlify, unhexlify
 
 import rdflib
 from six import PY2
@@ -74,10 +76,7 @@ _invalid_uri_chars = '<>" {}|\\^`'
 
 
 def _is_valid_uri(uri):
-    for c in _invalid_uri_chars:
-        if c in uri:
-            return False
-    return True
+    return all(map(lambda c: ord(c) > 256 or not c in _invalid_uri_chars, uri))
 
 
 _lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')
@@ -561,20 +560,20 @@ class Literal(Identifier):
                 datatype = lexical_or_value.datatype
                 value = lexical_or_value.value
 
-        elif isinstance(lexical_or_value, string_types):
+        elif isinstance(lexical_or_value, string_types) or (PY3 and isinstance(lexical_or_value, bytes)):
                 # passed a string
                 # try parsing lexical form of datatyped literal
             value = _castLexicalToPython(lexical_or_value, datatype)
 
             if value is not None and normalize:
-                _value, _datatype = _castPythonToLiteral(value)
+                _value, _datatype = _castPythonToLiteral(value, datatype)
                 if _value is not None and _is_valid_unicode(_value):
                     lexical_or_value = _value
 
         else:
             # passed some python object
             value = lexical_or_value
-            _value, _datatype = _castPythonToLiteral(lexical_or_value)
+            _value, _datatype = _castPythonToLiteral(lexical_or_value, datatype)
 
             datatype = datatype or _datatype
             if _value is not None:
@@ -1378,6 +1377,12 @@ def _writeXML(xmlnode):
     return s
 
 
+def _unhexlify(value):
+    # In Python 3.2, unhexlify does not support str (only bytes)
+    if PY3 and isinstance(value, str):
+        value = value.encode()
+    return unhexlify(value)
+
 # Cannot import Namespace/XSD because of circular dependencies
 _XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
 _RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
@@ -1401,6 +1406,7 @@ _XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration')
 _XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration')
 
 _OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational')
+_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary')
 # TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth
 
 _NUMERIC_LITERAL_TYPES = (
@@ -1466,21 +1472,31 @@ _STRING_LITERAL_TYPES = (
     URIRef(_XSD_PFX + 'token')
 )
 
-def _castPythonToLiteral(obj):
+
+def _py2literal(obj, pType, castFunc, dType):
+    if castFunc:
+        return castFunc(obj), dType
+    elif dType:
+        return obj, dType
+    else:
+        return obj, None
+
+
+def _castPythonToLiteral(obj, datatype):
     """
-    Casts a python datatype to a tuple of the lexical value and a
+    Casts a python object, taking the requested datatype URI into account, to a tuple of the lexical value and a
     datatype URI (or None)
     """
-    for pType, (castFunc, dType) in _PythonToXSD:
+    for (pType, dType), castFunc in _SpecificPythonToXSDRules:
+        if isinstance(obj, pType) and dType == datatype:
+            return _py2literal(obj, pType, castFunc, dType)
+
+    for pType, (castFunc, dType) in _GenericPythonToXSDRules:
         if isinstance(obj, pType):
-            if castFunc:
-                return castFunc(obj), dType
-            elif dType:
-                return obj, dType
-            else:
-                return obj, None
+            return _py2literal(obj, pType, castFunc, dType)
     return obj, None  # TODO: is this right for the fall through case?
 
+
 from decimal import Decimal
 
 # Mappings from Python types to XSD datatypes and back (borrowed from sparta)
@@ -1494,7 +1510,7 @@ from decimal import Decimal
 # python longs have no limit
 # both map to the abstract integer type,
 # rather than some concrete bit-limited datatype
-_PythonToXSD = [
+_GenericPythonToXSDRules = [
     (string_types, (None, None)),
     (float, (None, _XSD_DOUBLE)),
     (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)),
@@ -1515,6 +1531,12 @@ _PythonToXSD = [
     (Fraction, (None, _OWL_RATIONAL))
 ]
 
+_SpecificPythonToXSDRules = [
+    ((string_types, _XSD_HEXBINARY), hexlify),
+]
+if PY3:
+    _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify))
+
 XSDToPython = {
     None: None,  # plain literals map directly to value space
     URIRef(_XSD_PFX + 'time'): parse_time,
@@ -1525,6 +1547,7 @@ XSDToPython = {
     URIRef(_XSD_PFX + 'duration'): parse_duration,
     URIRef(_XSD_PFX + 'dayTimeDuration'): parse_duration,
     URIRef(_XSD_PFX + 'yearMonthDuration'): parse_duration,
+    URIRef(_XSD_PFX + 'hexBinary'): _unhexlify,
     URIRef(_XSD_PFX + 'string'): None,
     URIRef(_XSD_PFX + 'normalizedString'): None,
     URIRef(_XSD_PFX + 'token'): None,
@@ -1580,7 +1603,7 @@ def _castLexicalToPython(lexical, datatype):
         return None
 
 
-def bind(datatype, pythontype, constructor=None, lexicalizer=None):
+def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False):
     """
     register a new datatype<->pythontype binding
 
@@ -1588,10 +1611,16 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
                         into a Python instances, if not given the pythontype
                         is used directly
 
-    :param lexicalizer: an optinoal function for converting python objects to
+    :param lexicalizer: an optional function for converting python objects to
                         lexical form, if not given object.__str__ is used
 
+    :param datatype_specific: if True, register the lexicalizer under the
+                              (pythontype, datatype) pair; otherwise register
+                              it under the pythontype alone.  False by default
     """
+    if datatype_specific and datatype is None:
+        raise Exception("No datatype given for a datatype-specific binding")
+
     if datatype in _toPythonMapping:
         logger.warning("datatype '%s' was already bound. Rebinding." %
                        datatype)
@@ -1599,7 +1628,10 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
     if constructor is None:
         constructor = pythontype
     _toPythonMapping[datatype] = constructor
-    _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
+    if datatype_specific:
+        _SpecificPythonToXSDRules.append(((pythontype, datatype), lexicalizer))
+    else:
+        _GenericPythonToXSDRules.append((pythontype, (lexicalizer, datatype)))
 
 
 class Variable(Identifier):
diff --git a/rdflib/util.py b/rdflib/util.py
index f0c6207d..1789aa70 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -156,7 +156,9 @@ def from_n3(s, default=None, backend=None, nsm=None):
     if not s:
         return default
     if s.startswith('<'):
-        return URIRef(s[1:-1])
+        # Hack: this should correctly handle strings with either native unicode
+        # characters, or \u1234 unicode escapes.
+        return URIRef(s[1:-1].encode("raw-unicode-escape").decode("unicode-escape"))
     elif s.startswith('"'):
         if s.startswith('"""'):
             quotes = '"""'
diff --git a/setup.py b/setup.py
index 9859360e..0203f299 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ kwargs = {}
 kwargs['install_requires'] = [ 'six', 'isodate', 'pyparsing']
 kwargs['tests_require'] = ['html5lib', 'networkx']
 kwargs['test_suite'] = "nose.collector"
-kwargs['extras_require'] = {'html': ['html5lib']}
+kwargs['extras_require'] = {'html': ['html5lib'], 'sparql': ['requests']}
 
 def find_version(filename):
     _version_re = re.compile(r'__version__ = "(.*)"')
diff --git a/test/n3/n3-writer-test-30.n3 b/test/n3/n3-writer-test-30.n3
new file mode 100644
index 00000000..88cf2210
--- /dev/null
+++ b/test/n3/n3-writer-test-30.n3
@@ -0,0 +1,22 @@
+# Test full length qnames
+
+@prefix :       <http://example.org/here#> .
+@prefix full:   <http://example.org/full> .
+@prefix pref:     <http://example.org/prefix/> .
+@prefix more:   <http://example.org/prefix/more> .
+
+# Test namespace generation
+
+full: :x :y .
+:x full: :y .
+:x :y full: .
+
+full: full: full: . 
+
+# Test existing namespace
+
+more: :x :y .
+:x more: :y .
+:x :y more: .
+
+more: more: more: .
diff --git a/test/n3/n3-writer-test-31.n3 b/test/n3/n3-writer-test-31.n3
new file mode 100644
index 00000000..1c4494f1
--- /dev/null
+++ b/test/n3/n3-writer-test-31.n3
@@ -0,0 +1,15 @@
+# Test qnames that cannot be shortened in strict mode; none are used as predicates (XML sanity check)
+
+@prefix :       <http://example.org/here#> .
+@prefix evil1:   <http://example.org/1> .
+@prefix evil2:   <http://example.org/prefix/1#> .
+
+# Test namespace generation
+
+evil1: :x :y .
+:x :y evil1: .
+
+# Test existing namespace
+
+evil2:1 :x :y .
+:x :y evil2:1 .
diff --git a/test/test_dawg.py b/test/test_dawg.py
index c6370e89..77c4e419 100644
--- a/test/test_dawg.py
+++ b/test/test_dawg.py
@@ -613,7 +613,7 @@ if __name__ == '__main__':
                     now, i, success, f_sum, e_sum, skip, 100. * success / i)
             )
 
-        earl_report = 'test_reports/rdflib_sparql-%s.ttl' % now
+        earl_report = 'test_reports/rdflib_sparql-%s.ttl' % now.replace(":", "")
 
         report.serialize(earl_report, format='n3')
         report.serialize('test_reports/rdflib_sparql-latest.ttl', format='n3')
diff --git a/test/test_hex_binary.py b/test/test_hex_binary.py
new file mode 100644
index 00000000..419a47e2
--- /dev/null
+++ b/test/test_hex_binary.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import binascii
+from rdflib import Literal, XSD
+import six
+
+
+class HexBinaryTestCase(unittest.TestCase):
+
+    def test_int(self):
+        self._test_integer(5)
+        self._test_integer(3452)
+        self._test_integer(4886)
+
+    def _test_integer(self, i):
+        hex_i = format(i, "x")
+        # Pad with a leading zero so the hex string has an even length (whole bytes)
+        len_hex_i = len(hex_i)
+        hex_i = hex_i.zfill(len_hex_i + len_hex_i % 2)
+
+        l = Literal(hex_i, datatype=XSD.hexBinary)
+        bin_i = l.toPython()
+        self.assertEquals(int(binascii.hexlify(bin_i), 16), i)
+
+        if six.PY2:
+            self.assertEquals(unicode(l), hex_i)
+        else:
+            self.assertEquals(str(l), hex_i)
+        self.assertEquals(int(hex_i, 16), i)
+        if six.PY2:
+            self.assertEquals(int(unicode(l), 16), i)
+        else:
+            self.assertEquals(int(l, 16), i)
+        self.assertEquals(int(str(l), 16), i)
+
+    def test_unicode(self):
+        str1 = u"Test utf-8 string éàë"
+        # u hexstring
+        hex_str1 = binascii.hexlify(str1.encode('utf-8')).decode()
+        l1 = Literal(hex_str1, datatype=XSD.hexBinary)
+        b_str1 = l1.toPython()
+        self.assertEquals(b_str1.decode('utf-8'), str1)
+        if six.PY2:
+            self.assertEquals(unicode(l1), hex_str1)
+        else:
+            self.assertEquals(str(l1), hex_str1)
+
+        # b hexstring
+        hex_str1b = binascii.hexlify(str1.encode('utf-8'))
+        l1b = Literal(hex_str1b, datatype=XSD.hexBinary)
+        b_str1b = l1b.toPython()
+        self.assertEquals(b_str1, b_str1b)
+        self.assertEquals(b_str1b.decode('utf-8'), str1)
+        if six.PY2:
+            self.assertEquals(unicode(l1b), hex_str1)
+        else:
+            self.assertEquals(str(l1b), hex_str1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_issue920.py b/test/test_issue920.py
new file mode 100644
index 00000000..eb12edc4
--- /dev/null
+++ b/test/test_issue920.py
@@ -0,0 +1,36 @@
+"""
+Issue 920 - NTriples fails to parse URIs with only a scheme
+
+from rdflib import Graph
+g=Graph()
+g.parse(data='<a:> <b:> <c:> .', format='nt') # nquads also fails
+
+N3, by contrast, succeeds:
+
+g.parse(data='<a:> <b:> <c:> .', format='n3')
+"""
+from rdflib import Graph
+import unittest
+
+
+class TestIssue920(unittest.TestCase):
+
+    def test_issue_920(self):
+        g = Graph()
+        # NT tests
+        g.parse(data='<a:> <b:> <c:> .', format='nt')
+        g.parse(data='<http://a> <http://b> <http://c> .', format='nt')
+        g.parse(data='<https://a> <http://> <http://c> .', format='nt')
+
+        # related parser tests
+        g.parse(data='<a:> <b:> <c:> .', format='turtle')
+        g.parse(data='<http://a> <http://b> <http://c> .', format='turtle')
+        g.parse(data='<https://a> <http://> <http://c> .', format='turtle')
+
+        g.parse(data='<a:> <b:> <c:> .', format='n3')
+        g.parse(data='<http://a> <http://b> <http://c> .', format='n3')
+        g.parse(data='<https://a> <http://> <http://c> .', format='n3')
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/test_issue923.py b/test/test_issue923.py
new file mode 100644
index 00000000..3becb6f8
--- /dev/null
+++ b/test/test_issue923.py
@@ -0,0 +1,35 @@
+"""
+Issue 923: split charset off of Content-Type before looking up Result-parsing plugin.
+"""
+from io import StringIO
+
+from rdflib.query import Result
+
+RESULT_SOURCE = u"""\
+{
+  "head" : {
+    "vars" : [ "subject", "predicate", "object", "context" ]
+  },
+  "results" : {
+    "bindings" : [ {
+      "subject" : {
+        "type" : "bnode",
+        "value" : "service"
+      },
+      "predicate" : {
+        "type" : "uri",
+        "value" : "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+      },
+      "object" : {
+        "type" : "uri",
+        "value" : "http://www.w3.org/ns/sparql-service-description#Service"
+      }
+    }]
+  }
+}
+"""
+
+
+def test_issue_923():
+    with StringIO(RESULT_SOURCE) as result_source:
+        Result.parse(source=result_source, content_type="application/sparql-results+json;charset=utf-8")
diff --git a/test/test_literal.py b/test/test_literal.py
index feb9d72d..dae2d187 100644
--- a/test/test_literal.py
+++ b/test/test_literal.py
@@ -2,7 +2,7 @@ import unittest
 
 import rdflib  # needed for eval(repr(...)) below
 from rdflib.term import Literal, URIRef, _XSD_DOUBLE, bind
-from six import integer_types, PY3
+from six import integer_types, PY3, string_types
 
 
 def uformat(s):
@@ -139,6 +139,30 @@ class TestBindings(unittest.TestCase):
         self.assertEqual(lb.value, vb)
         self.assertEqual(lb.datatype, dtB)
 
+    def testSpecificBinding(self):
+
+        def lexify(s):
+            return "--%s--" % s
+
+        def unlexify(s):
+            return s[2:-2]
+
+        datatype = rdflib.URIRef('urn:dt:mystring')
+
+        #Datatype-specific rule
+        bind(datatype, string_types, unlexify, lexify, datatype_specific=True)
+
+        s = "Hello"
+        normal_l = Literal(s)
+        self.assertEqual(str(normal_l), s)
+        self.assertEqual(normal_l.toPython(), s)
+        self.assertEqual(normal_l.datatype, None)
+
+        specific_l = Literal("--%s--" % s, datatype=datatype)
+        self.assertEqual(str(specific_l), lexify(s))
+        self.assertEqual(specific_l.toPython(), s)
+        self.assertEqual(specific_l.datatype, datatype)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/test_n3_suite.py b/test/test_n3_suite.py
index f6efe34a..21e6bcba 100644
--- a/test/test_n3_suite.py
+++ b/test/test_n3_suite.py
@@ -1,5 +1,8 @@
 import os
 import sys
+import logging
+
+log = logging.getLogger(__name__)
 
 try:
     from .testutils import check_serialize_parse
@@ -18,6 +21,15 @@ def _get_test_files_formats():
             elif f.endswith('.n3'):
                 yield fpath, 'n3'
 
+def all_n3_files():
+    """Yield (path, format) test inputs, leaving out the files in skiptests."""
+    # only n3 can serialize QuotedGraph, no point in testing roundtrip
+    skiptests = ['test/n3/example-lots_of_graphs.n3']
+    for fpath, fmt in _get_test_files_formats():
+        if fpath in skiptests:
+            log.debug("Skipping %s, known issue", fpath)  # lazy logger formatting
+        else:
+            yield fpath, fmt
 
 def test_n3_writing():
     for fpath, fmt in _get_test_files_formats():
diff --git a/test/test_namespace.py b/test/test_namespace.py
index 9de7ff29..4041433e 100644
--- a/test/test_namespace.py
+++ b/test/test_namespace.py
@@ -22,6 +22,33 @@ class NamespacePrefixTest(unittest.TestCase):
         self.assertEqual(g.compute_qname(URIRef("http://blip/blop")),
                          ("ns4", URIRef("http://blip/"), "blop"))
 
+        # should return empty qnames correctly
+        self.assertEqual(g.compute_qname(URIRef("http://foo/bar/")),
+            ("ns1", URIRef("http://foo/bar/"), ""))
+
+    def test_reset(self):
+        data = ('@prefix a: <http://example.org/a> .\n'
+                'a: <http://example.org/b> <http://example.org/c> .')
+        graph = Graph().parse(data=data, format='turtle')
+        for p, n in tuple(graph.namespaces()):  # tuple(): snapshot before popping from the store
+            graph.store._IOMemory__namespace.pop(p)  # name-mangled private attribute of IOMemory
+            graph.store._IOMemory__prefix.pop(n)
+        graph.namespace_manager.reset()
+        self.assertFalse(tuple(graph.namespaces()))  # no bindings are reported any more
+        u = URIRef('http://example.org/a')
+        prefix, namespace, name = graph.namespace_manager.compute_qname(u, generate=True)
+        self.assertNotEqual(namespace, u)  # the full URI must not come back as the namespace
+
+    def test_reset_preserve_prefixes(self):
+        data = ('@prefix a: <http://example.org/a> .\n'
+                'a: <http://example.org/b> <http://example.org/c> .')
+        graph = Graph().parse(data=data, format='turtle')
+        graph.namespace_manager.reset()
+        self.assertTrue(tuple(graph.namespaces()))  # bindings are still reported after reset()
+        u = URIRef('http://example.org/a')
+        prefix, namespace, name = graph.namespace_manager.compute_qname(u, generate=True)
+        self.assertEqual(namespace, u)  # the namespace declared for prefix a: is returned
+
     def test_n3(self):
         g = Graph()
         g.add((URIRef("http://example.com/foo"),
diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py
index 819c944a..9dfed952 100644
--- a/test/test_roundtrip.py
+++ b/test/test_roundtrip.py
@@ -5,8 +5,11 @@ import rdflib.compare
 try:
     from .test_nt_suite import all_nt_files
     assert all_nt_files
+    from .test_n3_suite import all_n3_files
+    assert all_n3_files
 except:
     from test.test_nt_suite import all_nt_files
+    from test.test_n3_suite import all_n3_files
 
 """
 Test round-tripping by all serializers/parser that are registerd.
@@ -25,9 +28,10 @@ tests roundtripping through rdf/xml with only the literals-02 file
 
 
 SKIP = [
+    ('xml', 'test/n3/n3-writer-test-29.n3'),  # has predicates that cannot be shortened to strict qnames
     ('xml', 'test/nt/qname-02.nt'),  # uses a property that cannot be qname'd
-    # uses a property that cannot be qname'd
-    ('application/rdf+xml', 'test/nt/qname-02.nt'),
+    ('trix', 'test/n3/strquot.n3'),  # contains characters forbidden by the xml spec
+    ('xml', 'test/n3/strquot.n3'),  # contains characters forbidden by the xml spec
 ]
 
 
@@ -43,6 +47,7 @@ def roundtrip(e, verbose=False):
     if verbose:
         print("S:")
         print(s)
+        print(s.decode())
 
     g2 = rdflib.ConjunctiveGraph()
     g2.parse(data=s, format=testfmt)
@@ -52,12 +57,12 @@ def roundtrip(e, verbose=False):
         print("Diff:")
         print("%d triples in both" % len(both))
         print("G1 Only:")
-        for t in first:
+        for t in sorted(first):
             print(t)
 
         print("--------------------")
         print("G2 Only")
-        for t in second:
+        for t in sorted(second):
             print(t)
 
     assert rdflib.compare.isomorphic(g1, g2)
@@ -88,6 +93,24 @@ def test_cases():
                 yield roundtrip, (infmt, testfmt, f)
 
 
+def test_n3():
+    """Yield (roundtrip, args) for each N3 file and each format in ``formats``."""
+    global formats
+    if not formats:
+        serializers = {
+            p.name for p in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)}
+        parsers = {
+            p.name for p in rdflib.plugin.plugins(None, rdflib.plugin.Parser)}
+        formats = parsers & serializers
+
+    for testfmt in formats:
+        if "/" in testfmt:
+            continue  # skip double testing
+        for f, infmt in all_n3_files():
+            if (testfmt, f) not in SKIP:
+                yield roundtrip, (infmt, testfmt, f)
+
+
 if __name__ == "__main__":
     import nose
     if len(sys.argv) == 1:
diff --git a/test/test_turtle_serialize.py b/test/test_turtle_serialize.py
index 0b602b34..81f57847 100644
--- a/test/test_turtle_serialize.py
+++ b/test/test_turtle_serialize.py
@@ -70,6 +70,24 @@ def test_turtle_valid_list():
         assert turtle_serializer.isValidList(o)
 
 
+def test_turtle_namespace():
+    """Serialized turtle must use the ``RO_has_phenotype:`` and ``GENO:`` prefixes."""
+    graph = Graph()
+    graph.bind('OBO', 'http://purl.obolibrary.org/obo/')
+    graph.bind('GENO', 'http://purl.obolibrary.org/obo/GENO_')
+    graph.bind('RO', 'http://purl.obolibrary.org/obo/RO_')
+    graph.bind('RO_has_phenotype',
+               'http://purl.obolibrary.org/obo/RO_0002200')
+    graph.add((URIRef('http://example.org'),
+               URIRef('http://purl.obolibrary.org/obo/RO_0002200'),
+               URIRef('http://purl.obolibrary.org/obo/GENO_0000385')))
+    turtle = graph.serialize(format='turtle').decode()
+    output = ' '.join(
+        line for line in turtle.splitlines() if not line.startswith('@prefix'))
+    assert 'RO_has_phenotype:' in output
+    assert 'GENO:0000385' in output
+
+
 if __name__ == "__main__":
     import nose
     import sys
diff --git a/test/test_util.py b/test/test_util.py
index 61e5b33c..4184b659 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 import unittest
 import time
 from rdflib.graph import Graph
@@ -236,6 +238,8 @@ class TestUtilTermConvert(unittest.TestCase):
     def test_util_from_n3_expectpartialidempotencewithn3(self):
         for n3 in ('<http://ex.com/foo>',
                    '"foo"@de',
+                   u'<http://ex.com/漢字>',
+                   u'<http://ex.com/a#あ>',
                    # '"\\""', # exception as '\\"' --> '"' by orig parser as well
                    '"""multi\n"line"\nstring"""@en'):
             self.assertEqual(util.from_n3(n3).n3(), n3,
diff --git a/test/testutils.py b/test/testutils.py
index 02bc4b23..20b060d3 100644
--- a/test/testutils.py
+++ b/test/testutils.py
@@ -96,7 +96,7 @@ def nose_tst_earl_report(generator, earl_report_name=None):
     print("Ran %d tests, %d skipped, %d failed. "%(tests, skip, tests-skip-success))
     if earl_report_name:
         now = isodate.datetime_isoformat(datetime.datetime.utcnow())
-        earl_report = 'test_reports/%s-%s.ttl' % (earl_report_name, now)
+        earl_report = 'test_reports/%s-%s.ttl' % (earl_report_name, now.replace(":", ""))
 
         report.serialize(earl_report, format='n3')
         report.serialize('test_reports/%s-latest.ttl'%earl_report_name, format='n3')
diff --git a/test_reports/rdflib_nquads-2013-12-22T19:22:34.ttl b/test_reports/rdflib_nquads-2013-12-22T192234.ttl
index 344e1861..344e1861 100644
--- a/test_reports/rdflib_nquads-2013-12-22T19:22:34.ttl
+++ b/test_reports/rdflib_nquads-2013-12-22T192234.ttl
diff --git a/test_reports/rdflib_nt-2013-12-22T19:12:25.ttl b/test_reports/rdflib_nt-2013-12-22T191225.ttl
index 24dc06a4..24dc06a4 100644
--- a/test_reports/rdflib_nt-2013-12-22T19:12:25.ttl
+++ b/test_reports/rdflib_nt-2013-12-22T191225.ttl
diff --git a/test_reports/rdflib_sparql-2013-12-22T19:36:48.ttl b/test_reports/rdflib_sparql-2013-12-22T193648.ttl
index 1e2b4f9b..1e2b4f9b 100644
--- a/test_reports/rdflib_sparql-2013-12-22T19:36:48.ttl
+++ b/test_reports/rdflib_sparql-2013-12-22T193648.ttl
diff --git a/test_reports/rdflib_trig-2013-12-22T19:31:52.ttl b/test_reports/rdflib_trig-2013-12-22T193152.ttl
index 325759f1..325759f1 100644
--- a/test_reports/rdflib_trig-2013-12-22T19:31:52.ttl
+++ b/test_reports/rdflib_trig-2013-12-22T193152.ttl
diff --git a/test_reports/rdflib_trig-2013-12-30T15:56:57.ttl b/test_reports/rdflib_trig-2013-12-30T155657.ttl
index 84bbbc06..84bbbc06 100644
--- a/test_reports/rdflib_trig-2013-12-30T15:56:57.ttl
+++ b/test_reports/rdflib_trig-2013-12-30T155657.ttl
diff --git a/test_reports/rdflib_turtle-2013-12-22T19:13:51.ttl b/test_reports/rdflib_turtle-2013-12-22T191351.ttl
index 22bc440b..22bc440b 100644
--- a/test_reports/rdflib_turtle-2013-12-22T19:13:51.ttl
+++ b/test_reports/rdflib_turtle-2013-12-22T191351.ttl
author	Nicholas Car <nicholas.car@csiro.au>	2020-03-23 13:11:49 +1000
committer	Nicholas Car <nicholas.car@csiro.au>	2020-03-23 13:11:49 +1000
commit	7a1dc28314996d48727ff0cd022ac2b5df60c464 (patch)
tree	e7770df246ebc4d3fd04e9738ce4e4cd55b682ec
parent	b53dec4bf68df6f0f75d0dfe116fa21fb593b252 (diff)
parent	31fb70710bcfe87a9c3284b24c9633b54c839148 (diff)
download	rdflib-7a1dc28314996d48727ff0cd022ac2b5df60c464.tar.gz