summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nicholas Car <nicholas.car@csiro.au> 2020-03-23 13:11:49 +1000
committer Nicholas Car <nicholas.car@csiro.au> 2020-03-23 13:11:49 +1000
commit 7a1dc28314996d48727ff0cd022ac2b5df60c464 (patch)
tree e7770df246ebc4d3fd04e9738ce4e4cd55b682ec
parent b53dec4bf68df6f0f75d0dfe116fa21fb593b252 (diff)
parent 31fb70710bcfe87a9c3284b24c9633b54c839148 (diff)
download rdflib-7a1dc28314996d48727ff0cd022ac2b5df60c464.tar.gz
Merge remote-tracking branch 'origin/master' into pr_451_redux
# Conflicts: # rdflib/term.py
-rw-r--r-- rdflib/extras/infixowl.py 8
-rw-r--r-- rdflib/namespace.py 161
-rw-r--r-- rdflib/plugins/parsers/ntriples.py 2
-rw-r--r-- rdflib/plugins/serializers/rdfxml.py 6
-rw-r--r-- rdflib/plugins/serializers/turtle.py 10
-rw-r--r-- rdflib/plugins/serializers/xmlwriter.py 2
-rw-r--r-- rdflib/query.py 12
-rw-r--r-- rdflib/term.py 72
-rw-r--r-- rdflib/util.py 4
-rw-r--r-- setup.py 2
-rw-r--r-- test/n3/n3-writer-test-30.n3 22
-rw-r--r-- test/n3/n3-writer-test-31.n3 15
-rw-r--r-- test/test_dawg.py 2
-rw-r--r-- test/test_hex_binary.py 62
-rw-r--r-- test/test_issue920.py 36
-rw-r--r-- test/test_issue923.py 35
-rw-r--r-- test/test_literal.py 26
-rw-r--r-- test/test_n3_suite.py 12
-rw-r--r-- test/test_namespace.py 27
-rw-r--r-- test/test_roundtrip.py 31
-rw-r--r-- test/test_turtle_serialize.py 18
-rw-r--r-- test/test_util.py 4
-rw-r--r-- test/testutils.py 2
-rw-r--r-- test_reports/rdflib_nquads-2013-12-22T192234.ttl (renamed from test_reports/rdflib_nquads-2013-12-22T19:22:34.ttl) 0
-rw-r--r-- test_reports/rdflib_nt-2013-12-22T191225.ttl (renamed from test_reports/rdflib_nt-2013-12-22T19:12:25.ttl) 0
-rw-r--r-- test_reports/rdflib_sparql-2013-12-22T193648.ttl (renamed from test_reports/rdflib_sparql-2013-12-22T19:36:48.ttl) 0
-rw-r--r-- test_reports/rdflib_trig-2013-12-22T193152.ttl (renamed from test_reports/rdflib_trig-2013-12-22T19:31:52.ttl) 0
-rw-r--r-- test_reports/rdflib_trig-2013-12-30T155657.ttl (renamed from test_reports/rdflib_trig-2013-12-30T15:56:57.ttl) 0
-rw-r--r-- test_reports/rdflib_turtle-2013-12-22T191351.ttl (renamed from test_reports/rdflib_turtle-2013-12-22T19:13:51.ttl) 0
29 files changed, 512 insertions, 59 deletions
diff --git a/rdflib/extras/infixowl.py b/rdflib/extras/infixowl.py
index f3ce40d3..fb033198 100644
--- a/rdflib/extras/infixowl.py
+++ b/rdflib/extras/infixowl.py
@@ -318,7 +318,7 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False):
for val in store.objects(subject=thing, predicate=OWL_NS.hasValue):
return u'( %s VALUE %s )' % (
propString,
- manchesterSyntax(val.encode('utf-8', 'ignore'), store))
+ manchesterSyntax(val, store))
for someClass in store.objects(
subject=thing, predicate=OWL_NS.someValuesFrom):
return u'( %s SOME %s )' % (
@@ -329,7 +329,7 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False):
for s, p, o in store.triples_choices(
(thing, list(cardLookup.keys()), None)):
return u'( %s %s %s )' % (
- propString, cardLookup[p], o.encode('utf-8', 'ignore'))
+ propString, cardLookup[p], o)
compl = list(store.objects(subject=thing, predicate=OWL_NS.complementOf))
if compl:
return '( NOT %s )' % (manchesterSyntax(compl[0], store))
@@ -357,9 +357,9 @@ def manchesterSyntax(thing, store, boolean=None, transientList=False):
return '[]' # +thing._id.encode('utf-8')+'</em>'
label = first(Class(thing, graph=store).label)
if label:
- return label.encode('utf-8', 'ignore')
+ return label
else:
- return qname.encode('utf-8', 'ignore')
+ return qname
def GetIdentifiedClasses(graph):
diff --git a/rdflib/namespace.py b/rdflib/namespace.py
index 4ca1b09a..bea99905 100644
--- a/rdflib/namespace.py
+++ b/rdflib/namespace.py
@@ -353,7 +353,12 @@ class NamespaceManager(object):
def __init__(self, graph):
self.graph = graph
self.__cache = {}
+ self.__cache_strict = {}
self.__log = None
+ self.__strie = {}
+ self.__trie = {}
+ for p, n in self.namespaces(): # self.bind is not always called
+ insert_trie(self.__trie, str(n))
self.bind("xml", "http://www.w3.org/XML/1998/namespace")
self.bind("rdf", RDF)
self.bind("rdfs", RDFS)
@@ -361,6 +366,10 @@ class NamespaceManager(object):
def reset(self):
self.__cache = {}
+ self.__strie = {}
+ self.__trie = {}
+ for p, n in self.namespaces(): # repopulate the trie
+ insert_trie(self.__trie, str(n))
def __get_store(self):
return self.graph.store
@@ -373,6 +382,13 @@ class NamespaceManager(object):
else:
return ":".join((prefix, name))
+ def qname_strict(self, uri):
+ prefix, namespace, name = self.compute_qname_strict(uri)
+ if prefix == '':
+ return name
+ else:
+ return ':'.join((prefix, name))
+
def normalizeUri(self, rdfTerm):
"""
Takes an RDF Term and 'normalizes' it into a QName (using the
@@ -381,6 +397,8 @@ class NamespaceManager(object):
"""
try:
namespace, name = split_uri(rdfTerm)
+ if namespace not in self.__strie:
+ insert_strie(self.__strie, self.__trie, str(namespace))
namespace = URIRef(text_type(namespace))
except:
if isinstance(rdfTerm, Variable):
@@ -404,9 +422,25 @@ class NamespaceManager(object):
)
if uri not in self.__cache:
- namespace, name = split_uri(uri)
+ try:
+ namespace, name = split_uri(uri)
+ except ValueError as e:
+ namespace = URIRef(uri)
+ prefix = self.store.prefix(namespace)
+ if not prefix:
+ raise e
+ if namespace not in self.__strie:
+ insert_strie(self.__strie, self.__trie, namespace)
+
+ if self.__strie[namespace]:
+ pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
+ if pl_namespace is not None:
+ namespace = pl_namespace
+ name = uri[len(namespace):]
+
namespace = URIRef(namespace)
- prefix = self.store.prefix(namespace)
+ prefix = self.store.prefix(namespace) # warning multiple prefixes problem
+
if prefix is None:
if not generate:
raise KeyError(
@@ -422,6 +456,56 @@ class NamespaceManager(object):
self.__cache[uri] = (prefix, namespace, name)
return self.__cache[uri]
+ def compute_qname_strict(self, uri, generate=True):
+ # code repeated to avoid branching on strict every time
+ # if output needs to be strict (e.g. for xml) then
+ # only the strict output should bear the overhead
+ prefix, namespace, name = self.compute_qname(uri)
+ if is_ncname(text_type(name)):
+ return prefix, namespace, name
+ else:
+ if uri not in self.__cache_strict:
+ try:
+ namespace, name = split_uri(uri, NAME_START_CATEGORIES)
+ except ValueError as e:
+ message = ('This graph cannot be serialized to a strict format '
+ 'because there is no valid way to shorten {}'.format(uri))
+ raise ValueError(message)
+ # omitted for strict since NCNames cannot be empty
+ #namespace = URIRef(uri)
+ #prefix = self.store.prefix(namespace)
+ #if not prefix:
+ #raise e
+
+ if namespace not in self.__strie:
+ insert_strie(self.__strie, self.__trie, namespace)
+
+ # omitted for strict
+ #if self.__strie[namespace]:
+ #pl_namespace = get_longest_namespace(self.__strie[namespace], uri)
+ #if pl_namespace is not None:
+ #namespace = pl_namespace
+ #name = uri[len(namespace):]
+
+ namespace = URIRef(namespace)
+ prefix = self.store.prefix(namespace) # warning multiple prefixes problem
+
+ if prefix is None:
+ if not generate:
+ raise KeyError(
+ "No known prefix for {} and generate=False".format(namespace)
+ )
+ num = 1
+ while 1:
+ prefix = "ns%s" % num
+ if not self.store.namespace(prefix):
+ break
+ num += 1
+ self.bind(prefix, namespace)
+ self.__cache_strict[uri] = (prefix, namespace, name)
+
+ return self.__cache_strict[uri]
+
def bind(self, prefix, namespace, override=True, replace=False):
"""bind a given namespace to the prefix
@@ -447,6 +531,7 @@ class NamespaceManager(object):
if replace:
self.store.bind(prefix, namespace)
+ insert_trie(self.__trie, str(namespace))
return
# prefix already in use for different namespace
@@ -476,6 +561,7 @@ class NamespaceManager(object):
else:
if override or bound_prefix.startswith("_"): # or a generated prefix
self.store.bind(prefix, namespace)
+ insert_trie(self.__trie, str(namespace))
def namespaces(self):
for prefix, namespace in self.store.namespaces():
@@ -527,6 +613,7 @@ class NamespaceManager(object):
NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"]
+SPLIT_START_CATEGORIES = NAME_START_CATEGORIES + ['Nd']
NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"]
ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"]
@@ -539,27 +626,28 @@ ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_", u":"]
def is_ncname(name):
- first = name[0]
- if first == "_" or category(first) in NAME_START_CATEGORIES:
- for i in range(1, len(name)):
- c = name[i]
- if not category(c) in NAME_CATEGORIES:
- if c != ':' and c in ALLOWED_NAME_CHARS:
- continue
- return 0
- # if in compatibility area
- # if decomposition(c)!='':
- # return 0
-
- return 1
- else:
- return 0
+ if name:
+ first = name[0]
+ if first == "_" or category(first) in NAME_START_CATEGORIES:
+ for i in range(1, len(name)):
+ c = name[i]
+ if not category(c) in NAME_CATEGORIES:
+ if c != ':' and c in ALLOWED_NAME_CHARS:
+ continue
+ return 0
+ # if in compatibility area
+ # if decomposition(c)!='':
+ # return 0
+
+ return 1
+
+ return 0
XMLNS = "http://www.w3.org/XML/1998/namespace"
-def split_uri(uri):
+def split_uri(uri, split_start=SPLIT_START_CATEGORIES):
if uri.startswith(XMLNS):
return (XMLNS, uri.split(XMLNS)[1])
length = len(uri)
@@ -569,7 +657,8 @@ def split_uri(uri):
if c in ALLOWED_NAME_CHARS:
continue
for j in range(-1 - i, length):
- if category(uri[j]) in NAME_START_CATEGORIES or uri[j] == "_":
+ if category(uri[j]) in split_start or uri[j] == "_":
+ # _ prevents early split, roundtrip not generate
ns = uri[:j]
if not ns:
break
@@ -577,3 +666,37 @@ def split_uri(uri):
return (ns, ln)
break
raise ValueError("Can't split '{}'".format(uri))
+
+def insert_trie(trie, value): # aka get_subtrie_or_insert
+ """ Insert a value into the trie if it is not already contained in the trie.
+ Return the subtree for the value regardless of whether it is a new value
+ or not. """
+ if value in trie:
+ return trie[value]
+ multi_check = False
+ for key in tuple(trie.keys()):
+ if len(value) > len(key) and value.startswith(key):
+ return insert_trie(trie[key], value)
+ elif key.startswith(value): # we know the value is not in the trie
+ if not multi_check:
+ trie[value] = {}
+ multi_check = True # there can be multiple longer existing prefixes
+ dict_ = trie.pop(key) # does not break strie since key<->dict_ remains unchanged
+ trie[value][key] = dict_
+ if value not in trie:
+ trie[value] = {}
+ return trie[value]
+
+def insert_strie(strie, trie, value):
+ if value not in strie:
+ strie[value] = insert_trie(trie, value)
+
+def get_longest_namespace(trie, value):
+ for key in trie:
+ if value.startswith(key):
+ out = get_longest_namespace(trie[key], value)
+ if out is None:
+ return key
+ else:
+ return out
+ return None
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 99b966b1..67dbe9d7 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -28,7 +28,7 @@ from six import unichr
__all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser']
-uriref = r'<([^:]+:[^\s"<>]+)>'
+uriref = r'<([^:]+:[^\s"<>]*)>'
literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"'
litinfo = r'(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)|\^\^' + uriref + r')?'
diff --git a/rdflib/plugins/serializers/rdfxml.py b/rdflib/plugins/serializers/rdfxml.py
index d5ca78b9..631c8fe0 100644
--- a/rdflib/plugins/serializers/rdfxml.py
+++ b/rdflib/plugins/serializers/rdfxml.py
@@ -32,7 +32,7 @@ class XMLSerializer(Serializer):
bindings = {}
for predicate in set(store.predicates()):
- prefix, namespace, name = nm.compute_qname(predicate)
+ prefix, namespace, name = nm.compute_qname_strict(predicate)
bindings[prefix] = URIRef(namespace)
RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
@@ -116,7 +116,7 @@ class XMLSerializer(Serializer):
def predicate(self, predicate, object, depth=1):
write = self.write
indent = " " * depth
- qname = self.store.namespace_manager.qname(predicate)
+ qname = self.store.namespace_manager.qname_strict(predicate)
if isinstance(object, Literal):
attributes = ""
@@ -175,7 +175,7 @@ class PrettyXMLSerializer(Serializer):
store.objects(None, RDF.type))
for predicate in possible:
- prefix, namespace, local = nm.compute_qname(predicate)
+ prefix, namespace, local = nm.compute_qname_strict(predicate)
namespaces[prefix] = namespace
namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
diff --git a/rdflib/plugins/serializers/turtle.py b/rdflib/plugins/serializers/turtle.py
index 3a1e5a61..1c58ba1b 100644
--- a/rdflib/plugins/serializers/turtle.py
+++ b/rdflib/plugins/serializers/turtle.py
@@ -43,6 +43,7 @@ class RecursiveSerializer(Serializer):
predicateOrder = [RDF.type, RDFS.label]
maxDepth = 10
indentString = u" "
+ roundtrip_prefixes = tuple()
def __init__(self, store):
@@ -110,6 +111,15 @@ class RecursiveSerializer(Serializer):
self._subjects = {}
self._topLevels = {}
+ if self.roundtrip_prefixes:
+ if hasattr(self.roundtrip_prefixes, '__iter__'):
+ for prefix, ns in self.store.namespaces():
+ if prefix in self.roundtrip_prefixes:
+ self.addNamespace(prefix, ns)
+ else:
+ for prefix, ns in self.store.namespaces():
+ self.addNamespace(prefix, ns)
+
def buildPredicateHash(self, subject):
"""
Build a hash key by predicate to a list of objects for the given
diff --git a/rdflib/plugins/serializers/xmlwriter.py b/rdflib/plugins/serializers/xmlwriter.py
index 1b67b8a7..de720e8c 100644
--- a/rdflib/plugins/serializers/xmlwriter.py
+++ b/rdflib/plugins/serializers/xmlwriter.py
@@ -107,4 +107,4 @@ class XMLWriter(object):
else:
return uri[len(ns):]
- return self.nm.qname(uri)
+ return self.nm.qname_strict(uri)
diff --git a/rdflib/query.py b/rdflib/query.py
index 25d82b2e..d04440fb 100644
--- a/rdflib/query.py
+++ b/rdflib/query.py
@@ -156,7 +156,7 @@ class Result(object):
There is a bit of magic here that makes this appear like different
Python objects, depending on the type of result.
- If the type is "SELECT", iterating will yield lists of QueryRow objects
+ If the type is "SELECT", iterating will yield lists of ResultRow objects
If the type is "ASK", iterating will yield a single bool (or
bool(result) will return the same bool)
@@ -200,7 +200,15 @@ class Result(object):
@staticmethod
def parse(source=None, format=None, content_type=None, **kwargs):
from rdflib import plugin
- parser = plugin.get(format or content_type or 'xml', ResultParser)()
+
+ if format:
+ plugin_key = format
+ elif content_type:
+ plugin_key = content_type.split(";", 1)[0]
+ else:
+ plugin_key = 'xml'
+
+ parser = plugin.get(plugin_key, ResultParser)()
return parser.parse(source, content_type=content_type, **kwargs)
diff --git a/rdflib/term.py b/rdflib/term.py
index d4f784ef..3d290258 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -51,8 +51,10 @@ import xml.dom.minidom
from datetime import date, time, datetime, timedelta
from re import sub, compile
from collections import defaultdict
+from unicodedata import category
from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat
+from binascii import hexlify, unhexlify
import rdflib
from six import PY2
@@ -74,10 +76,7 @@ _invalid_uri_chars = '<>" {}|\\^`'
def _is_valid_uri(uri):
- for c in _invalid_uri_chars:
- if c in uri:
- return False
- return True
+ return all(map(lambda c: ord(c) > 256 or not c in _invalid_uri_chars, uri))
_lang_tag_regex = compile('^[a-zA-Z]+(?:-[a-zA-Z0-9]+)*$')
@@ -561,20 +560,20 @@ class Literal(Identifier):
datatype = lexical_or_value.datatype
value = lexical_or_value.value
- elif isinstance(lexical_or_value, string_types):
+ elif isinstance(lexical_or_value, string_types) or (PY3 and isinstance(lexical_or_value, bytes)):
# passed a string
# try parsing lexical form of datatyped literal
value = _castLexicalToPython(lexical_or_value, datatype)
if value is not None and normalize:
- _value, _datatype = _castPythonToLiteral(value)
+ _value, _datatype = _castPythonToLiteral(value, datatype)
if _value is not None and _is_valid_unicode(_value):
lexical_or_value = _value
else:
# passed some python object
value = lexical_or_value
- _value, _datatype = _castPythonToLiteral(lexical_or_value)
+ _value, _datatype = _castPythonToLiteral(lexical_or_value, datatype)
datatype = datatype or _datatype
if _value is not None:
@@ -1378,6 +1377,12 @@ def _writeXML(xmlnode):
return s
+def _unhexlify(value):
+ # In Python 3.2, unhexlify does not support str (only bytes)
+ if PY3 and isinstance(value, str):
+ value = value.encode()
+ return unhexlify(value)
+
# Cannot import Namespace/XSD because of circular dependencies
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
@@ -1401,6 +1406,7 @@ _XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration')
_XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration')
_OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational')
+_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary')
# TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth
_NUMERIC_LITERAL_TYPES = (
@@ -1466,21 +1472,31 @@ _STRING_LITERAL_TYPES = (
URIRef(_XSD_PFX + 'token')
)
-def _castPythonToLiteral(obj):
+
+def _py2literal(obj, pType, castFunc, dType):
+ if castFunc:
+ return castFunc(obj), dType
+ elif dType:
+ return obj, dType
+ else:
+ return obj, None
+
+
+def _castPythonToLiteral(obj, datatype):
"""
- Casts a python datatype to a tuple of the lexical value and a
+ Casts a tuple of a python type and a special datatype URI to a tuple of the lexical value and a
datatype URI (or None)
"""
- for pType, (castFunc, dType) in _PythonToXSD:
+ for (pType, dType), castFunc in _SpecificPythonToXSDRules:
+ if isinstance(obj, pType) and dType == datatype:
+ return _py2literal(obj, pType, castFunc, dType)
+
+ for pType, (castFunc, dType) in _GenericPythonToXSDRules:
if isinstance(obj, pType):
- if castFunc:
- return castFunc(obj), dType
- elif dType:
- return obj, dType
- else:
- return obj, None
+ return _py2literal(obj, pType, castFunc, dType)
return obj, None # TODO: is this right for the fall through case?
+
from decimal import Decimal
# Mappings from Python types to XSD datatypes and back (borrowed from sparta)
@@ -1494,7 +1510,7 @@ from decimal import Decimal
# python longs have no limit
# both map to the abstract integer type,
# rather than some concrete bit-limited datatype
-_PythonToXSD = [
+_GenericPythonToXSDRules = [
(string_types, (None, None)),
(float, (None, _XSD_DOUBLE)),
(bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)),
@@ -1515,6 +1531,12 @@ _PythonToXSD = [
(Fraction, (None, _OWL_RATIONAL))
]
+_SpecificPythonToXSDRules = [
+ ((string_types, _XSD_HEXBINARY), hexlify),
+]
+if PY3:
+ _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify))
+
XSDToPython = {
None: None, # plain literals map directly to value space
URIRef(_XSD_PFX + 'time'): parse_time,
@@ -1525,6 +1547,7 @@ XSDToPython = {
URIRef(_XSD_PFX + 'duration'): parse_duration,
URIRef(_XSD_PFX + 'dayTimeDuration'): parse_duration,
URIRef(_XSD_PFX + 'yearMonthDuration'): parse_duration,
+ URIRef(_XSD_PFX + 'hexBinary'): _unhexlify,
URIRef(_XSD_PFX + 'string'): None,
URIRef(_XSD_PFX + 'normalizedString'): None,
URIRef(_XSD_PFX + 'token'): None,
@@ -1580,7 +1603,7 @@ def _castLexicalToPython(lexical, datatype):
return None
-def bind(datatype, pythontype, constructor=None, lexicalizer=None):
+def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False):
"""
register a new datatype<->pythontype binding
@@ -1588,10 +1611,16 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
into a Python instances, if not given the pythontype
is used directly
- :param lexicalizer: an optinoal function for converting python objects to
+ :param lexicalizer: an optional function for converting python objects to
lexical form, if not given object.__str__ is used
+ :param datatype_specific: makes the lexicalizer function be accessible
+ from the pair (pythontype, datatype) if set to True
+ or from the pythontype otherwise. False by default
"""
+ if datatype_specific and datatype is None:
+ raise Exception("No datatype given for a datatype-specific binding")
+
if datatype in _toPythonMapping:
logger.warning("datatype '%s' was already bound. Rebinding." %
datatype)
@@ -1599,7 +1628,10 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
if constructor is None:
constructor = pythontype
_toPythonMapping[datatype] = constructor
- _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
+ if datatype_specific:
+ _SpecificPythonToXSDRules.append(((pythontype, datatype), lexicalizer))
+ else:
+ _GenericPythonToXSDRules.append((pythontype, (lexicalizer, datatype)))
class Variable(Identifier):
diff --git a/rdflib/util.py b/rdflib/util.py
index f0c6207d..1789aa70 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -156,7 +156,9 @@ def from_n3(s, default=None, backend=None, nsm=None):
if not s:
return default
if s.startswith('<'):
- return URIRef(s[1:-1])
+ # Hack: this should correctly handle strings with either native unicode
+ # characters, or \u1234 unicode escapes.
+ return URIRef(s[1:-1].encode("raw-unicode-escape").decode("unicode-escape"))
elif s.startswith('"'):
if s.startswith('"""'):
quotes = '"""'
diff --git a/setup.py b/setup.py
index 9859360e..0203f299 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,7 @@ kwargs = {}
kwargs['install_requires'] = [ 'six', 'isodate', 'pyparsing']
kwargs['tests_require'] = ['html5lib', 'networkx']
kwargs['test_suite'] = "nose.collector"
-kwargs['extras_require'] = {'html': ['html5lib']}
+kwargs['extras_require'] = {'html': ['html5lib'], 'sparql': ['requests']}
def find_version(filename):
_version_re = re.compile(r'__version__ = "(.*)"')
diff --git a/test/n3/n3-writer-test-30.n3 b/test/n3/n3-writer-test-30.n3
new file mode 100644
index 00000000..88cf2210
--- /dev/null
+++ b/test/n3/n3-writer-test-30.n3
@@ -0,0 +1,22 @@
+# Test full length qnames
+
+@prefix : <http://example.org/here#> .
+@prefix full: <http://example.org/full> .
+@prefix pref: <http://example.org/prefix/> .
+@prefix more: <http://example.org/prefix/more> .
+
+# Test namespace generation
+
+full: :x :y .
+:x full: :y .
+:x :y full: .
+
+full: full: full: .
+
+# Test existing namespace
+
+more: :x :y .
+:x more: :y .
+:x :y more: .
+
+more: more: more: .
diff --git a/test/n3/n3-writer-test-31.n3 b/test/n3/n3-writer-test-31.n3
new file mode 100644
index 00000000..1c4494f1
--- /dev/null
+++ b/test/n3/n3-writer-test-31.n3
@@ -0,0 +1,15 @@
+# Test unshortenable strict qnames no predicates for xml sanity check
+
+@prefix : <http://example.org/here#> .
+@prefix evil1: <http://example.org/1> .
+@prefix evil2: <http://example.org/prefix/1#> .
+
+# Test namespace generation
+
+evil1: :x :y .
+:x :y evil1: .
+
+# Test existing namespace
+
+evil2:1 :x :y .
+:x :y evil2:1 .
diff --git a/test/test_dawg.py b/test/test_dawg.py
index c6370e89..77c4e419 100644
--- a/test/test_dawg.py
+++ b/test/test_dawg.py
@@ -613,7 +613,7 @@ if __name__ == '__main__':
now, i, success, f_sum, e_sum, skip, 100. * success / i)
)
- earl_report = 'test_reports/rdflib_sparql-%s.ttl' % now
+ earl_report = 'test_reports/rdflib_sparql-%s.ttl' % now.replace(":", "")
report.serialize(earl_report, format='n3')
report.serialize('test_reports/rdflib_sparql-latest.ttl', format='n3')
diff --git a/test/test_hex_binary.py b/test/test_hex_binary.py
new file mode 100644
index 00000000..419a47e2
--- /dev/null
+++ b/test/test_hex_binary.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import binascii
+from rdflib import Literal, XSD
+import six
+
+
+class HexBinaryTestCase(unittest.TestCase):
+
+ def test_int(self):
+ self._test_integer(5)
+ self._test_integer(3452)
+ self._test_integer(4886)
+
+ def _test_integer(self, i):
+ hex_i = format(i, "x")
+ # Pad it to an even length (whole bytes)
+ len_hex_i = len(hex_i)
+ hex_i = hex_i.zfill(len_hex_i + len_hex_i % 2)
+
+ l = Literal(hex_i, datatype=XSD.hexBinary)
+ bin_i = l.toPython()
+ self.assertEquals(int(binascii.hexlify(bin_i), 16), i)
+
+ if six.PY2:
+ self.assertEquals(unicode(l), hex_i)
+ else:
+ self.assertEquals(str(l), hex_i)
+ self.assertEquals(int(hex_i, 16), i)
+ if six.PY2:
+ self.assertEquals(int(unicode(l), 16), i)
+ else:
+ self.assertEquals(int(l, 16), i)
+ self.assertEquals(int(str(l), 16), i)
+
+ def test_unicode(self):
+ str1 = u"Test utf-8 string éàë"
+ # u hexstring
+ hex_str1 = binascii.hexlify(str1.encode('utf-8')).decode()
+ l1 = Literal(hex_str1, datatype=XSD.hexBinary)
+ b_str1 = l1.toPython()
+ self.assertEquals(b_str1.decode('utf-8'), str1)
+ if six.PY2:
+ self.assertEquals(unicode(l1), hex_str1)
+ else:
+ self.assertEquals(str(l1), hex_str1)
+
+ # b hexstring
+ hex_str1b = binascii.hexlify(str1.encode('utf-8'))
+ l1b = Literal(hex_str1b, datatype=XSD.hexBinary)
+ b_str1b = l1b.toPython()
+ self.assertEquals(b_str1, b_str1b)
+ self.assertEquals(b_str1b.decode('utf-8'), str1)
+ if six.PY2:
+ self.assertEquals(unicode(l1b), hex_str1)
+ else:
+ self.assertEquals(str(l1b), hex_str1)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_issue920.py b/test/test_issue920.py
new file mode 100644
index 00000000..eb12edc4
--- /dev/null
+++ b/test/test_issue920.py
@@ -0,0 +1,36 @@
+"""
+Issue 920 - NTriples fails to parse URIs with only a scheme
+
+from rdflib import Graph
+g=Graph()
+g.parse(data='<a:> <b:> <c:> .', format='nt') # nquads also fails
+
+N3, by contrast, succeeds:
+
+g.parse(data='<a:> <b:> <c:> .', format='n3')
+"""
+from rdflib import Graph
+import unittest
+
+
+class TestIssue920(unittest.TestCase):
+
+ def test_issue_920(self):
+ g = Graph()
+ # NT tests
+ g.parse(data='<a:> <b:> <c:> .', format='nt')
+ g.parse(data='<http://a> <http://b> <http://c> .', format='nt')
+ g.parse(data='<https://a> <http://> <http://c> .', format='nt')
+
+ # related parser tests
+ g.parse(data='<a:> <b:> <c:> .', format='turtle')
+ g.parse(data='<http://a> <http://b> <http://c> .', format='turtle')
+ g.parse(data='<https://a> <http://> <http://c> .', format='turtle')
+
+ g.parse(data='<a:> <b:> <c:> .', format='n3')
+ g.parse(data='<http://a> <http://b> <http://c> .', format='n3')
+ g.parse(data='<https://a> <http://> <http://c> .', format='n3')
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/test/test_issue923.py b/test/test_issue923.py
new file mode 100644
index 00000000..3becb6f8
--- /dev/null
+++ b/test/test_issue923.py
@@ -0,0 +1,35 @@
+"""
+Issue 923: split charset off of Content-Type before looking up Result-parsing plugin.
+"""
+from io import StringIO
+
+from rdflib.query import Result
+
+RESULT_SOURCE = u"""\
+{
+ "head" : {
+ "vars" : [ "subject", "predicate", "object", "context" ]
+ },
+ "results" : {
+ "bindings" : [ {
+ "subject" : {
+ "type" : "bnode",
+ "value" : "service"
+ },
+ "predicate" : {
+ "type" : "uri",
+ "value" : "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+ },
+ "object" : {
+ "type" : "uri",
+ "value" : "http://www.w3.org/ns/sparql-service-description#Service"
+ }
+ }]
+ }
+}
+"""
+
+
+def test_issue_923():
+ with StringIO(RESULT_SOURCE) as result_source:
+ Result.parse(source=result_source, content_type="application/sparql-results+json;charset=utf-8")
diff --git a/test/test_literal.py b/test/test_literal.py
index feb9d72d..dae2d187 100644
--- a/test/test_literal.py
+++ b/test/test_literal.py
@@ -2,7 +2,7 @@ import unittest
import rdflib # needed for eval(repr(...)) below
from rdflib.term import Literal, URIRef, _XSD_DOUBLE, bind
-from six import integer_types, PY3
+from six import integer_types, PY3, string_types
def uformat(s):
@@ -139,6 +139,30 @@ class TestBindings(unittest.TestCase):
self.assertEqual(lb.value, vb)
self.assertEqual(lb.datatype, dtB)
+ def testSpecificBinding(self):
+
+ def lexify(s):
+ return "--%s--" % s
+
+ def unlexify(s):
+ return s[2:-2]
+
+ datatype = rdflib.URIRef('urn:dt:mystring')
+
+ #Datatype-specific rule
+ bind(datatype, string_types, unlexify, lexify, datatype_specific=True)
+
+ s = "Hello"
+ normal_l = Literal(s)
+ self.assertEqual(str(normal_l), s)
+ self.assertEqual(normal_l.toPython(), s)
+ self.assertEqual(normal_l.datatype, None)
+
+ specific_l = Literal("--%s--" % s, datatype=datatype)
+ self.assertEqual(str(specific_l), lexify(s))
+ self.assertEqual(specific_l.toPython(), s)
+ self.assertEqual(specific_l.datatype, datatype)
+
if __name__ == "__main__":
unittest.main()
diff --git a/test/test_n3_suite.py b/test/test_n3_suite.py
index f6efe34a..21e6bcba 100644
--- a/test/test_n3_suite.py
+++ b/test/test_n3_suite.py
@@ -1,5 +1,8 @@
import os
import sys
+import logging
+
+log = logging.getLogger(__name__)
try:
from .testutils import check_serialize_parse
@@ -18,6 +21,15 @@ def _get_test_files_formats():
elif f.endswith('.n3'):
yield fpath, 'n3'
+def all_n3_files():
+ skiptests = [
+ 'test/n3/example-lots_of_graphs.n3', # only n3 can serialize QuotedGraph, no point in testing roundtrip
+ ]
+ for fpath, fmt in _get_test_files_formats():
+ if fpath in skiptests:
+ log.debug("Skipping %s, known issue" % fpath)
+ else:
+ yield fpath, fmt
def test_n3_writing():
for fpath, fmt in _get_test_files_formats():
diff --git a/test/test_namespace.py b/test/test_namespace.py
index 9de7ff29..4041433e 100644
--- a/test/test_namespace.py
+++ b/test/test_namespace.py
@@ -22,6 +22,33 @@ class NamespacePrefixTest(unittest.TestCase):
self.assertEqual(g.compute_qname(URIRef("http://blip/blop")),
("ns4", URIRef("http://blip/"), "blop"))
+ # should return empty qnames correctly
+ self.assertEqual(g.compute_qname(URIRef("http://foo/bar/")),
+ ("ns1", URIRef("http://foo/bar/"), ""))
+
+ def test_reset(self):
+ data = ('@prefix a: <http://example.org/a> .\n'
+ 'a: <http://example.org/b> <http://example.org/c> .')
+ graph = Graph().parse(data=data, format='turtle')
+ for p, n in tuple(graph.namespaces()):
+ graph.store._IOMemory__namespace.pop(p)
+ graph.store._IOMemory__prefix.pop(n)
+ graph.namespace_manager.reset()
+ self.assertFalse(tuple(graph.namespaces()))
+ u = URIRef('http://example.org/a')
+ prefix, namespace, name = graph.namespace_manager.compute_qname(u, generate=True)
+ self.assertNotEqual(namespace, u)
+
+ def test_reset_preserve_prefixes(self):
+ data = ('@prefix a: <http://example.org/a> .\n'
+ 'a: <http://example.org/b> <http://example.org/c> .')
+ graph = Graph().parse(data=data, format='turtle')
+ graph.namespace_manager.reset()
+ self.assertTrue(tuple(graph.namespaces()))
+ u = URIRef('http://example.org/a')
+ prefix, namespace, name = graph.namespace_manager.compute_qname(u, generate=True)
+ self.assertEqual(namespace, u)
+
def test_n3(self):
g = Graph()
g.add((URIRef("http://example.com/foo"),
diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py
index 819c944a..9dfed952 100644
--- a/test/test_roundtrip.py
+++ b/test/test_roundtrip.py
@@ -5,8 +5,11 @@ import rdflib.compare
try:
from .test_nt_suite import all_nt_files
assert all_nt_files
+ from .test_n3_suite import all_n3_files
+ assert all_n3_files
except:
from test.test_nt_suite import all_nt_files
+ from test.test_n3_suite import all_n3_files
"""
Test round-tripping by all serializers/parsers that are registered.
@@ -25,9 +28,10 @@ tests roundtripping through rdf/xml with only the literals-02 file
SKIP = [
+ ('xml', 'test/n3/n3-writer-test-29.n3'), # has predicates that cannot be shortened to strict qnames
('xml', 'test/nt/qname-02.nt'), # uses a property that cannot be qname'd
- # uses a property that cannot be qname'd
- ('application/rdf+xml', 'test/nt/qname-02.nt'),
+ ('trix', 'test/n3/strquot.n3'), # contains characters forbidden by the xml spec
+ ('xml', 'test/n3/strquot.n3'), # contains characters forbidden by the xml spec
]
@@ -43,6 +47,7 @@ def roundtrip(e, verbose=False):
if verbose:
print("S:")
print(s)
+ print(s.decode())
g2 = rdflib.ConjunctiveGraph()
g2.parse(data=s, format=testfmt)
@@ -52,12 +57,12 @@ def roundtrip(e, verbose=False):
print("Diff:")
print("%d triples in both" % len(both))
print("G1 Only:")
- for t in first:
+ for t in sorted(first):
print(t)
print("--------------------")
print("G2 Only")
- for t in second:
+ for t in sorted(second):
print(t)
assert rdflib.compare.isomorphic(g1, g2)
@@ -88,6 +93,24 @@ def test_cases():
yield roundtrip, (infmt, testfmt, f)
+ def test_n3():
+ """Yield one ``roundtrip`` case per (format, N3 file) pair not listed in ``SKIP``.
+
+ Each yielded item is a ``(roundtrip, (infmt, testfmt, f))`` pair, i.e. a
+ callable plus its argument tuple (generator-style test; presumably
+ collected by nose -- confirm against the test runner in use).
+ """
+ global formats
+ # Compute the module-level ``formats`` only when it is still empty/unset:
+ # the names registered both as a Serializer and as a Parser plugin.
+ if not formats:
+ serializers = set(
+ x.name for x in rdflib.plugin.plugins(
+ None, rdflib.plugin.Serializer))
+ parsers = set(
+ x.name for x in rdflib.plugin.plugins(
+ None, rdflib.plugin.Parser))
+ formats = parsers.intersection(serializers)
+
+ for testfmt in formats:
+ # NOTE(review): names containing "/" look like MIME-type aliases of the
+ # short format names (e.g. 'application/rdf+xml' vs 'xml') -- confirm.
+ if "/" in testfmt: continue # skip double testing
+ # ``all_n3_files()`` is iterated as (file, input format) pairs.
+ for f, infmt in all_n3_files():
+ # SKIP holds (serialization format, file) pairs to leave out.
+ if (testfmt, f) not in SKIP:
+ yield roundtrip, (infmt, testfmt, f)
+
+
if __name__ == "__main__":
import nose
if len(sys.argv) == 1:
diff --git a/test/test_turtle_serialize.py b/test/test_turtle_serialize.py
index 0b602b34..81f57847 100644
--- a/test/test_turtle_serialize.py
+++ b/test/test_turtle_serialize.py
@@ -70,6 +70,24 @@ def test_turtle_valid_list():
assert turtle_serializer.isValidList(o)
+ def test_turtle_namespace():
+ """Check which prefixes the Turtle serializer uses when bound namespaces overlap.
+
+ Binds four prefixes whose namespace URIs are successive extensions of
+ one another, adds a single triple, and asserts that the serialized
+ triple (with ``@prefix`` lines removed) contains ``RO_has_phenotype:``
+ and ``GENO:0000385``.
+ """
+ graph = Graph()
+ # Overlapping bindings: 'OBO' is a prefix of the 'GENO' and 'RO' namespaces,
+ # and 'RO_has_phenotype' is bound to one complete term URI.
+ graph.bind('OBO', 'http://purl.obolibrary.org/obo/')
+ graph.bind('GENO', 'http://purl.obolibrary.org/obo/GENO_')
+ graph.bind('RO', 'http://purl.obolibrary.org/obo/RO_')
+ graph.bind('RO_has_phenotype',
+ 'http://purl.obolibrary.org/obo/RO_0002200')
+ # The predicate equals the 'RO_has_phenotype' namespace exactly; the
+ # object lies under the 'GENO' namespace with local part '0000385'.
+ graph.add((URIRef('http://example.org'),
+ URIRef('http://purl.obolibrary.org/obo/RO_0002200'),
+ URIRef('http://purl.obolibrary.org/obo/GENO_0000385')))
+ # Drop the '@prefix' declaration lines so the assertions below can only
+ # match inside the triple itself. The serialize() result is decoded
+ # first, so it is expected to be bytes here.
+ output = [val for val in
+ graph.serialize(format='turtle').decode().splitlines()
+ if not val.startswith('@prefix')]
+ output = ' '.join(output)
+ # Predicate written with the full-URI binding (empty local name).
+ assert 'RO_has_phenotype:' in output
+ # Object written with the 'GENO' binding.
+ assert 'GENO:0000385' in output
+
+
if __name__ == "__main__":
import nose
import sys
diff --git a/test/test_util.py b/test/test_util.py
index 61e5b33c..4184b659 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import unittest
import time
from rdflib.graph import Graph
@@ -236,6 +238,8 @@ class TestUtilTermConvert(unittest.TestCase):
def test_util_from_n3_expectpartialidempotencewithn3(self):
for n3 in ('<http://ex.com/foo>',
'"foo"@de',
+ u'<http://ex.com/漢字>',
+ u'<http://ex.com/a#あ>',
# '"\\""', # exception as '\\"' --> '"' by orig parser as well
'"""multi\n"line"\nstring"""@en'):
self.assertEqual(util.from_n3(n3).n3(), n3,
diff --git a/test/testutils.py b/test/testutils.py
index 02bc4b23..20b060d3 100644
--- a/test/testutils.py
+++ b/test/testutils.py
@@ -96,7 +96,7 @@ def nose_tst_earl_report(generator, earl_report_name=None):
print("Ran %d tests, %d skipped, %d failed. "%(tests, skip, tests-skip-success))
if earl_report_name:
now = isodate.datetime_isoformat(datetime.datetime.utcnow())
- earl_report = 'test_reports/%s-%s.ttl' % (earl_report_name, now)
+ earl_report = 'test_reports/%s-%s.ttl' % (earl_report_name, now.replace(":", ""))
report.serialize(earl_report, format='n3')
report.serialize('test_reports/%s-latest.ttl'%earl_report_name, format='n3')
diff --git a/test_reports/rdflib_nquads-2013-12-22T19:22:34.ttl b/test_reports/rdflib_nquads-2013-12-22T192234.ttl
index 344e1861..344e1861 100644
--- a/test_reports/rdflib_nquads-2013-12-22T19:22:34.ttl
+++ b/test_reports/rdflib_nquads-2013-12-22T192234.ttl
diff --git a/test_reports/rdflib_nt-2013-12-22T19:12:25.ttl b/test_reports/rdflib_nt-2013-12-22T191225.ttl
index 24dc06a4..24dc06a4 100644
--- a/test_reports/rdflib_nt-2013-12-22T19:12:25.ttl
+++ b/test_reports/rdflib_nt-2013-12-22T191225.ttl
diff --git a/test_reports/rdflib_sparql-2013-12-22T19:36:48.ttl b/test_reports/rdflib_sparql-2013-12-22T193648.ttl
index 1e2b4f9b..1e2b4f9b 100644
--- a/test_reports/rdflib_sparql-2013-12-22T19:36:48.ttl
+++ b/test_reports/rdflib_sparql-2013-12-22T193648.ttl
diff --git a/test_reports/rdflib_trig-2013-12-22T19:31:52.ttl b/test_reports/rdflib_trig-2013-12-22T193152.ttl
index 325759f1..325759f1 100644
--- a/test_reports/rdflib_trig-2013-12-22T19:31:52.ttl
+++ b/test_reports/rdflib_trig-2013-12-22T193152.ttl
diff --git a/test_reports/rdflib_trig-2013-12-30T15:56:57.ttl b/test_reports/rdflib_trig-2013-12-30T155657.ttl
index 84bbbc06..84bbbc06 100644
--- a/test_reports/rdflib_trig-2013-12-30T15:56:57.ttl
+++ b/test_reports/rdflib_trig-2013-12-30T155657.ttl
diff --git a/test_reports/rdflib_turtle-2013-12-22T19:13:51.ttl b/test_reports/rdflib_turtle-2013-12-22T191351.ttl
index 22bc440b..22bc440b 100644
--- a/test_reports/rdflib_turtle-2013-12-22T19:13:51.ttl
+++ b/test_reports/rdflib_turtle-2013-12-22T191351.ttl