summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gunnar Aastrand Grimnes <gromgull@users.noreply.github.com> 2017-01-24 13:18:46 +0100
committer GitHub <noreply@github.com> 2017-01-24 13:18:46 +0100
commit fa3271d99371e13a85d2420085186dfba893c98c (patch)
tree ec584b517a6967a9efa55e150d0a8e49f3621ed1
parent 1480ece7cd8e800e163b535139fb075766082433 (diff)
download rdflib-fa3271d99371e13a85d2420085186dfba893c98c.tar.gz
remove ancient and broken 2.3 support code. (#681)
Fixes #680. We replace a loop over every character in a string with a single call to encode using a custom error handler. We move the call to the top level and do it only once, when we encode the entire output.
-rw-r--r-- rdflib/plugins/serializers/nquads.py 7
-rw-r--r-- rdflib/plugins/serializers/nt.py 45
2 files changed, 26 insertions, 26 deletions
diff --git a/rdflib/plugins/serializers/nquads.py b/rdflib/plugins/serializers/nquads.py
index 26cd85ef..66c5e6f1 100644
--- a/rdflib/plugins/serializers/nquads.py
+++ b/rdflib/plugins/serializers/nquads.py
@@ -4,7 +4,7 @@ from rdflib.term import Literal
from rdflib.serializer import Serializer
from rdflib.py3compat import b
-from rdflib.plugins.serializers.nt import _xmlcharref_encode, _quoteLiteral
+from rdflib.plugins.serializers.nt import _quoteLiteral
__all__ = ['NQuadsSerializer']
@@ -36,11 +36,10 @@ def _nq_row(triple, context):
if isinstance(triple[2], Literal):
return u"%s %s %s %s .\n" % (triple[0].n3(),
triple[1].n3(),
- _xmlcharref_encode(
- _quoteLiteral(triple[2])),
+ _quoteLiteral(triple[2]),
context.n3())
else:
return u"%s %s %s %s .\n" % (triple[0].n3(),
triple[1].n3(),
- _xmlcharref_encode(triple[2].n3()),
+ triple[2].n3(),
context.n3())
diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py
index 6b3c6485..fb6a37cf 100644
--- a/rdflib/plugins/serializers/nt.py
+++ b/rdflib/plugins/serializers/nt.py
@@ -6,7 +6,9 @@ format.
from rdflib.term import Literal
from rdflib.serializer import Serializer
from rdflib.py3compat import b
+
import warnings
+import codecs
__all__ = ['NTSerializer']
@@ -16,6 +18,10 @@ class NTSerializer(Serializer):
Serializes RDF graphs to NTriples format.
"""
+ # Initialise the base Serializer with the store, then pin the encoding
+ # attribute to ASCII; serialize() passes self.encoding to .encode() when
+ # it writes each row.
+ def __init__(self, store):
+ Serializer.__init__(self, store)
+ self.encoding = 'ascii' # n-triples are ascii encoded
+
def serialize(self, stream, base=None, encoding=None, **args):
if base is not None:
warnings.warn("NTSerializer does not support base.")
@@ -23,7 +29,7 @@ class NTSerializer(Serializer):
warnings.warn("NTSerializer does not use custom encoding.")
encoding = self.encoding
for triple in self.store:
- stream.write(_nt_row(triple).encode(encoding, "replace"))
+ stream.write(_nt_row(triple).encode(self.encoding, "_rdflib_nt_escape"))
stream.write(b("\n"))
@@ -32,11 +38,11 @@ def _nt_row(triple):
return u"%s %s %s .\n" % (
triple[0].n3(),
triple[1].n3(),
- _xmlcharref_encode(_quoteLiteral(triple[2])))
+ _quoteLiteral(triple[2]))
else:
return u"%s %s %s .\n" % (triple[0].n3(),
triple[1].n3(),
- _xmlcharref_encode(triple[2].n3()))
+ triple[2].n3())
def _quoteLiteral(l):
@@ -62,23 +68,18 @@ def _quote_encode(l):
.replace('"', '\\"')\
.replace('\r', '\\r')
+def _nt_unicode_error_resolver(err):
+    r"""
+    Codec error handler that escapes un-encodable characters for N-Triples.
+
+    Each character in the failing span ``err.object[err.start:err.end]`` is
+    replaced as defined in
+    https://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#ntrip_strings:
+    ``\uXXXX`` for code points up to 0xFFFF and ``\UXXXXXXXX`` above that.
+
+    :param err: the UnicodeError instance handed over by the codec machinery.
+    :returns: ``(replacement, err.end)`` -- the escaped text and the position
+        at which encoding should resume.
+    :raises TypeError: if ``err`` is not an encode/translate error; a decode
+        error carries bytes, whose values are not code points, so escaping
+        them this way would be meaningless.
+    """
+
+    # The handler is registered globally, so it can be named in a decode()
+    # call as well; refuse that explicitly instead of failing obscurely.
+    if not isinstance(err, (UnicodeEncodeError, UnicodeTranslateError)):
+        raise TypeError(
+            "don't know how to handle %s in error callback"
+            % type(err).__name__)
+
+    def _replace_single(c):
+        c = ord(c)
+        fmt = u'\\u%04X' if c <= 0xFFFF else u'\\U%08X'
+        return fmt % c
+
+    string = err.object[err.start:err.end]
+    # Join with a unicode literal so the replacement is always text.
+    return (u"".join(_replace_single(c) for c in string), err.end)
-# from <http://code.activestate.com/recipes/303668/>
-def _xmlcharref_encode(unicode_data, encoding="ascii"):
- """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler."""
- res = ""
-
- # Step through the unicode_data string one character at a time in
- # order to catch unencodable characters:
- for char in unicode_data:
- try:
- char.encode(encoding, 'strict')
- except UnicodeError:
- if ord(char) <= 0xFFFF:
- res += '\\u%04X' % ord(char)
- else:
- res += '\\U%08X' % ord(char)
- else:
- res += char
-
- return res
+# Register the resolver with the codecs machinery under the name
+# '_rdflib_nt_escape', the value NTSerializer.serialize passes as the
+# ``errors`` argument of .encode().
+codecs.register_error('_rdflib_nt_escape', _nt_unicode_error_resolver)