diff options
author | Gunnar Aastrand Grimnes <gromgull@users.noreply.github.com> | 2017-01-24 13:18:46 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-01-24 13:18:46 +0100 |
commit | fa3271d99371e13a85d2420085186dfba893c98c (patch) | |
tree | ec584b517a6967a9efa55e150d0a8e49f3621ed1 | |
parent | 1480ece7cd8e800e163b535139fb075766082433 (diff) | |
download | rdflib-fa3271d99371e13a85d2420085186dfba893c98c.tar.gz |
Remove ancient and broken Python 2.3 support code. (#681)
fixes #680
We replace a loop over every character in a string with a single
call to `encode` that uses a custom error handler.
We move the call to the top level, so the escaping happens only at the
point where we encode the entire output.
-rw-r--r-- | rdflib/plugins/serializers/nquads.py | 7 | ||||
-rw-r--r-- | rdflib/plugins/serializers/nt.py | 45 |
2 files changed, 26 insertions, 26 deletions
diff --git a/rdflib/plugins/serializers/nquads.py b/rdflib/plugins/serializers/nquads.py index 26cd85ef..66c5e6f1 100644 --- a/rdflib/plugins/serializers/nquads.py +++ b/rdflib/plugins/serializers/nquads.py @@ -4,7 +4,7 @@ from rdflib.term import Literal from rdflib.serializer import Serializer from rdflib.py3compat import b -from rdflib.plugins.serializers.nt import _xmlcharref_encode, _quoteLiteral +from rdflib.plugins.serializers.nt import _quoteLiteral __all__ = ['NQuadsSerializer'] @@ -36,11 +36,10 @@ def _nq_row(triple, context): if isinstance(triple[2], Literal): return u"%s %s %s %s .\n" % (triple[0].n3(), triple[1].n3(), - _xmlcharref_encode( - _quoteLiteral(triple[2])), + _quoteLiteral(triple[2]), context.n3()) else: return u"%s %s %s %s .\n" % (triple[0].n3(), triple[1].n3(), - _xmlcharref_encode(triple[2].n3()), + triple[2].n3(), context.n3()) diff --git a/rdflib/plugins/serializers/nt.py b/rdflib/plugins/serializers/nt.py index 6b3c6485..fb6a37cf 100644 --- a/rdflib/plugins/serializers/nt.py +++ b/rdflib/plugins/serializers/nt.py @@ -6,7 +6,9 @@ format. from rdflib.term import Literal from rdflib.serializer import Serializer from rdflib.py3compat import b + import warnings +import codecs __all__ = ['NTSerializer'] @@ -16,6 +18,10 @@ class NTSerializer(Serializer): Serializes RDF graphs to NTriples format. 
""" + def __init__(self, store): + Serializer.__init__(self, store) + self.encoding = 'ascii' # n-triples are ascii encoded + def serialize(self, stream, base=None, encoding=None, **args): if base is not None: warnings.warn("NTSerializer does not support base.") @@ -23,7 +29,7 @@ class NTSerializer(Serializer): warnings.warn("NTSerializer does not use custom encoding.") encoding = self.encoding for triple in self.store: - stream.write(_nt_row(triple).encode(encoding, "replace")) + stream.write(_nt_row(triple).encode(self.encoding, "_rdflib_nt_escape")) stream.write(b("\n")) @@ -32,11 +38,11 @@ def _nt_row(triple): return u"%s %s %s .\n" % ( triple[0].n3(), triple[1].n3(), - _xmlcharref_encode(_quoteLiteral(triple[2]))) + _quoteLiteral(triple[2])) else: return u"%s %s %s .\n" % (triple[0].n3(), triple[1].n3(), - _xmlcharref_encode(triple[2].n3())) + triple[2].n3()) def _quoteLiteral(l): @@ -62,23 +68,18 @@ def _quote_encode(l): .replace('"', '\\"')\ .replace('\r', '\\r') +def _nt_unicode_error_resolver(err): + + """ + Do unicode char replaces as defined in https://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#ntrip_strings + """ + + def _replace_single(c): + c = ord(c) + fmt = u'\\u%04X' if c <= 0xFFFF else u'\\U%08X' + return fmt % c + + string = err.object[err.start:err.end] + return ( "".join( _replace_single(c) for c in string ), err.end ) -# from <http://code.activestate.com/recipes/303668/> -def _xmlcharref_encode(unicode_data, encoding="ascii"): - """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler.""" - res = "" - - # Step through the unicode_data string one character at a time in - # order to catch unencodable characters: - for char in unicode_data: - try: - char.encode(encoding, 'strict') - except UnicodeError: - if ord(char) <= 0xFFFF: - res += '\\u%04X' % ord(char) - else: - res += '\\U%08X' % ord(char) - else: - res += char - - return res +codecs.register_error('_rdflib_nt_escape', _nt_unicode_error_resolver) |