diff options
author | Nicholas Car <nicholas.car@csiro.au> | 2020-03-17 04:26:45 +1000 |
---|---|---|
committer | Nicholas Car <nicholas.car@csiro.au> | 2020-03-17 04:26:45 +1000 |
commit | e9bbf2a7d29b897ef85a92c4072a3362cb054ea4 (patch) | |
tree | dbce489e61d8ab78ce113dd21b461561575516d3 | |
parent | 456157b3244d023c55b33e0e42bad25de78726f7 (diff) | |
download | rdflib-e9bbf2a7d29b897ef85a92c4072a3362cb054ea4.tar.gz |
added all code from PR 388
-rw-r--r-- | rdflib/term.py | 65 | ||||
-rw-r--r-- | test/test_hex_binary.py | 62 | ||||
-rw-r--r-- | test/test_literal.py | 26 |
3 files changed, 136 insertions, 17 deletions
diff --git a/rdflib/term.py b/rdflib/term.py index 1a75d91a..25e9e4bc 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -53,6 +53,7 @@ from re import sub, compile from collections import defaultdict from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat +from binascii import hexlify, unhexlify import rdflib from six import PY2 @@ -561,20 +562,20 @@ class Literal(Identifier): datatype = lexical_or_value.datatype value = lexical_or_value.value - elif isinstance(lexical_or_value, string_types): + elif isinstance(lexical_or_value, string_types) or (PY3 and isinstance(lexical_or_value, bytes)): # passed a string # try parsing lexical form of datatyped literal value = _castLexicalToPython(lexical_or_value, datatype) if value is not None and normalize: - _value, _datatype = _castPythonToLiteral(value) + _value, _datatype = _castPythonToLiteral(value, datatype) if _value is not None and _is_valid_unicode(_value): lexical_or_value = _value else: # passed some python object value = lexical_or_value - _value, _datatype = _castPythonToLiteral(lexical_or_value) + _value, _datatype = _castPythonToLiteral(lexical_or_value, datatype) datatype = datatype or _datatype if _value is not None: @@ -1348,6 +1349,12 @@ def _writeXML(xmlnode): return s +def _unhexlify(value): + # In Python 3.2, unhexlify does not support str (only bytes) + if PY3 and isinstance(value, str): + value = value.encode() + return unhexlify(value) + # Cannot import Namespace/XSD because of circular dependencies _XSD_PFX = 'http://www.w3.org/2001/XMLSchema#' _RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' @@ -1371,6 +1378,7 @@ _XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration') _XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration') _OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational') +_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary') # TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth _NUMERIC_LITERAL_TYPES = ( @@ -1428,21 +1436,30 @@ _TOTAL_ORDER_CASTERS = { } -def _castPythonToLiteral(obj): +def _py2literal(obj, pType, castFunc, dType): + if castFunc: + return castFunc(obj), dType + elif dType: + return obj, dType + else: + return obj, None + + +def _castPythonToLiteral(obj, datatype): """ - Casts a python datatype to a tuple of the lexical value and a + Casts a tuple of a python type and a special datatype URI to a tuple of the lexical value and a datatype URI (or None) """ - for pType, (castFunc, dType) in _PythonToXSD: + for (pType, dType), castFunc in _SpecificPythonToXSDRules: + if isinstance(obj, pType) and dType == datatype: + return _py2literal(obj, pType, castFunc, dType) + + for pType, (castFunc, dType) in _GenericPythonToXSDRules: if isinstance(obj, pType): - if castFunc: - return castFunc(obj), dType - elif dType: - return obj, dType - else: - return obj, None + return _py2literal(obj, pType, castFunc, dType) return obj, None # TODO: is this right for the fall through case? + from decimal import Decimal # Mappings from Python types to XSD datatypes and back (borrowed from sparta) @@ -1456,7 +1473,7 @@ from decimal import Decimal # python longs have no limit # both map to the abstract integer type, # rather than some concrete bit-limited datatype -_PythonToXSD = [ +_GenericPythonToXSDRules = [ (string_types, (None, None)), (float, (None, _XSD_DOUBLE)), (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)), @@ -1477,6 +1494,12 @@ _PythonToXSD = [ (Fraction, (None, _OWL_RATIONAL)) ] +_SpecificPythonToXSDRules = [ + ((string_types, _XSD_HEXBINARY), hexlify), +] +if PY3: + _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify)) + XSDToPython = { None: None, # plain literals map directly to value space URIRef(_XSD_PFX + 'time'): parse_time, @@ -1487,6 +1510,7 @@ XSDToPython = { URIRef(_XSD_PFX + 'duration'): parse_duration, URIRef(_XSD_PFX + 'dayTimeDuration'): parse_duration, URIRef(_XSD_PFX + 'yearMonthDuration'): parse_duration, + URIRef(_XSD_PFX + 'hexBinary'): _unhexlify, URIRef(_XSD_PFX + 'string'): None, URIRef(_XSD_PFX + 'normalizedString'): None, URIRef(_XSD_PFX + 'token'): None, @@ -1542,7 +1566,7 @@ def _castLexicalToPython(lexical, datatype): return None -def bind(datatype, pythontype, constructor=None, lexicalizer=None): +def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False): """ register a new datatype<->pythontype binding @@ -1550,10 +1574,16 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None): into a Python instances, if not given the pythontype is used directly - :param lexicalizer: an optinoal function for converting python objects to + :param lexicalizer: an optional function for converting python objects to lexical form, if not given object.__str__ is used + :param datatype_specific: makes the lexicalizer function be accessible + from the pair (pythontype, datatype) if set to True + or from the pythontype otherwise. False by default """ + if datatype_specific and datatype is None: + raise Exception("No datatype given for a datatype-specific binding") + if datatype in _toPythonMapping: logger.warning("datatype '%s' was already bound. Rebinding." % datatype) @@ -1561,7 +1591,10 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None): if constructor is None: constructor = pythontype _toPythonMapping[datatype] = constructor - _PythonToXSD.append((pythontype, (lexicalizer, datatype))) + if datatype_specific: + _SpecificPythonToXSDRules.append(((pythontype, datatype), lexicalizer)) + else: + _GenericPythonToXSDRules.append((pythontype, (lexicalizer, datatype))) class Variable(Identifier): diff --git a/test/test_hex_binary.py b/test/test_hex_binary.py new file mode 100644 index 00000000..711ff0bd --- /dev/null +++ b/test/test_hex_binary.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +import unittest +import binascii +from rdflib import Literal, XSD +import six + + +class HexBinaryTestCase(unittest.TestCase): + + def test_int(self): + self._test_integer(5) + self._test_integer(3452) + self._test_integer(4886) + + def _test_integer(self, i): + hex_i = format(i, "x") + # Make it has a even-length (Byte) + len_hex_i = len(hex_i) + hex_i = hex_i.zfill(len_hex_i + len_hex_i % 2) + + l = Literal(hex_i, datatype=XSD.hexBinary) + bin_i = l.toPython() + self.assertEquals(int(binascii.hexlify(bin_i), 16), i) + + if six.PY2: + self.assertEquals(unicode(l), hex_i) + else: + self.assertEquals(l, hex_i) + self.assertEquals(int(hex_i, 16), i) + if six.PY2: + self.assertEquals(int(unicode(l), 16), i) + else: + self.assertEquals(int(l, 16), i) + self.assertEquals(int(str(l), 16), i) + + def test_unicode(self): + str1 = u"Test utf-8 string éàë" + # u hexstring + hex_str1 = binascii.hexlify(str1.encode('utf-8')).decode() + l1 = Literal(hex_str1, datatype=XSD.hexBinary) + b_str1 = l1.toPython() + self.assertEquals(b_str1.decode('utf-8'), str1) + if six.PY2: + self.assertEquals(unicode(l1), hex_str1) + else: + self.assertEquals(l1, hex_str1) + + # b hexstring + hex_str1b = binascii.hexlify(str1.encode('utf-8')) + l1b = Literal(hex_str1b, datatype=XSD.hexBinary) + b_str1b = l1b.toPython() + self.assertEquals(b_str1, b_str1b) + self.assertEquals(b_str1b.decode('utf-8'), str1) + if six.PY2: + self.assertEquals(unicode(l1b), hex_str1) + else: + self.assertEquals(l1b, hex_str1) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_literal.py b/test/test_literal.py index feb9d72d..dae2d187 100644 --- a/test/test_literal.py +++ b/test/test_literal.py @@ -2,7 +2,7 @@ import unittest import rdflib # needed for eval(repr(...)) below from rdflib.term import Literal, URIRef, _XSD_DOUBLE, bind -from six import integer_types, PY3 +from six import integer_types, PY3, string_types def uformat(s): @@ -139,6 +139,30 @@ class TestBindings(unittest.TestCase): self.assertEqual(lb.value, vb) self.assertEqual(lb.datatype, dtB) + def testSpecificBinding(self): + + def lexify(s): + return "--%s--" % s + + def unlexify(s): + return s[2:-2] + + datatype = rdflib.URIRef('urn:dt:mystring') + + #Datatype-specific rule + bind(datatype, string_types, unlexify, lexify, datatype_specific=True) + + s = "Hello" + normal_l = Literal(s) + self.assertEqual(str(normal_l), s) + self.assertEqual(normal_l.toPython(), s) + self.assertEqual(normal_l.datatype, None) + + specific_l = Literal("--%s--" % s, datatype=datatype) + self.assertEqual(str(specific_l), lexify(s)) + self.assertEqual(specific_l.toPython(), s) + self.assertEqual(specific_l.datatype, datatype) + if __name__ == "__main__": unittest.main() |