added all code from PR 388

author: Nicholas Car <nicholas.car@csiro.au> 2020-03-17 04:26:45 +1000
committer: Nicholas Car <nicholas.car@csiro.au> 2020-03-17 04:26:45 +1000
commit: e9bbf2a7d29b897ef85a92c4072a3362cb054ea4 (patch)
tree: dbce489e61d8ab78ce113dd21b461561575516d3
parent: 456157b3244d023c55b33e0e42bad25de78726f7 (diff)
download: rdflib-e9bbf2a7d29b897ef85a92c4072a3362cb054ea4.tar.gz
3 files changed, 136 insertions, 17 deletions
diff --git a/rdflib/term.py b/rdflib/term.py
index 1a75d91a..25e9e4bc 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -53,6 +53,7 @@ from re import sub, compile
 from collections import defaultdict
 
 from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat
+from binascii import hexlify, unhexlify
 
 import rdflib
 from six import PY2
@@ -561,20 +562,20 @@ class Literal(Identifier):
                 datatype = lexical_or_value.datatype
                 value = lexical_or_value.value
 
-        elif isinstance(lexical_or_value, string_types):
+        elif isinstance(lexical_or_value, string_types) or (PY3 and isinstance(lexical_or_value, bytes)):
                 # passed a string
                 # try parsing lexical form of datatyped literal
             value = _castLexicalToPython(lexical_or_value, datatype)
 
             if value is not None and normalize:
-                _value, _datatype = _castPythonToLiteral(value)
+                _value, _datatype = _castPythonToLiteral(value, datatype)
                 if _value is not None and _is_valid_unicode(_value):
                     lexical_or_value = _value
 
         else:
             # passed some python object
             value = lexical_or_value
-            _value, _datatype = _castPythonToLiteral(lexical_or_value)
+            _value, _datatype = _castPythonToLiteral(lexical_or_value, datatype)
 
             datatype = datatype or _datatype
             if _value is not None:
@@ -1348,6 +1349,12 @@ def _writeXML(xmlnode):
     return s
 
 
+def _unhexlify(value):
+    # In Python 3.2, unhexlify does not support str (only bytes)
+    if PY3 and isinstance(value, str):
+        value = value.encode()
+    return unhexlify(value)
+
 # Cannot import Namespace/XSD because of circular dependencies
 _XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
 _RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
@@ -1371,6 +1378,7 @@ _XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration')
 _XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration')
 
 _OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational')
+_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary')
 # TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth
 
 _NUMERIC_LITERAL_TYPES = (
@@ -1428,21 +1436,30 @@ _TOTAL_ORDER_CASTERS = {
 }
 
 
-def _castPythonToLiteral(obj):
+def _py2literal(obj, pType, castFunc, dType):
+    if castFunc:
+        return castFunc(obj), dType
+    elif dType:
+        return obj, dType
+    else:
+        return obj, None
+
+
+def _castPythonToLiteral(obj, datatype):
     """
-    Casts a python datatype to a tuple of the lexical value and a
+    Casts a python object, taking the requested datatype URI into account, to a tuple of the lexical value and a
     datatype URI (or None)
     """
-    for pType, (castFunc, dType) in _PythonToXSD:
+    for (pType, dType), castFunc in _SpecificPythonToXSDRules:
+        if isinstance(obj, pType) and dType == datatype:
+            return _py2literal(obj, pType, castFunc, dType)
+
+    for pType, (castFunc, dType) in _GenericPythonToXSDRules:
         if isinstance(obj, pType):
-            if castFunc:
-                return castFunc(obj), dType
-            elif dType:
-                return obj, dType
-            else:
-                return obj, None
+            return _py2literal(obj, pType, castFunc, dType)
     return obj, None  # TODO: is this right for the fall through case?
 
+
 from decimal import Decimal
 
 # Mappings from Python types to XSD datatypes and back (borrowed from sparta)
@@ -1456,7 +1473,7 @@ from decimal import Decimal
 # python longs have no limit
 # both map to the abstract integer type,
 # rather than some concrete bit-limited datatype
-_PythonToXSD = [
+_GenericPythonToXSDRules = [
     (string_types, (None, None)),
     (float, (None, _XSD_DOUBLE)),
     (bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)),
@@ -1477,6 +1494,12 @@ _PythonToXSD = [
     (Fraction, (None, _OWL_RATIONAL))
 ]
 
+_SpecificPythonToXSDRules = [
+    ((string_types, _XSD_HEXBINARY), hexlify),
+]
+if PY3:
+    _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify))
+
 XSDToPython = {
     None: None,  # plain literals map directly to value space
     URIRef(_XSD_PFX + 'time'): parse_time,
@@ -1487,6 +1510,7 @@ XSDToPython = {
     URIRef(_XSD_PFX + 'duration'): parse_duration,
     URIRef(_XSD_PFX + 'dayTimeDuration'): parse_duration,
     URIRef(_XSD_PFX + 'yearMonthDuration'): parse_duration,
+    URIRef(_XSD_PFX + 'hexBinary'): _unhexlify,
     URIRef(_XSD_PFX + 'string'): None,
     URIRef(_XSD_PFX + 'normalizedString'): None,
     URIRef(_XSD_PFX + 'token'): None,
@@ -1542,7 +1566,7 @@ def _castLexicalToPython(lexical, datatype):
         return None
 
 
-def bind(datatype, pythontype, constructor=None, lexicalizer=None):
+def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False):
     """
     register a new datatype<->pythontype binding
 
@@ -1550,10 +1574,16 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
                         into a Python instances, if not given the pythontype
                         is used directly
 
-    :param lexicalizer: an optinoal function for converting python objects to
+    :param lexicalizer: an optional function for converting python objects to
                         lexical form, if not given object.__str__ is used
 
+    :param datatype_specific: if True, the lexicalizer is registered for the
+                              pair (pythontype, datatype); otherwise it is
+                              registered for the pythontype alone.  False by default
     """
+    if datatype_specific and datatype is None:
+        raise Exception("No datatype given for a datatype-specific binding")
+
     if datatype in _toPythonMapping:
         logger.warning("datatype '%s' was already bound. Rebinding." %
                        datatype)
@@ -1561,7 +1591,10 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
     if constructor is None:
         constructor = pythontype
     _toPythonMapping[datatype] = constructor
-    _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
+    if datatype_specific:
+        _SpecificPythonToXSDRules.append(((pythontype, datatype), lexicalizer))
+    else:
+        _GenericPythonToXSDRules.append((pythontype, (lexicalizer, datatype)))
 
 
 class Variable(Identifier):
diff --git a/test/test_hex_binary.py b/test/test_hex_binary.py
new file mode 100644
index 00000000..711ff0bd
--- /dev/null
+++ b/test/test_hex_binary.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import binascii
+from rdflib import Literal, XSD
+import six
+
+
+class HexBinaryTestCase(unittest.TestCase):
+
+    def test_int(self):
+        self._test_integer(5)
+        self._test_integer(3452)
+        self._test_integer(4886)
+
+    def _test_integer(self, i):
+        hex_i = format(i, "x")
+        # Left-pad with a zero so the hex string has an even length (whole bytes)
+        len_hex_i = len(hex_i)
+        hex_i = hex_i.zfill(len_hex_i + len_hex_i % 2)
+
+        l = Literal(hex_i, datatype=XSD.hexBinary)
+        bin_i = l.toPython()
+        self.assertEquals(int(binascii.hexlify(bin_i), 16), i)
+
+        if six.PY2:
+            self.assertEquals(unicode(l), hex_i)
+        else:
+            self.assertEquals(l, hex_i)
+        self.assertEquals(int(hex_i, 16), i)
+        if six.PY2:
+            self.assertEquals(int(unicode(l), 16), i)
+        else:
+            self.assertEquals(int(l, 16), i)
+        self.assertEquals(int(str(l), 16), i)
+
+    def test_unicode(self):
+        str1 = u"Test utf-8 string éàë"
+        # hex string passed as text (unicode)
+        hex_str1 = binascii.hexlify(str1.encode('utf-8')).decode()
+        l1 = Literal(hex_str1, datatype=XSD.hexBinary)
+        b_str1 = l1.toPython()
+        self.assertEquals(b_str1.decode('utf-8'), str1)
+        if six.PY2:
+            self.assertEquals(unicode(l1), hex_str1)
+        else:
+            self.assertEquals(l1, hex_str1)
+
+        # hex string passed as bytes
+        hex_str1b = binascii.hexlify(str1.encode('utf-8'))
+        l1b = Literal(hex_str1b, datatype=XSD.hexBinary)
+        b_str1b = l1b.toPython()
+        self.assertEquals(b_str1, b_str1b)
+        self.assertEquals(b_str1b.decode('utf-8'), str1)
+        if six.PY2:
+            self.assertEquals(unicode(l1b), hex_str1)
+        else:
+            self.assertEquals(l1b, hex_str1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_literal.py b/test/test_literal.py
index feb9d72d..dae2d187 100644
--- a/test/test_literal.py
+++ b/test/test_literal.py
@@ -2,7 +2,7 @@ import unittest
 
 import rdflib  # needed for eval(repr(...)) below
 from rdflib.term import Literal, URIRef, _XSD_DOUBLE, bind
-from six import integer_types, PY3
+from six import integer_types, PY3, string_types
 
 
 def uformat(s):
@@ -139,6 +139,30 @@ class TestBindings(unittest.TestCase):
         self.assertEqual(lb.value, vb)
         self.assertEqual(lb.datatype, dtB)
 
+    def testSpecificBinding(self):
+
+        def lexify(s):
+            return "--%s--" % s
+
+        def unlexify(s):
+            return s[2:-2]
+
+        datatype = rdflib.URIRef('urn:dt:mystring')
+
+        # Datatype-specific rule
+        bind(datatype, string_types, unlexify, lexify, datatype_specific=True)
+
+        s = "Hello"
+        normal_l = Literal(s)
+        self.assertEqual(str(normal_l), s)
+        self.assertEqual(normal_l.toPython(), s)
+        self.assertEqual(normal_l.datatype, None)
+
+        specific_l = Literal("--%s--" % s, datatype=datatype)
+        self.assertEqual(str(specific_l), lexify(s))
+        self.assertEqual(specific_l.toPython(), s)
+        self.assertEqual(specific_l.datatype, datatype)
+
 
 if __name__ == "__main__":
     unittest.main()
author	Nicholas Car <nicholas.car@csiro.au>	2020-03-17 04:26:45 +1000
committer	Nicholas Car <nicholas.car@csiro.au>	2020-03-17 04:26:45 +1000
commit	e9bbf2a7d29b897ef85a92c4072a3362cb054ea4 (patch)
tree	dbce489e61d8ab78ce113dd21b461561575516d3
parent	456157b3244d023c55b33e0e42bad25de78726f7 (diff)
download	rdflib-e9bbf2a7d29b897ef85a92c4072a3362cb054ea4.tar.gz