summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Car <nicholas.car@csiro.au>2020-03-17 04:26:45 +1000
committerNicholas Car <nicholas.car@csiro.au>2020-03-17 04:26:45 +1000
commite9bbf2a7d29b897ef85a92c4072a3362cb054ea4 (patch)
treedbce489e61d8ab78ce113dd21b461561575516d3
parent456157b3244d023c55b33e0e42bad25de78726f7 (diff)
downloadrdflib-e9bbf2a7d29b897ef85a92c4072a3362cb054ea4.tar.gz
added all code from PR 388
-rw-r--r--rdflib/term.py65
-rw-r--r--test/test_hex_binary.py62
-rw-r--r--test/test_literal.py26
3 files changed, 136 insertions, 17 deletions
diff --git a/rdflib/term.py b/rdflib/term.py
index 1a75d91a..25e9e4bc 100644
--- a/rdflib/term.py
+++ b/rdflib/term.py
@@ -53,6 +53,7 @@ from re import sub, compile
from collections import defaultdict
from isodate import parse_time, parse_date, parse_datetime, Duration, parse_duration, duration_isoformat
+from binascii import hexlify, unhexlify
import rdflib
from six import PY2
@@ -561,20 +562,20 @@ class Literal(Identifier):
datatype = lexical_or_value.datatype
value = lexical_or_value.value
- elif isinstance(lexical_or_value, string_types):
+ elif isinstance(lexical_or_value, string_types) or (PY3 and isinstance(lexical_or_value, bytes)):
# passed a string
# try parsing lexical form of datatyped literal
value = _castLexicalToPython(lexical_or_value, datatype)
if value is not None and normalize:
- _value, _datatype = _castPythonToLiteral(value)
+ _value, _datatype = _castPythonToLiteral(value, datatype)
if _value is not None and _is_valid_unicode(_value):
lexical_or_value = _value
else:
# passed some python object
value = lexical_or_value
- _value, _datatype = _castPythonToLiteral(lexical_or_value)
+ _value, _datatype = _castPythonToLiteral(lexical_or_value, datatype)
datatype = datatype or _datatype
if _value is not None:
@@ -1348,6 +1349,12 @@ def _writeXML(xmlnode):
return s
+def _unhexlify(value):
+ # In Python 3.2, unhexlify does not support str (only bytes)
+ if PY3 and isinstance(value, str):
+ value = value.encode()
+ return unhexlify(value)
+
# Cannot import Namespace/XSD because of circular dependencies
_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#'
_RDF_PFX = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
@@ -1371,6 +1378,7 @@ _XSD_DAYTIMEDURATION = URIRef(_XSD_PFX + 'dayTimeDuration')
_XSD_YEARMONTHDURATION = URIRef(_XSD_PFX + 'yearMonthDuration')
_OWL_RATIONAL = URIRef('http://www.w3.org/2002/07/owl#rational')
+_XSD_HEXBINARY = URIRef(_XSD_PFX + 'hexBinary')
# TODO: gYearMonth, gYear, gMonthDay, gDay, gMonth
_NUMERIC_LITERAL_TYPES = (
@@ -1428,21 +1436,30 @@ _TOTAL_ORDER_CASTERS = {
}
-def _castPythonToLiteral(obj):
+def _py2literal(obj, pType, castFunc, dType):
+ if castFunc:
+ return castFunc(obj), dType
+ elif dType:
+ return obj, dType
+ else:
+ return obj, None
+
+
+def _castPythonToLiteral(obj, datatype):
"""
- Casts a python datatype to a tuple of the lexical value and a
+ Casts a python object, given the datatype URI requested for it, to a tuple of the lexical value and a
datatype URI (or None)
"""
- for pType, (castFunc, dType) in _PythonToXSD:
+ for (pType, dType), castFunc in _SpecificPythonToXSDRules:
+ if isinstance(obj, pType) and dType == datatype:
+ return _py2literal(obj, pType, castFunc, dType)
+
+ for pType, (castFunc, dType) in _GenericPythonToXSDRules:
if isinstance(obj, pType):
- if castFunc:
- return castFunc(obj), dType
- elif dType:
- return obj, dType
- else:
- return obj, None
+ return _py2literal(obj, pType, castFunc, dType)
return obj, None # TODO: is this right for the fall through case?
+
from decimal import Decimal
# Mappings from Python types to XSD datatypes and back (borrowed from sparta)
@@ -1456,7 +1473,7 @@ from decimal import Decimal
# python longs have no limit
# both map to the abstract integer type,
# rather than some concrete bit-limited datatype
-_PythonToXSD = [
+_GenericPythonToXSDRules = [
(string_types, (None, None)),
(float, (None, _XSD_DOUBLE)),
(bool, (lambda i:str(i).lower(), _XSD_BOOLEAN)),
@@ -1477,6 +1494,12 @@ _PythonToXSD = [
(Fraction, (None, _OWL_RATIONAL))
]
+_SpecificPythonToXSDRules = [
+ ((string_types, _XSD_HEXBINARY), hexlify),
+]
+if PY3:
+ _SpecificPythonToXSDRules.append(((bytes, _XSD_HEXBINARY), hexlify))
+
XSDToPython = {
None: None, # plain literals map directly to value space
URIRef(_XSD_PFX + 'time'): parse_time,
@@ -1487,6 +1510,7 @@ XSDToPython = {
URIRef(_XSD_PFX + 'duration'): parse_duration,
URIRef(_XSD_PFX + 'dayTimeDuration'): parse_duration,
URIRef(_XSD_PFX + 'yearMonthDuration'): parse_duration,
+ URIRef(_XSD_PFX + 'hexBinary'): _unhexlify,
URIRef(_XSD_PFX + 'string'): None,
URIRef(_XSD_PFX + 'normalizedString'): None,
URIRef(_XSD_PFX + 'token'): None,
@@ -1542,7 +1566,7 @@ def _castLexicalToPython(lexical, datatype):
return None
-def bind(datatype, pythontype, constructor=None, lexicalizer=None):
+def bind(datatype, pythontype, constructor=None, lexicalizer=None, datatype_specific=False):
"""
register a new datatype<->pythontype binding
@@ -1550,10 +1574,16 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
into a Python instances, if not given the pythontype
is used directly
- :param lexicalizer: an optinoal function for converting python objects to
+ :param lexicalizer: an optional function for converting python objects to
lexical form, if not given object.__str__ is used
+ :param datatype_specific: if True, the lexicalizer is registered for the
+ pair (pythontype, datatype) and is only used when both match;
+ otherwise it is registered for the pythontype alone. False by default
"""
+ if datatype_specific and datatype is None:
+ raise Exception("No datatype given for a datatype-specific binding")
+
if datatype in _toPythonMapping:
logger.warning("datatype '%s' was already bound. Rebinding." %
datatype)
@@ -1561,7 +1591,10 @@ def bind(datatype, pythontype, constructor=None, lexicalizer=None):
if constructor is None:
constructor = pythontype
_toPythonMapping[datatype] = constructor
- _PythonToXSD.append((pythontype, (lexicalizer, datatype)))
+ if datatype_specific:
+ _SpecificPythonToXSDRules.append(((pythontype, datatype), lexicalizer))
+ else:
+ _GenericPythonToXSDRules.append((pythontype, (lexicalizer, datatype)))
class Variable(Identifier):
diff --git a/test/test_hex_binary.py b/test/test_hex_binary.py
new file mode 100644
index 00000000..711ff0bd
--- /dev/null
+++ b/test/test_hex_binary.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+import unittest
+import binascii
+from rdflib import Literal, XSD
+import six
+
+
+class HexBinaryTestCase(unittest.TestCase):
+
+ def test_int(self):
+ self._test_integer(5)
+ self._test_integer(3452)
+ self._test_integer(4886)
+
+ def _test_integer(self, i):
+ hex_i = format(i, "x")
+ # Pad to an even number of hex digits (whole bytes)
+ len_hex_i = len(hex_i)
+ hex_i = hex_i.zfill(len_hex_i + len_hex_i % 2)
+
+ l = Literal(hex_i, datatype=XSD.hexBinary)
+ bin_i = l.toPython()
+ self.assertEquals(int(binascii.hexlify(bin_i), 16), i)
+
+ if six.PY2:
+ self.assertEquals(unicode(l), hex_i)
+ else:
+ self.assertEquals(l, hex_i)
+ self.assertEquals(int(hex_i, 16), i)
+ if six.PY2:
+ self.assertEquals(int(unicode(l), 16), i)
+ else:
+ self.assertEquals(int(l, 16), i)
+ self.assertEquals(int(str(l), 16), i)
+
+ def test_unicode(self):
+ str1 = u"Test utf-8 string éàë"
+ # hex string passed as unicode text
+ hex_str1 = binascii.hexlify(str1.encode('utf-8')).decode()
+ l1 = Literal(hex_str1, datatype=XSD.hexBinary)
+ b_str1 = l1.toPython()
+ self.assertEquals(b_str1.decode('utf-8'), str1)
+ if six.PY2:
+ self.assertEquals(unicode(l1), hex_str1)
+ else:
+ self.assertEquals(l1, hex_str1)
+
+ # hex string passed as bytes
+ hex_str1b = binascii.hexlify(str1.encode('utf-8'))
+ l1b = Literal(hex_str1b, datatype=XSD.hexBinary)
+ b_str1b = l1b.toPython()
+ self.assertEquals(b_str1, b_str1b)
+ self.assertEquals(b_str1b.decode('utf-8'), str1)
+ if six.PY2:
+ self.assertEquals(unicode(l1b), hex_str1)
+ else:
+ self.assertEquals(l1b, hex_str1)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_literal.py b/test/test_literal.py
index feb9d72d..dae2d187 100644
--- a/test/test_literal.py
+++ b/test/test_literal.py
@@ -2,7 +2,7 @@ import unittest
import rdflib # needed for eval(repr(...)) below
from rdflib.term import Literal, URIRef, _XSD_DOUBLE, bind
-from six import integer_types, PY3
+from six import integer_types, PY3, string_types
def uformat(s):
@@ -139,6 +139,30 @@ class TestBindings(unittest.TestCase):
self.assertEqual(lb.value, vb)
self.assertEqual(lb.datatype, dtB)
+ def testSpecificBinding(self):
+
+ def lexify(s):
+ return "--%s--" % s
+
+ def unlexify(s):
+ return s[2:-2]
+
+ datatype = rdflib.URIRef('urn:dt:mystring')
+
+ # Datatype-specific rule
+ bind(datatype, string_types, unlexify, lexify, datatype_specific=True)
+
+ s = "Hello"
+ normal_l = Literal(s)
+ self.assertEqual(str(normal_l), s)
+ self.assertEqual(normal_l.toPython(), s)
+ self.assertEqual(normal_l.datatype, None)
+
+ specific_l = Literal("--%s--" % s, datatype=datatype)
+ self.assertEqual(str(specific_l), lexify(s))
+ self.assertEqual(specific_l.toPython(), s)
+ self.assertEqual(specific_l.datatype, datatype)
+
if __name__ == "__main__":
unittest.main()