summary | refs | log | tree | commit | diff
path: root/rdflib/parser.py
diff options
context:
space:
mode:
author Ashley Sommer <ashleysommer@gmail.com> 2020-08-27 14:45:45 +1000
committer Ashley Sommer <ashleysommer@gmail.com> 2020-08-27 14:45:45 +1000
commit fbc091208fbf95c338ead7be7547136f5c3433cc (patch)
tree 58e74adbf7fd96257ca5e17c34f50398a7b469e9 /rdflib/parser.py
parent 466518ed6f74b6cef3b79aa8a917be3c7184284a (diff)
parent aa527747bd6a5e48ea19463c483f5fb45c7ea230 (diff)
download rdflib-fbc091208fbf95c338ead7be7547136f5c3433cc.tar.gz
Merge remote-tracking branch 'origin/master' into t0b3_master
# Conflicts:
#   rdflib/namespace.py
#   rdflib/parser.py
#   rdflib/plugins/memory.py
#   rdflib/plugins/parsers/ntriples.py
#   test/test_iomemory.py
Diffstat (limited to 'rdflib/parser.py')
-rw-r--r-- rdflib/parser.py 100
1 files changed, 85 insertions, 15 deletions
diff --git a/rdflib/parser.py b/rdflib/parser.py
index 4d807e7e..fcaed5e4 100644
--- a/rdflib/parser.py
+++ b/rdflib/parser.py
@@ -10,11 +10,11 @@ want to do so through the Graph class parse method.
"""
+import codecs
import os
import sys
-from io import BytesIO
-
+from io import BytesIO, TextIOBase, TextIOWrapper, StringIO, BufferedIOBase
from urllib.request import pathname2url
from urllib.request import Request
@@ -38,6 +38,8 @@ __all__ = [
class Parser(object):
+ __slots__ = set()
+
def __init__(self):
pass
@@ -45,6 +47,37 @@ class Parser(object):
pass
+class BytesIOWrapper(BufferedIOBase):
+    """Read-only binary stream over a ``str``, encoded lazily on first read.
+
+    The wrapped text is not encoded until ``read`` or ``read1`` is first
+    called; after that the encoded bytes are served from an in-memory
+    ``BytesIO``. ``readinto``, ``readinto1`` and ``write`` are not supported
+    and raise ``NotImplementedError``.
+    """
+
+    __slots__ = ("wrapped", "encoded", "encoding")
+
+    def __init__(self, wrapped: str, encoding="utf-8"):
+        super(BytesIOWrapper, self).__init__()
+        self.wrapped = wrapped
+        self.encoding = encoding
+        # BytesIO over the encoded text; created on the first read call.
+        self.encoded = None
+
+    def _encoded_stream(self):
+        """Return the ``BytesIO`` holding the encoded text, creating it once."""
+        if self.encoded is None:
+            # The encoder returns a (bytes, length_consumed) tuple; BytesIO
+            # must be given only the bytes, not the whole tuple.
+            b, _ = codecs.getencoder(self.encoding)(self.wrapped)
+            self.encoded = BytesIO(b)
+        return self.encoded
+
+    def read(self, *args, **kwargs):
+        """Read encoded bytes; arguments are passed through to ``BytesIO.read``."""
+        return self._encoded_stream().read(*args, **kwargs)
+
+    def read1(self, *args, **kwargs):
+        """Read encoded bytes; arguments are passed through to ``BytesIO.read1``."""
+        return self._encoded_stream().read1(*args, **kwargs)
+
+    def readinto(self, *args, **kwargs):
+        raise NotImplementedError()
+
+    def readinto1(self, *args, **kwargs):
+        raise NotImplementedError()
+
+    def write(self, *args, **kwargs):
+        raise NotImplementedError()
+
+
class InputSource(xmlreader.InputSource, object):
"""
TODO:
@@ -56,23 +89,39 @@ class InputSource(xmlreader.InputSource, object):
self.auto_close = False # see Graph.parse(), true if opened by us
def close(self):
+ c = self.getCharacterStream()
+ if c and hasattr(c, "close"):
+ try:
+ c.close()
+ except Exception:
+ pass
f = self.getByteStream()
if f and hasattr(f, "close"):
- f.close()
+ try:
+ f.close()
+ except Exception:
+ pass
class StringInputSource(InputSource):
"""
- TODO:
+ Constructs an RDFLib Parser InputSource from a Python String or Bytes
"""
- def __init__(self, value, system_id=None):
+ def __init__(self, value, encoding="utf-8", system_id=None):
super(StringInputSource, self).__init__(system_id)
- stream = BytesIO(value)
- self.setByteStream(stream)
- # TODO:
- # encoding = value.encoding
- # self.setEncoding(encoding)
+ if isinstance(value, str):
+ stream = StringIO(value)
+ self.setCharacterStream(stream)
+ self.setEncoding(encoding)
+ b_stream = BytesIOWrapper(value, encoding)
+ self.setByteStream(b_stream)
+ else:
+ stream = BytesIO(value)
+ self.setByteStream(stream)
+ c_stream = TextIOWrapper(stream, encoding)
+ self.setCharacterStream(c_stream)
+ self.setEncoding(c_stream.encoding)
headers = {
@@ -131,8 +180,18 @@ class FileInputSource(InputSource):
system_id = URIRef(urljoin("file:", pathname2url(file.name)), base=base)
super(FileInputSource, self).__init__(system_id)
self.file = file
- self.setByteStream(file)
- # TODO: self.setEncoding(encoding)
+ if isinstance(file, TextIOBase): # Python3 unicode fp
+ self.setCharacterStream(file)
+ self.setEncoding(file.encoding)
+ try:
+ b = file.buffer
+ self.setByteStream(b)
+ except (AttributeError, LookupError):
+ self.setByteStream(file)
+ else:
+ self.setByteStream(file)
+ # We cannot set characterStream here because
+ # we do not know the Raw Bytes File encoding.
def __repr__(self):
return repr(self.file)
@@ -168,10 +227,21 @@ def create_input_source(
else:
if isinstance(source, str):
location = source
+ elif isinstance(source, bytes):
+ data = source
elif hasattr(source, "read") and not isinstance(source, Namespace):
f = source
input_source = InputSource()
- input_source.setByteStream(f)
+ if hasattr(source, "encoding"):
+ input_source.setCharacterStream(source)
+ input_source.setEncoding(source.encoding)
+ try:
+ b = file.buffer
+ input_source.setByteStream(b)
+ except (AttributeError, LookupError):
+ input_source.setByteStream(source)
+ else:
+ input_source.setByteStream(f)
if f is sys.stdin:
input_source.setSystemId("file:///dev/stdin")
elif hasattr(f, "name"):
@@ -203,8 +273,8 @@ def create_input_source(
input_source = FileInputSource(file)
if data is not None:
- if isinstance(data, str):
- data = data.encode("utf-8")
+ if not isinstance(data, (str, bytes, bytearray)):
+ raise RuntimeError("parse data can only str, or bytes.")
input_source = StringInputSource(data)
auto_close = True