diff options
author | Ashley Sommer <ashleysommer@gmail.com> | 2020-08-27 14:45:45 +1000 |
---|---|---|
committer | Ashley Sommer <ashleysommer@gmail.com> | 2020-08-27 14:45:45 +1000 |
commit | fbc091208fbf95c338ead7be7547136f5c3433cc (patch) | |
tree | 58e74adbf7fd96257ca5e17c34f50398a7b469e9 /rdflib/parser.py | |
parent | 466518ed6f74b6cef3b79aa8a917be3c7184284a (diff) | |
parent | aa527747bd6a5e48ea19463c483f5fb45c7ea230 (diff) | |
download | rdflib-fbc091208fbf95c338ead7be7547136f5c3433cc.tar.gz |
Merge remote-tracking branch 'origin/master' into t0b3_master
# Conflicts:
# rdflib/namespace.py
# rdflib/parser.py
# rdflib/plugins/memory.py
# rdflib/plugins/parsers/ntriples.py
# test/test_iomemory.py
Diffstat (limited to 'rdflib/parser.py')
-rw-r--r-- | rdflib/parser.py | 100 |
1 files changed, 85 insertions, 15 deletions
diff --git a/rdflib/parser.py b/rdflib/parser.py
index 4d807e7e..fcaed5e4 100644
--- a/rdflib/parser.py
+++ b/rdflib/parser.py
@@ -10,11 +10,11 @@ want to do so through the Graph class parse method.
 
 """
 
+import codecs
 import os
 import sys
 
-from io import BytesIO
-
+from io import BytesIO, TextIOBase, TextIOWrapper, StringIO, BufferedIOBase
 
 from urllib.request import pathname2url
 from urllib.request import Request
@@ -38,6 +38,8 @@ __all__ = [
 
 
 class Parser(object):
+    __slots__ = set()
+
     def __init__(self):
         pass
 
@@ -45,6 +47,37 @@ class Parser(object):
         pass
 
 
+class BytesIOWrapper(BufferedIOBase):
+    __slots__ = ("wrapped", "encoded", "encoding")
+
+    def __init__(self, wrapped: str, encoding="utf-8"):
+        super(BytesIOWrapper, self).__init__()
+        self.wrapped = wrapped
+        self.encoding = encoding
+        self.encoded = None
+
+    def read(self, *args, **kwargs):
+        if self.encoded is None:
+            b, blen = codecs.getencoder(self.encoding)(self.wrapped)
+            self.encoded = BytesIO(b)
+        return self.encoded.read(*args, **kwargs)
+
+    def read1(self, *args, **kwargs):
+        if self.encoded is None:
+            b, blen = codecs.getencoder(self.encoding)(self.wrapped)  # (bytes, length)
+            self.encoded = BytesIO(b)
+        return self.encoded.read1(*args, **kwargs)
+
+    def readinto(self, *args, **kwargs):
+        raise NotImplementedError()
+
+    def readinto1(self, *args, **kwargs):
+        raise NotImplementedError()
+
+    def write(self, *args, **kwargs):
+        raise NotImplementedError()
+
+
 class InputSource(xmlreader.InputSource, object):
     """
     TODO:
@@ -56,23 +89,39 @@ class InputSource(xmlreader.InputSource, object):
         self.auto_close = False  # see Graph.parse(), true if opened by us
 
     def close(self):
+        c = self.getCharacterStream()
+        if c and hasattr(c, "close"):
+            try:
+                c.close()
+            except Exception:
+                pass
         f = self.getByteStream()
         if f and hasattr(f, "close"):
-            f.close()
+            try:
+                f.close()
+            except Exception:
+                pass
 
 
 class StringInputSource(InputSource):
     """
-    TODO:
+    Constructs an RDFLib Parser InputSource from a Python String or Bytes
     """
 
-    def __init__(self, value, system_id=None):
+    def __init__(self, value, encoding="utf-8", system_id=None):
         super(StringInputSource, self).__init__(system_id)
-        stream = BytesIO(value)
-        self.setByteStream(stream)
-        # TODO:
-        #   encoding = value.encoding
-        #   self.setEncoding(encoding)
+        if isinstance(value, str):
+            stream = StringIO(value)
+            self.setCharacterStream(stream)
+            self.setEncoding(encoding)
+            b_stream = BytesIOWrapper(value, encoding)
+            self.setByteStream(b_stream)
+        else:
+            stream = BytesIO(value)
+            self.setByteStream(stream)
+            c_stream = TextIOWrapper(stream, encoding)
+            self.setCharacterStream(c_stream)
+            self.setEncoding(c_stream.encoding)
 
 
 headers = {
@@ -131,8 +180,18 @@ class FileInputSource(InputSource):
         system_id = URIRef(urljoin("file:", pathname2url(file.name)), base=base)
         super(FileInputSource, self).__init__(system_id)
         self.file = file
-        self.setByteStream(file)
-        # TODO: self.setEncoding(encoding)
+        if isinstance(file, TextIOBase):  # Python3 unicode fp
+            self.setCharacterStream(file)
+            self.setEncoding(file.encoding)
+            try:
+                b = file.buffer
+                self.setByteStream(b)
+            except (AttributeError, LookupError):
+                self.setByteStream(file)
+        else:
+            self.setByteStream(file)
+            # We cannot set characterStream here because
+            # we do not know the Raw Bytes File encoding.
 
     def __repr__(self):
         return repr(self.file)
@@ -168,10 +227,21 @@ def create_input_source(
         else:
             if isinstance(source, str):
                 location = source
+            elif isinstance(source, bytes):
+                data = source
             elif hasattr(source, "read") and not isinstance(source, Namespace):
                 f = source
                 input_source = InputSource()
-                input_source.setByteStream(f)
+                if hasattr(source, "encoding"):
+                    input_source.setCharacterStream(source)
+                    input_source.setEncoding(source.encoding)
+                    try:
+                        b = source.buffer  # binary layer under the text stream
+                        input_source.setByteStream(b)
+                    except (AttributeError, LookupError):
+                        input_source.setByteStream(source)
+                else:
+                    input_source.setByteStream(f)
                 if f is sys.stdin:
                     input_source.setSystemId("file:///dev/stdin")
                 elif hasattr(f, "name"):
@@ -203,8 +273,8 @@ def create_input_source(
         input_source = FileInputSource(file)
 
     if data is not None:
-        if isinstance(data, str):
-            data = data.encode("utf-8")
+        if not isinstance(data, (str, bytes, bytearray)):
+            raise RuntimeError("parse data can only str, or bytes.")
         input_source = StringInputSource(data)
         auto_close = True
 