diff options
author | eikeon <devnull@localhost> | 2009-03-08 00:47:10 +0000 |
---|---|---|
committer | eikeon <devnull@localhost> | 2009-03-08 00:47:10 +0000 |
commit | 33526fbac41d9bf099d9a76bf60d82a9a2ac6343 (patch) | |
tree | 48f6280adfd62ee2ddb5ec742e6db7c88d0ffee9 /rdflib/parser.py | |
parent | 9d48823418ef54fe1339a9e9f53a951b2bdd9d14 (diff) | |
download | rdflib-33526fbac41d9bf099d9a76bf60d82a9a2ac6343.tar.gz |
merged in parser related changes from 3.0 branch; fixing pep8 module name issues for URLInputSource and company along the way
Diffstat (limited to 'rdflib/parser.py')
-rw-r--r-- | rdflib/parser.py | 162 |
1 files changed, 162 insertions, 0 deletions
"""
This module defines the parser plugin interface and contains other
related parser support code.

The module is mainly useful for those wanting to write a parser that
can plugin to rdflib. If you are wanting to invoke a parser you likely
want to do so through the Graph class parse method.

"""

import os
import __builtin__
import warnings
from urllib import pathname2url, url2pathname
from urllib2 import urlopen, Request
from urlparse import urljoin
from StringIO import StringIO
from xml.sax import xmlreader
from xml.sax.saxutils import prepare_input_source
import types
try:
    _StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
    # Builds without unicode support only expose StringType.
    _StringTypes = (types.StringType,)

from rdflib import __version__
from rdflib.term import URIRef, Namespace


class Parser(object):
    """
    Parser plugin interface.

    The base implementation does nothing; concrete parsers are expected
    to provide their own ``parse``.
    """

    def __init__(self):
        pass

    def parse(self, source, sink):
        """
        Parse ``source`` into ``sink``. No-op in this base class.

        NOTE(review): presumably ``source`` is an InputSource and
        ``sink`` is the object receiving the parsed statements --
        confirm against the concrete parser plugins.
        """
        pass


class InputSource(xmlreader.InputSource):
    """
    SAX InputSource extended with a ``content_type`` attribute.

    ``content_type`` starts out as None; subclasses that learn the media
    type of their stream (see URLInputSource) fill it in.
    """

    def __init__(self, system_id=None):
        xmlreader.InputSource.__init__(self, system_id=system_id)
        self.content_type = None


class StringInputSource(InputSource, object):
    """
    InputSource whose byte stream is an in-memory string.

    ``object`` is listed as a base so that ``super()`` can be used.
    """

    def __init__(self, value, system_id=None):
        super(StringInputSource, self).__init__(system_id)
        stream = StringIO(value)
        self.setByteStream(stream)
        # TODO:
        #   encoding = value.encoding
        #   self.setEncoding(encoding)


# HTTP request headers sent by URLInputSource: the RDF media types we
# accept (with preference weights) and an identifying user agent.
headers = {
    'Accept': 'application/rdf+xml,text/rdf+n3;q=0.9,application/xhtml+xml;q=0.5',
    'User-agent': 'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % __version__
    }


class URLInputSource(InputSource, object):
    """
    InputSource that reads its byte stream from a URL.

    The URL is opened as soon as the instance is constructed, using the
    module-level ``headers``. ``content_type`` is set to the media type
    from the response's Content-Type header (parameters such as
    ``charset`` stripped), or left as None when the response carries no
    such header.
    """

    def __init__(self, system_id=None):
        super(URLInputSource, self).__init__(system_id)
        self.url = system_id
        # So that we send the headers we want to...
        req = Request(system_id, None, headers)

        response = urlopen(req)
        content_type = response.info().get('content-type')
        if content_type is not None:
            # Keep only the media type; drop ";charset=..." and friends.
            # The None guard matters: not every response has the header,
            # and calling split() on None would raise AttributeError.
            content_type = content_type.split(";", 1)[0]
        self.content_type = content_type
        self.setByteStream(response)
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        return self.url


class FileInputSource(InputSource, object):
    """
    InputSource wrapping an already opened file object.

    The system id is the file's ``name`` resolved as a URI against the
    current working directory, so ``file`` must have a ``name``
    attribute.
    """

    def __init__(self, file):
        # The trailing "/" makes the working directory itself the base;
        # without it urljoin would resolve a relative file name against
        # the *parent* directory. This matches how create_input_source
        # builds its base. Absolute names are unaffected.
        base = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
        system_id = URIRef(file.name, base=base)
        super(FileInputSource, self).__init__(system_id)
        self.file = file
        self.setByteStream(file)
        # TODO: self.setEncoding(encoding)

    def __repr__(self):
        return repr(self.file)


def create_input_source(source=None, publicID=None,
                        location=None, file=None, data=None):
    """
    Return an appropriate InputSource instance for the given
    parameters.

    ``source`` may be an InputSource (used as is), a string (treated as
    ``location``) or an object with a ``read`` attribute that is not a
    Namespace (wrapped in a plain InputSource; its ``name``, if any,
    becomes the system id). ``location`` is a URL or path, resolved
    against the current working directory: ``file:///`` locations are
    opened as local files, anything else is fetched with
    URLInputSource. ``file`` is an open file object and ``data`` is a
    string holding the content itself.

    ``publicID``, when truthy, is set as the public id of the result;
    when a location is used it defaults to the absolute location. The
    returned source always has a non-None public id ("" as a last
    resort).

    Raises Exception if ``source`` is of an unsupported type or if no
    input source could be created from the arguments.
    """

    # TODO: test that exactly one of source, location, file, and data
    # is not None.

    input_source = None

    if source is not None:
        if isinstance(source, InputSource):
            input_source = source
        else:
            if isinstance(source, _StringTypes):
                location = source
            elif hasattr(source, "read") and not isinstance(source, Namespace):
                # NOTE(review): the Namespace exclusion presumably exists
                # because Namespace answers hasattr() for any name --
                # confirm against rdflib.term.Namespace.
                f = source
                input_source = InputSource()
                input_source.setByteStream(f)
                if hasattr(f, "name"):
                    input_source.setSystemId(f.name)
            else:
                raise Exception("Unexpected type '%s' for source '%s'" % (type(source), source))

    if location is not None:
        base = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
        absolute_location = URIRef(location, base=base).defrag()
        if absolute_location.startswith("file:///"):
            filename = url2pathname(absolute_location.replace("file:///", "/"))
            # The ``file`` parameter shadows the builtin of the same
            # name, hence the explicit __builtin__ lookup. The opened
            # file is picked up by the ``file`` branch below.
            file = __builtin__.file(filename, "rb")
        else:
            input_source = URLInputSource(absolute_location)
        publicID = publicID or absolute_location

    if file is not None:
        input_source = FileInputSource(file)

    if data is not None:
        input_source = StringInputSource(data)

    if input_source is None:
        raise Exception("could not create InputSource")

    if publicID:
        input_source.setPublicId(publicID)

    # TODO: what motivated this bit?
    # Guarantees callers never see None as the public id.
    public_id = input_source.getPublicId()
    if public_id is None:
        input_source.setPublicId("")
    return input_source