summary refs log tree commit diff
path: root/rdflib/parser.py
diff options
context:
space:
mode:
author eikeon <devnull@localhost> 2009-03-08 00:47:10 +0000
committer eikeon <devnull@localhost> 2009-03-08 00:47:10 +0000
commit 33526fbac41d9bf099d9a76bf60d82a9a2ac6343 (patch)
tree 48f6280adfd62ee2ddb5ec742e6db7c88d0ffee9 /rdflib/parser.py
parent 9d48823418ef54fe1339a9e9f53a951b2bdd9d14 (diff)
download rdflib-33526fbac41d9bf099d9a76bf60d82a9a2ac6343.tar.gz
merged in parser related changes from 3.0 branch; fixing pep8 module name issues for URLInputSource and company along the way
Diffstat (limited to 'rdflib/parser.py')
-rw-r--r-- rdflib/parser.py 162
1 files changed, 162 insertions, 0 deletions
diff --git a/rdflib/parser.py b/rdflib/parser.py
new file mode 100644
index 00000000..b84e5f44
--- /dev/null
+++ b/rdflib/parser.py
@@ -0,0 +1,162 @@
+"""
+This module defines the parser plugin interface and contains other
+related parser support code.
+
+The module is mainly useful for those wanting to write a parser that
+can plug in to rdflib. If you want to invoke a parser, you most likely
+want to do so through the Graph class's parse method.
+
+"""
+
+import os
+import __builtin__
+import warnings
+from urllib import pathname2url, url2pathname
+from urllib2 import urlopen, Request
+from urlparse import urljoin
+from StringIO import StringIO
+from xml.sax import xmlreader
+from xml.sax.saxutils import prepare_input_source
+import types
+# Tuple of the types treated as "a string" by create_input_source.
+# types.UnicodeType is included only when the `types` module provides it.
+_StringTypes = (types.StringType,)
+if hasattr(types, "UnicodeType"):
+    _StringTypes = _StringTypes + (types.UnicodeType,)
+
+from rdflib import __version__
+from rdflib.term import URIRef, Namespace
+
+
+class Parser(object):
+    """
+    Base class of the parser plugin interface.
+
+    Both methods are empty placeholders here; concrete parsers are
+    expected to supply their own `parse`.
+    """
+
+    def __init__(self):
+        """Create the parser; the base class keeps no state."""
+
+    def parse(self, source, sink):
+        """
+        Parse `source` into `sink`.
+
+        The base implementation ignores both arguments and returns
+        None.
+        """
+        return None
+
+
+class InputSource(xmlreader.InputSource):
+    """
+    SAX InputSource extended with a `content_type` attribute.
+
+    `content_type` starts out as None; subclasses may fill it in when
+    the type of the underlying data is known.
+    """
+
+    def __init__(self, system_id=None):
+        # Set our own attribute first; the base initialiser below does
+        # not depend on it.
+        self.content_type = None
+        xmlreader.InputSource.__init__(self, system_id=system_id)
+
+
+class StringInputSource(InputSource, object):
+    """
+    InputSource whose byte stream is an in-memory string.
+
+    `value` is wrapped in a StringIO object and installed as the byte
+    stream; `system_id` is passed through to InputSource unchanged.
+    """
+
+    def __init__(self, value, system_id=None):
+        super(StringInputSource, self).__init__(system_id)
+        self.setByteStream(StringIO(value))
+        # TODO:
+        #   encoding = value.encoding
+        #   self.setEncoding(encoding)
+
+
+# HTTP request headers passed to urllib2.Request by URLInputSource.
+# The Accept value lists RDF/XML first, then N3 (q=0.9), then XHTML
+# (q=0.5); the User-agent string embeds the rdflib version.
+headers = {
+ 'Accept': 'application/rdf+xml,text/rdf+n3;q=0.9,application/xhtml+xml;q=0.5',
+ 'User-agent': 'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % __version__
+ }
+
+
+class URLInputSource(InputSource, object):
+    """
+    InputSource whose byte stream is the response from opening a URL.
+
+    The request is sent with the module-level `headers`. After
+    construction, `url` holds the requested URL and `content_type`
+    holds the response's Content-Type with any parameters removed
+    (for example "application/rdf+xml"), or None when the response
+    carries no Content-Type header.
+
+    Any error raised by `urlopen` propagates to the caller.
+    """
+
+    def __init__(self, system_id=None):
+        super(URLInputSource, self).__init__(system_id)
+        self.url = system_id
+        # So that we send the headers we want to...
+        req = Request(system_id, None, headers)
+
+        response = urlopen(req)
+        content_type = response.info().get('content-type')
+        if content_type is not None:
+            # Keep only the media type; drop parameters such as
+            # "; charset=utf-8". The header may be absent altogether,
+            # in which case content_type stays None.
+            content_type = content_type.split(";", 1)[0]
+        self.content_type = content_type
+        self.setByteStream(response)
+        # TODO: self.setEncoding(encoding)
+
+    def __repr__(self):
+        return self.url
+
+
+class FileInputSource(InputSource, object):
+    """
+    InputSource wrapping an already open file object.
+
+    The system id is `file.name` resolved against a file: URL for the
+    current working directory. The file object is kept in `self.file`
+    and installed as the byte stream.
+    """
+
+    def __init__(self, file):
+        # The trailing "/" marks the working directory as a directory,
+        # matching create_input_source. Without it, URL joining would
+        # replace the last path segment, so a relative file name would
+        # resolve against the parent directory instead.
+        base = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
+        system_id = URIRef(file.name, base=base)
+        super(FileInputSource, self).__init__(system_id)
+        self.file = file
+        self.setByteStream(file)
+        # TODO: self.setEncoding(encoding)
+
+    def __repr__(self):
+        return repr(self.file)
+
+
+def create_input_source(source=None, publicID=None,
+                        location=None, file=None, data=None):
+    """
+    Return an InputSource built from whichever argument is supplied.
+
+    - `source`: an InputSource (used as is), a string (treated as a
+      `location`), or an object with a `read` attribute (installed as
+      the byte stream of a fresh InputSource).
+    - `location`: resolved against the current working directory;
+      "file:///" URLs are opened as local files, anything else is
+      fetched with URLInputSource.
+    - `file`: an open file object, wrapped in FileInputSource.
+    - `data`: a string, wrapped in StringInputSource.
+
+    The arguments are examined in that order and a later one replaces
+    the result of an earlier one. `publicID`, when true, becomes the
+    public id; a location supplies it when none was given. A public
+    id of None is replaced by "".
+
+    Raises Exception when `source` has an unsupported type or when no
+    InputSource could be created.
+    """
+
+    # TODO: test that exactly one of source, location, file, and data
+    # is not None.
+
+    result = None
+
+    if isinstance(source, InputSource):
+        result = source
+    elif isinstance(source, _StringTypes):
+        # A plain string is handled by the location branch below.
+        location = source
+    elif source is not None:
+        # NOTE(review): Namespace is excluded explicitly, presumably
+        # because it would otherwise pass the `read` check -- confirm.
+        if not hasattr(source, "read") or isinstance(source, Namespace):
+            raise Exception("Unexpected type '%s' for source '%s'" % (type(source), source))
+        result = InputSource()
+        result.setByteStream(source)
+        if hasattr(source, "name"):
+            result.setSystemId(source.name)
+
+    if location is not None:
+        cwd_url = urljoin("file:", "%s/" % pathname2url(os.getcwd()))
+        absolute_location = URIRef(location, base=cwd_url).defrag()
+        if not absolute_location.startswith("file:///"):
+            result = URLInputSource(absolute_location)
+        else:
+            local_path = url2pathname(absolute_location.replace("file:///", "/"))
+            # The `file` parameter shadows the builtin, hence the
+            # explicit __builtin__ lookup; the opened file is picked
+            # up by the `file` branch below.
+            file = __builtin__.file(local_path, "rb")
+        publicID = publicID or absolute_location
+
+    if file is not None:
+        result = FileInputSource(file)
+
+    if data is not None:
+        result = StringInputSource(data)
+
+    if result is None:
+        raise Exception("could not create InputSource")
+
+    if publicID:
+        result.setPublicId(publicID)
+
+    # TODO: what motivated this bit?
+    if result.getPublicId() is None:
+        result.setPublicId("")
+    return result
+
+