From d1ab445f54cd355875f53106fb477cedc8dbaac0 Mon Sep 17 00:00:00 2001 From: Ivan Herman Date: Thu, 18 Dec 2014 16:13:24 +0100 Subject: Second attempt to update the new microdata parser, now with the correct (master) base. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous attempt went wrong because I started with a wrong branch:-( Jörn rebased it, and I re-did the __init__.py file from scratch. --- rdflib/plugins/parsers/pyMicrodata/__init__.py | 91 +++++++++++++------------- rdflib/plugins/parsers/structureddata.py | 6 +- 2 files changed, 48 insertions(+), 49 deletions(-) diff --git a/rdflib/plugins/parsers/pyMicrodata/__init__.py b/rdflib/plugins/parsers/pyMicrodata/__init__.py index 60e87f33..6b7b2b7b 100644 --- a/rdflib/plugins/parsers/pyMicrodata/__init__.py +++ b/rdflib/plugins/parsers/pyMicrodata/__init__.py @@ -2,7 +2,7 @@ """ This module implements the microdata->RDF algorithm, as documented by the U{W3C Semantic Web Interest Group -Note}. +Note}. The module can be used via a stand-alone script (an example is part of the distribution) or bound to a CGI script as a Web Service. An example CGI script is also added to the distribution. Both the local script and the distribution may have to be adapted to local circumstances. @@ -97,7 +97,7 @@ class MicrodataError(Exception) : def __init__(self, msg) : self.msg = msg Exception.__init__(self) - + class HTTPError(MicrodataError) : """Raised when HTTP problems are detected. It does not add any new functionality to the Exception class.""" @@ -133,48 +133,48 @@ class pyMicrodata : """ self.http_status = 200 self.base = base - + def _generate_error_graph(self, pgraph, full_msg, uri = None) : """ Generate an error message into the graph. This method is usually used reacting on exceptions. - + Later versions of pyMicrodata may have more detailed error conditions on which it wishes to react. At the moment, this is fairly crude... """ - if pgraph is None : + if pgraph == None : retval = Graph() else : retval = pgraph - - pgraph.bind("dc", "http://purl.org/dc/terms/") - pgraph.bind("xsd", 'http://www.w3.org/2001/XMLSchema#') - pgraph.bind("ht", 'http://www.w3.org/2006/http#') - pgraph.bind("pyMicrodata", 'http://www.w3.org/2012/pyMicrodata/vocab#') + + pgraph.bind( "dc","http://purl.org/dc/terms/" ) + pgraph.bind( "xsd",'http://www.w3.org/2001/XMLSchema#' ) + pgraph.bind( "ht",'http://www.w3.org/2006/http#' ) + pgraph.bind( "pyMicrodata",'http://www.w3.org/2012/pyMicrodata/vocab#' ) bnode = BNode() retval.add((bnode, ns_rdf["type"], ns_micro["Error"])) retval.add((bnode, ns_dc["description"], Literal(full_msg))) retval.add((bnode, ns_dc["date"], Literal(datetime.datetime.utcnow().isoformat(),datatype=ns_xsd["dateTime"]))) - - if uri is not None : + + if uri != None : htbnode = BNode() - retval.add((bnode, ns_micro["context"],htbnode)) - retval.add((htbnode, ns_rdf["type"], ns_ht["Request"])) - retval.add((htbnode, ns_ht["requestURI"], Literal(uri))) - - if self.http_status is not None and self.http_status != 200 : + retval.add( (bnode, ns_micro["context"],htbnode) ) + retval.add( (htbnode, ns_rdf["type"], ns_ht["Request"]) ) + retval.add( (htbnode, ns_ht["requestURI"], Literal(uri)) ) + + if self.http_status != None and self.http_status != 200: htbnode = BNode() - retval.add((bnode, ns_micro["context"],htbnode)) - retval.add((htbnode, ns_rdf["type"], ns_ht["Response"])) - retval.add((htbnode, ns_ht["responseCode"], URIRef("http://www.w3.org/2006/http#%s" % self.http_status))) + retval.add( (bnode, ns_micro["context"],htbnode) ) + retval.add( (htbnode, ns_rdf["type"], ns_ht["Response"]) ) + retval.add( (htbnode, ns_ht["responseCode"], URIRef("http://www.w3.org/2006/http#%s" % self.http_status)) ) return retval - + def _get_input(self, name) : """ Trying to guess whether "name" is a URI, a string; it then tries to open these as such accordingly, returning a file-like object. If name is a plain string then it returns the input argument (that should - be, supposedly, a file-like object already) + be, supposidly, a file-like object already) @param name: identifier of the input source @type name: string or a file-like object @return: a file like object if opening "name" is possible and successful, "name" otherwise @@ -194,11 +194,11 @@ class pyMicrodata : self.base = url_request.location return url_request.data else : - self.base = name - return file(name) + self.base = 'file://'+name + return open(name, 'rb') else : return name - + #################################################################################################################### # Externally used methods # @@ -212,19 +212,19 @@ class pyMicrodata : @return: an RDF Graph @rtype: rdflib Graph instance """ - if graph is None : + if graph == None : # Create the RDF Graph, that will contain the return triples... - graph = Graph() - + graph = Graph() + conversion = MicrodataConversion(dom.documentElement, graph, base = self.base) conversion.convert() return graph - + def graph_from_source(self, name, graph = None, rdfOutput = False) : """ Extract an RDF graph from an microdata source. The source is parsed, the RDF extracted, and the RDF Graph is returned. This is a front-end to the L{pyMicrodata.graph_from_DOM} method. - + @param name: a URI, a file name, or a file-like object @return: an RDF Graph @rtype: rdflib Graph instance @@ -246,7 +246,7 @@ class pyMicrodata : self.http_status = 500 if not rdfOutput : raise e return self._generate_error_graph(graph, str(e), uri=name) - + dom = None try : import warnings @@ -263,7 +263,7 @@ class pyMicrodata : e = sys.exc_info()[1] self.http_status = 400 if not rdfOutput : raise e - return self._generate_error_graph(graph, str(e), uri=name) + return self._generate_error_graph(graph, str(e), uri=name) except Exception : # Something nasty happened:-( @@ -274,7 +274,7 @@ class pyMicrodata : self.http_status = 500 if not rdfOutput : raise e return self._generate_error_graph(graph, str(e), uri=name) - + def rdf_from_sources(self, names, outputFormat = "turtle", rdfOutput = False) : """ Extract and RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed, the RDF @@ -291,7 +291,7 @@ class pyMicrodata : graph = Graph() for prefix in _bindings : - graph.bind(prefix, Namespace(_bindings[prefix])) + graph.bind(prefix,Namespace(_bindings[prefix])) # the value of rdfOutput determines the reaction on exceptions... for name in names : @@ -314,7 +314,7 @@ def processURI(uri, outputFormat, form) : """The standard processing of a microdata uri options in a form, ie, as an entry point from a CGI call. The call accepts extra form options (eg, HTTP GET options) as follows: - + @param uri: URI to access. Note that the "text:" and "uploaded:" values are treated separately; the former is for textual intput (in which case a StringIO is used to get the data) and the latter is for uploaded file, where the form gives access to the file directly. @param outputFormat: serialization formats, as understood by RDFLib. Note that though "turtle" is a possible parameter value, some versions of the RDFLib turtle generation does funny (though legal) things with @@ -326,23 +326,22 @@ def processURI(uri, outputFormat, form) : """ if uri == "uploaded:" : - input = form["uploaded"].file - base = "" + input = form["uploaded"].file + base = "" elif uri == "text:" : - input = StringIO(form.getfirst("text")) - base = "" + input = StringIO(form.getfirst("text")) + base = "" else : - input = uri - base = uri + input = uri + base = uri processor = pyMicrodata(base = base) # Decide the output format; the issue is what should happen in case of a top level error like an inaccessibility of # the html source: should a graph be returned or an HTML page with an error message? - # decide whether HTML or RDF should be sent. + # decide whether HTML or RDF should be sent. htmlOutput = False - #import os #if 'HTTP_ACCEPT' in os.environ : # acc = os.environ['HTTP_ACCEPT'] # possibilities = ['text/html', @@ -375,7 +374,7 @@ def processURI(uri, outputFormat, form) : import cgi h = sys.exc_info()[1] retval = 'Content-type: text/html; charset=utf-8\nStatus: %s \n\n' % h.http_code - retval += "\n" + retval += "\n" retval += "\n" retval += "HTTP Error in Microdata processing\n" retval += "\n" @@ -393,7 +392,7 @@ def processURI(uri, outputFormat, form) : import traceback, cgi retval = 'Content-type: text/html; charset=utf-8\nStatus: %s\n\n' % processor.http_status - retval += "\n" + retval += "\n" retval += "\n" retval += "Exception in Microdata processing\n" retval += "\n" @@ -406,7 +405,7 @@ def processURI(uri, outputFormat, form) : retval +="
%s
\n" % value retval +="

Distiller request details

\n" retval +="
\n" - if uri == "text:" and "text" in form and form["text"].value is not None and len(form["text"].value.strip()) != 0 : + if uri == "text:" and "text" in form and form["text"].value != None and len(form["text"].value.strip()) != 0 : retval +="
Text input:
%s
\n" % cgi.escape(form["text"].value).replace('\n','
') elif uri == "uploaded:" : retval +="
Uploaded file
\n" diff --git a/rdflib/plugins/parsers/structureddata.py b/rdflib/plugins/parsers/structureddata.py index e4058cd4..fe0529d8 100644 --- a/rdflib/plugins/parsers/structureddata.py +++ b/rdflib/plugins/parsers/structureddata.py @@ -26,7 +26,7 @@ try: except ImportError: import warnings warnings.warn( - 'html5lib not found! RDFa and Microdata ' + + 'html5lib not found! RDFa and Microdata the ' + 'parsers will not be available.') html5lib = False @@ -129,7 +129,7 @@ class RDFaParser(Parser): if html5lib is False: raise ImportError( - 'html5lib is not installed, cannot use ' + + 'html5lib is not installed, cannot use the ' + 'RDFa and Microdata parsers.') (baseURI, orig_source) = _get_orig_source(source) @@ -227,7 +227,7 @@ class MicrodataParser(Parser): """ if html5lib is False: raise ImportError( - 'html5lib is not installed, cannot use RDFa ' + + 'html5lib is not installed, cannot use the RDFa ' + 'and Microdata parsers.') (baseURI, orig_source) = _get_orig_source(source) -- cgit v1.2.1