diff options
author | Nicholas Car <nicholas.car@surroundaustralia.com> | 2020-06-01 11:43:26 +1000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-01 11:43:26 +1000 |
commit | db4b66f36bd90db56bb9e072d549aa9bca4d8fed (patch) | |
tree | f51833d40f62351c8cc10d438e52ecc80c4604e3 | |
parent | 037ea51e5f4863a7f98ff59972fcd34d39a7ed97 (diff) | |
parent | b5fa8ec78ec6eba1df4530588c3568a8710b9cad (diff) | |
download | rdflib-db4b66f36bd90db56bb9e072d549aa9bca4d8fed.tar.gz |
Merge pull request #1046 from dwinston/autodetect-parse-format
Autodetect parse() format
-rw-r--r-- | rdflib/graph.py | 21 |
-rw-r--r-- | rdflib/util.py | 11 |
-rw-r--r-- | test/test_parse_file_guess_format.py | 32 |
3 files changed, 55 insertions, 9 deletions
diff --git a/rdflib/graph.py b/rdflib/graph.py index 12d18dce..145224b8 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -2,6 +2,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from xml.sax import SAXParseException + from rdflib.term import Literal # required for doctests from rdflib.namespace import Namespace # required for doctests @@ -21,6 +23,7 @@ from rdflib.parser import create_input_source from rdflib.namespace import NamespaceManager from rdflib.resource import Resource from rdflib.collection import Collection +import rdflib.util # avoid circular dependency import os import shutil @@ -31,6 +34,7 @@ from urllib.parse import urlparse assert Literal # avoid warning assert Namespace # avoid warning + logger = logging.getLogger(__name__) @@ -1066,13 +1070,24 @@ class Graph(Node): ) if format is None: format = source.content_type + assumed_xml = False if format is None: - # raise Exception("Could not determine format for %r. You can" + \ - # "expicitly specify one with the format argument." % source) - format = "application/rdf+xml" + if (hasattr(source, "file") + and getattr(source.file, "name", None) + and isinstance(source.file.name, str)): + format = rdflib.util.guess_format(source.file.name) + if format is None: + format = "application/rdf+xml" + assumed_xml = True parser = plugin.get(format, Parser)() try: parser.parse(source, self, **args) + except SAXParseException as saxpe: + if assumed_xml: + logger.warning( + "Could not guess format for %r, so assumed xml." + " You can explicitly specify format using the format argument." 
% source) + raise saxpe finally: if source.auto_close: source.close() diff --git a/rdflib/util.py b/rdflib/util.py index 57b20915..92996ec7 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -47,8 +47,7 @@ from rdflib.exceptions import ContextTypeError from rdflib.exceptions import ObjectTypeError from rdflib.exceptions import PredicateTypeError from rdflib.exceptions import SubjectTypeError -from rdflib.graph import Graph -from rdflib.graph import QuotedGraph +import rdflib.graph # avoid circular dependency from rdflib.namespace import Namespace from rdflib.namespace import NamespaceManager from rdflib.term import BNode @@ -161,7 +160,7 @@ def from_n3(s, default=None, backend=None, nsm=None): >>> from rdflib import RDFS >>> from_n3('rdfs:label') == RDFS['label'] True - >>> nsm = NamespaceManager(Graph()) + >>> nsm = NamespaceManager(rdflib.graph.Graph()) >>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/') >>> berlin = URIRef('http://dbpedia.org/resource/Berlin') >>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin @@ -207,16 +206,16 @@ def from_n3(s, default=None, backend=None, nsm=None): return Literal(int(s)) elif s.startswith("{"): identifier = from_n3(s[1:-1]) - return QuotedGraph(backend, identifier) + return rdflib.graph.QuotedGraph(backend, identifier) elif s.startswith("["): identifier = from_n3(s[1:-1]) - return Graph(backend, identifier) + return rdflib.graph.Graph(backend, identifier) elif s.startswith("_:"): return BNode(s[2:]) elif ":" in s: if nsm is None: # instantiate default NamespaceManager and rely on its defaults - nsm = NamespaceManager(Graph()) + nsm = NamespaceManager(rdflib.graph.Graph()) prefix, last_part = s.split(":", 1) ns = dict(nsm.namespaces())[prefix] return Namespace(ns)[last_part] diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py new file mode 100644 index 00000000..abb039df --- /dev/null +++ b/test/test_parse_file_guess_format.py @@ -0,0 +1,32 @@ +import unittest +from pathlib import Path 
+from shutil import copyfile +from tempfile import TemporaryDirectory + +from xml.sax import SAXParseException + +from rdflib import Graph, logger as graph_logger + + +class FileParserGuessFormatTest(unittest.TestCase): + def test_ttl(self): + g = Graph() + self.assertIsInstance(g.parse("test/w3c/turtle/IRI_subject.ttl"), Graph) + + def test_n3(self): + g = Graph() + self.assertIsInstance(g.parse("test/n3/example-lots_of_graphs.n3"), Graph) + + def test_warning(self): + g = Graph() + with TemporaryDirectory() as tmpdirname: + newpath = Path(tmpdirname).joinpath("no_file_ext") + copyfile("test/w3c/turtle/IRI_subject.ttl", str(newpath)) + with self.assertLogs(graph_logger, "WARNING") as log_cm: + with self.assertRaises(SAXParseException): + g.parse(str(newpath)) + self.assertTrue(any("Could not guess format" in msg for msg in log_cm.output)) + + +if __name__ == '__main__': + unittest.main() |