summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nicholas Car <nicholas.car@surroundaustralia.com> 2020-06-01 11:43:26 +1000
committer GitHub <noreply@github.com> 2020-06-01 11:43:26 +1000
commit db4b66f36bd90db56bb9e072d549aa9bca4d8fed (patch)
tree f51833d40f62351c8cc10d438e52ecc80c4604e3
parent 037ea51e5f4863a7f98ff59972fcd34d39a7ed97 (diff)
parent b5fa8ec78ec6eba1df4530588c3568a8710b9cad (diff)
download rdflib-db4b66f36bd90db56bb9e072d549aa9bca4d8fed.tar.gz
Merge pull request #1046 from dwinston/autodetect-parse-format
Autodetect parse() format
-rw-r--r-- rdflib/graph.py 21
-rw-r--r-- rdflib/util.py 11
-rw-r--r-- test/test_parse_file_guess_format.py 32
3 files changed, 55 insertions, 9 deletions
diff --git a/rdflib/graph.py b/rdflib/graph.py
index 12d18dce..145224b8 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -2,6 +2,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+from xml.sax import SAXParseException
+
from rdflib.term import Literal # required for doctests
from rdflib.namespace import Namespace # required for doctests
@@ -21,6 +23,7 @@ from rdflib.parser import create_input_source
from rdflib.namespace import NamespaceManager
from rdflib.resource import Resource
from rdflib.collection import Collection
+import rdflib.util # avoid circular dependency
import os
import shutil
@@ -31,6 +34,7 @@ from urllib.parse import urlparse
assert Literal # avoid warning
assert Namespace # avoid warning
+
logger = logging.getLogger(__name__)
@@ -1066,13 +1070,24 @@ class Graph(Node):
)
if format is None:
format = source.content_type
+ assumed_xml = False
if format is None:
- # raise Exception("Could not determine format for %r. You can" + \
- # "expicitly specify one with the format argument." % source)
- format = "application/rdf+xml"
+ if (hasattr(source, "file")
+ and getattr(source.file, "name", None)
+ and isinstance(source.file.name, str)):
+ format = rdflib.util.guess_format(source.file.name)
+ if format is None:
+ format = "application/rdf+xml"
+ assumed_xml = True
parser = plugin.get(format, Parser)()
try:
parser.parse(source, self, **args)
+ except SAXParseException as saxpe:
+ if assumed_xml:
+ logger.warning(
+ "Could not guess format for %r, so assumed xml."
+ " You can explicitly specify format using the format argument." % source)
+ raise saxpe
finally:
if source.auto_close:
source.close()
diff --git a/rdflib/util.py b/rdflib/util.py
index 57b20915..92996ec7 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -47,8 +47,7 @@ from rdflib.exceptions import ContextTypeError
from rdflib.exceptions import ObjectTypeError
from rdflib.exceptions import PredicateTypeError
from rdflib.exceptions import SubjectTypeError
-from rdflib.graph import Graph
-from rdflib.graph import QuotedGraph
+import rdflib.graph # avoid circular dependency
from rdflib.namespace import Namespace
from rdflib.namespace import NamespaceManager
from rdflib.term import BNode
@@ -161,7 +160,7 @@ def from_n3(s, default=None, backend=None, nsm=None):
>>> from rdflib import RDFS
>>> from_n3('rdfs:label') == RDFS['label']
True
- >>> nsm = NamespaceManager(Graph())
+ >>> nsm = NamespaceManager(rdflib.graph.Graph())
>>> nsm.bind('dbpedia', 'http://dbpedia.org/resource/')
>>> berlin = URIRef('http://dbpedia.org/resource/Berlin')
>>> from_n3('dbpedia:Berlin', nsm=nsm) == berlin
@@ -207,16 +206,16 @@ def from_n3(s, default=None, backend=None, nsm=None):
return Literal(int(s))
elif s.startswith("{"):
identifier = from_n3(s[1:-1])
- return QuotedGraph(backend, identifier)
+ return rdflib.graph.QuotedGraph(backend, identifier)
elif s.startswith("["):
identifier = from_n3(s[1:-1])
- return Graph(backend, identifier)
+ return rdflib.graph.Graph(backend, identifier)
elif s.startswith("_:"):
return BNode(s[2:])
elif ":" in s:
if nsm is None:
# instantiate default NamespaceManager and rely on its defaults
- nsm = NamespaceManager(Graph())
+ nsm = NamespaceManager(rdflib.graph.Graph())
prefix, last_part = s.split(":", 1)
ns = dict(nsm.namespaces())[prefix]
return Namespace(ns)[last_part]
diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py
new file mode 100644
index 00000000..abb039df
--- /dev/null
+++ b/test/test_parse_file_guess_format.py
@@ -0,0 +1,32 @@
+import unittest
+from pathlib import Path
+from shutil import copyfile
+from tempfile import TemporaryDirectory
+
+from xml.sax import SAXParseException
+
+from rdflib import Graph, logger as graph_logger
+
+
+class FileParserGuessFormatTest(unittest.TestCase):
+ def test_ttl(self):
+ g = Graph()
+ self.assertIsInstance(g.parse("test/w3c/turtle/IRI_subject.ttl"), Graph)
+
+ def test_n3(self):
+ g = Graph()
+ self.assertIsInstance(g.parse("test/n3/example-lots_of_graphs.n3"), Graph)
+
+ def test_warning(self):
+ g = Graph()
+ with TemporaryDirectory() as tmpdirname:
+ newpath = Path(tmpdirname).joinpath("no_file_ext")
+ copyfile("test/w3c/turtle/IRI_subject.ttl", str(newpath))
+ with self.assertLogs(graph_logger, "WARNING") as log_cm:
+ with self.assertRaises(SAXParseException):
+ g.parse(str(newpath))
+ self.assertTrue(any("Could not guess format" in msg for msg in log_cm.output))
+
+
+if __name__ == '__main__':
+ unittest.main()