summaryrefslogtreecommitdiff
path: root/rdflib/tools/defined_namespace_creator.py
diff options
context:
space:
mode:
Diffstat (limited to 'rdflib/tools/defined_namespace_creator.py')
-rw-r--r--rdflib/tools/defined_namespace_creator.py182
1 files changed, 182 insertions, 0 deletions
diff --git a/rdflib/tools/defined_namespace_creator.py b/rdflib/tools/defined_namespace_creator.py
new file mode 100644
index 00000000..8336fa17
--- /dev/null
+++ b/rdflib/tools/defined_namespace_creator.py
@@ -0,0 +1,182 @@
+"""
+This rdflib Python script creates a DefinedNamespace Python file from a given RDF file
+
+It is a very simple script: it finds all things defined in the RDF file within a given
+namespace:
+
+ <thing> a ?x
+
+ where ?x is anything and <thing> starts with the given namespace
+
+Nicholas J. Car, Dec, 2021
+"""
+import sys
+from pathlib import Path
+import argparse
+import datetime
+
+sys.path.append(str(Path(__file__).parent.absolute().parent.parent))
+
+from rdflib import Graph, util
+from rdflib.namespace import DCTERMS, OWL, RDFS, SKOS
+
+
def get_input_format(file_path):
    """Work out which RDF serialisation ``file_path`` is written in.

    ``util.guess_format`` is asked first. If it returns ``None``, a path whose
    text ends in ``json-ld`` or ``jsonld`` is treated as JSON-LD.

    Args:
        file_path: Path (string or path-like) of the input RDF file.

    Returns:
        The format name to hand to the RDF parser.

    Raises:
        Exception: If the format cannot be determined by either step.
    """
    path_text = str(file_path)

    guessed = util.guess_format(path_text)
    if guessed is not None:
        return guessed

    # Fallback for JSON-LD file names that the guesser did not recognise.
    if path_text.endswith(("json-ld", "jsonld")):
        return "json-ld"

    raise Exception(
        "ERROR: Cannot guess the RDF format of input file {}".format(file_path)
    )
+
+
def validate_namespace(namespace):
    """Check that ``namespace`` is terminated by ``/`` or ``#``.

    Args:
        namespace: Namespace IRI string supplied by the user.

    Raises:
        ValueError: If the string ends with neither ``/`` nor ``#``.
    """
    properly_terminated = namespace.endswith("/") or namespace.endswith("#")
    if properly_terminated:
        return

    raise ValueError("The supplied namespace must end with '/' or '#'")
+
+
def validate_object_id(object_id):
    """Check that ``object_id`` is a non-empty string of upper-case letters.

    The identifier is used verbatim as the generated class name and as part of
    the output file name, so an empty value would produce an invalid class
    statement. Every character must individually satisfy ``str.isupper()``,
    which means digits, underscores and other caseless characters are rejected.

    Args:
        object_id: Identifier for the DefinedNamespace class, e.g. ``GEO``.

    Raises:
        ValueError: If ``object_id`` is empty or contains any character that
            is not an upper-case letter.
    """
    # ``all()`` over an empty string is True, so emptiness is tested explicitly.
    if not object_id or not all(c.isupper() for c in object_id):
        raise ValueError("The supplied object_id must be an all-capitals string")
+
+
def get_classes(g, target_namespace):
    """List the classes declared in ``target_namespace`` with their definitions.

    Runs a SPARQL query on ``g`` selecting every ``?x`` that is typed, directly
    or through ``rdfs:subClassOf``, as ``owl:Class`` or ``rdfs:Class`` and whose
    IRI starts with ``target_namespace``. An optional definition is taken from
    ``rdfs:comment``, ``dcterms:description`` or ``skos:definition``.

    Args:
        g: Graph-like object exposing ``query(sparql, initNs=...)``.
        target_namespace: Namespace IRI prefix; substituted textually into the
            query in place of the ``xxx`` placeholder.

    Returns:
        A list of ``(iri, definition)`` string pairs ordered by the definition
        text. Both values are passed through ``str()``.
        NOTE(review): presumably a row without a definition yields ``None``
        and therefore the text ``"None"`` - confirm against rdflib.
    """
    query_template = """
        SELECT DISTINCT ?x ?def
        WHERE {
            # anything that is an instance of owl:Class or rdfs:Class
            # or any subclass of them
            VALUES ?c { owl:Class rdfs:Class }
            ?x rdfs:subClassOf*/a ?c .

            # get any definitions, if they have one
            OPTIONAL {
                ?x rdfs:comment|dcterms:description|skos:definition ?def
            }

            # only get results for the targetted namespace (supplied by user)
            FILTER STRSTARTS(STR(?x), "xxx")
        }
        """
    sparql = query_template.replace("xxx", target_namespace)
    prefixes = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}

    found = [(str(row[0]), str(row[1])) for row in g.query(sparql, initNs=prefixes)]

    # Ordering key is the second item of each pair (the definition text).
    return sorted(found, key=lambda pair: pair[1])
+
+
def get_target_namespace_elements(g, target_namespace):
    """Collect every typed subject in ``target_namespace`` and format it for output.

    Runs a SPARQL query on ``g`` selecting each subject that has an ``rdf:type``
    and whose IRI starts with ``target_namespace``, together with an optional
    definition (``dcterms:description``, ``rdfs:comment`` or
    ``skos:definition``).

    Args:
        g: Graph-like object exposing ``query(sparql, initNs=...)``.
        target_namespace: Namespace IRI prefix; substituted textually into the
            query in place of the ``xxx`` placeholder and stripped from each
            IRI to obtain the attribute name.

    Returns:
        A 2-tuple ``(elements, elements_strs)``:

        * ``elements`` - list of ``(iri, definition)`` string pairs sorted by
          IRI. Both values are passed through ``str()``.
          NOTE(review): presumably a missing definition becomes the text
          ``"None"`` - confirm against rdflib.
        * ``elements_strs`` - one ``<name>: URIRef # <definition>`` line per
          element, newline-terminated, ready to be written into the class body.
    """
    namespaces = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}
    q = """
        SELECT DISTINCT ?s ?def
        WHERE {
            # all things in the RDF data (anything RDF.type...)
            ?s a ?o .

            # get any definitions, if they have one
            OPTIONAL {
                ?s dcterms:description|rdfs:comment|skos:definition ?def
            }

            # only get results for the target namespace (supplied by user)
            FILTER STRSTARTS(STR(?s), "xxx")
        }
        """.replace("xxx", target_namespace)

    elements = sorted(
        ((str(r[0]), str(r[1])) for r in g.query(q, initNs=namespaces)),
        key=lambda tup: tup[0],
    )

    elements_strs = []
    for uri, definition in elements:
        # The definition ends up in a one-line ``#`` comment, so every line
        # break (including carriage returns) has to be flattened to a space.
        desc = definition.replace("\r", " ").replace("\n", " ")
        # Use the function's own parameter here rather than the script-level
        # ``args`` object, which only exists when the module runs as a script.
        local_name = uri.replace(target_namespace, "")
        elements_strs.append(f" {local_name}: URIRef # {desc}\n")

    return elements, elements_strs
+
+
def make_dn_file(output_file_name, target_namespace, elements_strs, object_id, fail):
    """Write the DefinedNamespace Python module to ``output_file_name``.

    The file consists of the imports, a class named ``object_id`` with a
    placeholder docstring stamped with the current UTC time, the ``_NS``
    assignment, an optional ``_fail = True`` line, and finally the element
    lines exactly as supplied.

    Args:
        output_file_name: Path of the file to create (overwritten if present).
        target_namespace: Namespace IRI placed inside ``Namespace("...")``.
        elements_strs: Iterable of ready-made, newline-terminated class-body
            lines; written verbatim.
        object_id: Name of the generated class.
        fail: When truthy, ``_fail = True`` is written into the class body.
    """
    # Naive UTC timestamp: renders exactly like ``datetime.utcnow()`` did, but
    # without calling the deprecated helper.
    generated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # The class-body indentation inside this template matches the prefix used
    # for the ``_NS``, ``_fail`` and element lines below.
    header = f'''from rdflib.term import URIRef
from rdflib.namespace import DefinedNamespace, Namespace


class {object_id}(DefinedNamespace):
 """
 DESCRIPTION_EDIT_ME_!

 Generated from: SOURCE_RDF_FILE_EDIT_ME_!
 Date: {generated_at}
 """
'''
    # Python source files are read as UTF-8 by default, so write UTF-8
    # explicitly instead of relying on the platform's locale encoding;
    # definitions frequently contain non-ASCII characters.
    with open(output_file_name, "w", encoding="utf-8") as f:
        f.write(header)
        f.write("\n")
        f.write(f' _NS = Namespace("{target_namespace}")')
        f.write("\n\n")
        if fail:
            f.write(" _fail = True")
            f.write("\n\n")
        f.writelines(elements_strs)
+
+
if __name__ == "__main__":
    # Command-line entry point: read an ontology, pick out everything defined
    # in the requested namespace and write ``_<OBJECT_ID>.py`` into the current
    # working directory.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "ontology_file",
        type=str,
        help="Path to the RDF ontology to extract a DefinedNamespace from.",
    )

    parser.add_argument(
        "target_namespace",
        type=str,
        help="The namespace within the ontology that you want to create a DefinedNamespace for.",
    )

    parser.add_argument(
        "object_id",
        type=str,
        help="The RDFlib object ID of the DefinedNamespace, e.g. GEO for GeoSPARQL.",
    )

    # ``--fail`` / ``--no-fail`` share the ``fail`` destination.
    parser.add_argument(
        "-f",
        "--fail",
        dest="fail",
        action="store_true",
        help="Whether (true) or not (false) to mimic ClosedNamespace and fail on non-element use",
    )
    parser.add_argument("--no-fail", dest="fail", action="store_false")
    # The default has to be set on the ``fail`` destination used by the two
    # flags above; any other key would only add an unused attribute.
    parser.set_defaults(fail=False)

    # ``args`` is deliberately left at module level.
    args = parser.parse_args()

    # Check the cheap arguments first so that a typo is reported before the
    # ontology is parsed.
    validate_namespace(args.target_namespace)
    validate_object_id(args.object_id)

    g = Graph().parse(args.ontology_file, format=get_input_format(args.ontology_file))

    print(f"Creating DefinedNamespace file {args.object_id} for {args.target_namespace}...")
    print(f"Ontology with {len(g)} triples loaded...")

    print("Getting all namespace elements...")
    # Index 0 holds the (iri, definition) pairs, index 1 the formatted lines.
    elements = get_target_namespace_elements(g, args.target_namespace)

    output_file_name = Path.cwd() / f"_{args.object_id}.py"
    print(f"Creating DefinedNamespace Python file {output_file_name}")
    make_dn_file(output_file_name, args.target_namespace, elements[1], args.object_id, args.fail)
+
+
+