diff options
author | Sam Thursfield <sam@afuera.me.uk> | 2016-04-07 17:30:56 +0100 |
---|---|---|
committer | Sam Thursfield <sam@afuera.me.uk> | 2016-06-30 02:30:12 +0100 |
commit | 98df5c61bcc1c395708527a881509b3dab513df4 (patch) | |
tree | dc3a832bb6fb5278f0e5d3dcbd4c39369c1e14ed | |
parent | 185ac59b6a9b8691e7ef89424daa0ef6e2111910 (diff) | |
download | tracker-98df5c61bcc1c395708527a881509b3dab513df4.tar.gz |
Add support to extractors for outputting metadata as JSON-LD [wip/sam/resource-rebase-2]
This adds a new dependency on the JSON-GLib library.
https://bugzilla.gnome.org/show_bug.cgi?id=767472
-rw-r--r-- | configure.ac | 6 | ||||
-rw-r--r-- | docs/manpages/tracker-extract.1 | 2 | ||||
-rw-r--r-- | src/libtracker-common/tracker-enums.h | 6 | ||||
-rw-r--r-- | src/libtracker-sparql/tracker-resource.c | 136 | ||||
-rw-r--r-- | src/libtracker-sparql/tracker-resource.h | 2 | ||||
-rw-r--r-- | src/tracker-extract/tracker-extract.c | 21 | ||||
-rw-r--r-- | src/tracker-extract/tracker-extract.h | 7 | ||||
-rw-r--r-- | src/tracker-extract/tracker-main.c | 4 | ||||
-rw-r--r-- | src/tracker/tracker-extract.c | 14 |
9 files changed, 184 insertions, 14 deletions
diff --git a/configure.ac b/configure.ac index f24d08428..4b861c543 100644 --- a/configure.ac +++ b/configure.ac @@ -232,6 +232,7 @@ GSTREAMER_REQUIRED=0.10.31 GUPNP_DLNA_REQUIRED=0.9.4 LIBPNG_REQUIRED=0.89 LIBMEDIAART_REQUIRED=1.9.0 +JSON_GLIB_REQUIRED=1.0.4 # 3.6.11 for sqlite_backup API # 3.6.16 to fix test failures @@ -317,8 +318,9 @@ LIBTRACKER_CONTROL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED PKG_CHECK_MODULES(LIBTRACKER_CONTROL, [$LIBTRACKER_CONTROL_REQUIRED]) # Check requirements for libtracker-sparql -LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED - gio-unix-2.0 >= $GLIB_REQUIRED +LIBTRACKER_SPARQL_REQUIRED="glib-2.0 >= $GLIB_REQUIRED + gio-unix-2.0 >= $GLIB_REQUIRED + json-glib-1.0 >= $JSON_GLIB_REQUIRED uuid" PKG_CHECK_MODULES(LIBTRACKER_SPARQL, [$LIBTRACKER_SPARQL_REQUIRED]) diff --git a/docs/manpages/tracker-extract.1 b/docs/manpages/tracker-extract.1 index 27b630be0..eedf5213f 100644 --- a/docs/manpages/tracker-extract.1 +++ b/docs/manpages/tracker-extract.1 @@ -71,7 +71,7 @@ The possible \fILEVEL\fR options are: .TP .B \-o, \-\-output-format\fR=<\fIFORMAT\fR> Choose which format to use to output results. Supported formats are -\fIsparql\fR and \fIturtle\fR. +\fIsparql\fR, \fIturtle\fR and \fIjson-ld\fR. .SH EXAMPLES .TP diff --git a/src/libtracker-common/tracker-enums.h b/src/libtracker-common/tracker-enums.h index 2be97c174..f3e2bbd53 100644 --- a/src/libtracker-common/tracker-enums.h +++ b/src/libtracker-common/tracker-enums.h @@ -38,6 +38,12 @@ typedef enum { typedef enum { TRACKER_SERIALIZATION_FORMAT_SPARQL, TRACKER_SERIALIZATION_FORMAT_TURTLE, + /* JSON and JSON_LD are treated as the same thing right now, but we could + * treat them differently if we wanted. also it's nice to be able to pass + * both 'json' and 'json-ld' to `tracker extract --output-format=`. 
+ */ + TRACKER_SERIALIZATION_FORMAT_JSON, + TRACKER_SERIALIZATION_FORMAT_JSON_LD, } TrackerSerializationFormat; G_END_DECLS diff --git a/src/libtracker-sparql/tracker-resource.c b/src/libtracker-sparql/tracker-resource.c index f3a88b6ba..fd509e206 100644 --- a/src/libtracker-sparql/tracker-resource.c +++ b/src/libtracker-sparql/tracker-resource.c @@ -18,6 +18,7 @@ */ #include <glib.h> +#include <json-glib/json-glib.h> #include <string.h> @@ -1051,3 +1052,138 @@ tracker_resource_generate_sparql_update (TrackerResource *resource, g_list_free (context.done_list); } + +typedef struct { + JsonBuilder *builder; + GList *done_list; +} GenerateJsonldData; + +static void generate_jsonld_foreach (gpointer key, gpointer value_ptr, gpointer user_data); + +static void +tracker_resource_generate_jsonld (TrackerResource *self, + GenerateJsonldData *data) +{ + /* FIXME: generate a JSON-LD context ! */ + + TrackerResourcePrivate *priv = GET_PRIVATE (self); + JsonBuilder *builder = data->builder; + JsonNode *result; + + json_builder_begin_object (builder); + + /* The JSON-LD spec says it is "important that nodes have an identifier", but + * doesn't mandate one. I think it's better to omit the ID for blank nodes + * (where the caller passed NULL as an identifier) than to emit something + * SPARQL-specific like '_:123'. 
+ */ + if (strncmp (priv->identifier, "_:", 2) != 0) { + json_builder_set_member_name (builder, "@id"); + json_builder_add_string_value (builder, priv->identifier); + } + + g_hash_table_foreach (priv->properties, generate_jsonld_foreach, data); + + json_builder_end_object (builder); +}; + +static void +generate_jsonld_value (const GValue *value, + GenerateJsonldData *data) +{ + JsonNode *node; + + if (G_VALUE_HOLDS (value, TRACKER_TYPE_RESOURCE)) { + TrackerResource *resource; + + resource = TRACKER_RESOURCE (g_value_get_object (value)); + + if (g_list_find_custom (data->done_list, resource, (GCompareFunc) tracker_resource_compare) == NULL) { + tracker_resource_generate_jsonld (resource, data); + + data->done_list = g_list_prepend (data->done_list, resource); + } else { + json_builder_add_string_value (data->builder, tracker_resource_get_identifier(resource)); + } + } else if (G_VALUE_HOLDS (value, TRACKER_TYPE_URI)) { + /* URIs can be treated the same as strings in JSON-LD provided the @context + * sets the type of that property correctly. However, json_node_set_value() + * will reject a GValue holding TRACKER_TYPE_URI, so we have to extract the + * string manually here. 
+ */ + const char *uri = g_value_get_string (value); + node = json_node_new (JSON_NODE_VALUE); + json_node_set_string (node, uri); + json_builder_add_value (data->builder, node); + } else { + node = json_node_new (JSON_NODE_VALUE); + json_node_set_value (node, value); + json_builder_add_value (data->builder, node); + } +} + +static void +generate_jsonld_foreach (gpointer key, + gpointer value_ptr, + gpointer user_data) +{ + const char *property = key; + const GValue *value = value_ptr; + GenerateJsonldData *data = user_data; + JsonBuilder *builder = data->builder; + + if (strcmp (property, "rdf:type") == 0) { + property = "@type"; + } + + json_builder_set_member_name (builder, property); + if (G_VALUE_HOLDS (value, G_TYPE_PTR_ARRAY)) { + json_builder_begin_array (builder); + g_ptr_array_foreach (g_value_get_boxed (value), (GFunc) generate_jsonld_value, data); + json_builder_end_array (builder); + } else { + generate_jsonld_value (value, data); + } +} + +/** + * tracker_resource_print_jsonld: + * @resource: a #TrackerResource + * @error: address where an error can be returned + * + * Serialize all the information in @resource as a JSON-LD document. + * + * See <http://www.jsonld.org/> for more information on the JSON-LD + * serialization format. 
+ * + * Returns: a newly-allocated string + * + * Since: 1.10 + */ +char * +tracker_resource_print_jsonld (TrackerResource *resource) +{ + GenerateJsonldData context; + JsonNode *json_root_node; + JsonGenerator *generator; + char *result; + + context.done_list = NULL; + context.builder = json_builder_new (); + + tracker_resource_generate_jsonld (resource, &context); + json_root_node = json_builder_get_root (context.builder); + + generator = json_generator_new (); + json_generator_set_root (generator, json_root_node); + json_generator_set_pretty (generator, TRUE); + + result = json_generator_to_data (generator, NULL); + + g_list_free (context.done_list); + json_node_free (json_root_node); + g_object_unref (context.builder); + g_object_unref (generator); + + return result; +} diff --git a/src/libtracker-sparql/tracker-resource.h b/src/libtracker-sparql/tracker-resource.h index d17fdd252..9e74784c1 100644 --- a/src/libtracker-sparql/tracker-resource.h +++ b/src/libtracker-sparql/tracker-resource.h @@ -77,6 +77,8 @@ char *tracker_resource_print_turtle(TrackerResource *self, TrackerNamespaceManag void tracker_resource_generate_sparql_update (TrackerResource *self, TrackerSparqlBuilder *builder, TrackerNamespaceManager *namespaces, const char *graph_id); +char *tracker_resource_print_jsonld (TrackerResource *self); + G_END_DECLS #endif /* __LIBTRACKER_RESOURCE_H__ */ diff --git a/src/tracker-extract/tracker-extract.c b/src/tracker-extract/tracker-extract.c index f12f9bef1..1180588b9 100644 --- a/src/tracker-extract/tracker-extract.c +++ b/src/tracker-extract/tracker-extract.c @@ -738,9 +738,10 @@ tracker_extract_get_media_art_process (TrackerExtract *extract) #endif void -tracker_extract_get_metadata_by_cmdline (TrackerExtract *object, - const gchar *uri, - const gchar *mime) +tracker_extract_get_metadata_by_cmdline (TrackerExtract *object, + const gchar *uri, + const gchar *mime, + TrackerSerializationFormat output_format) { GError *error = NULL; TrackerExtractPrivate 
*priv; @@ -815,6 +816,20 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract *object, g_print ("%s\n", turtle); g_free (turtle); } + } else { + /* JSON-LD extraction */ + char *json; + + /* If this was going into the tracker-store we'd generate a unique ID + * here, so that the data persisted across file renames. + */ + tracker_resource_set_identifier (resource, uri); + + json = tracker_resource_print_jsonld (resource); + if (json) { + g_print ("%s\n", json); + g_free (json); + } } tracker_extract_info_unref (info); diff --git a/src/tracker-extract/tracker-extract.h b/src/tracker-extract/tracker-extract.h index 50fa8c378..882c601fc 100644 --- a/src/tracker-extract/tracker-extract.h +++ b/src/tracker-extract/tracker-extract.h @@ -79,9 +79,10 @@ void tracker_extract_dbus_start (TrackerExtract void tracker_extract_dbus_stop (TrackerExtract *extract); /* Not DBus API */ -void tracker_extract_get_metadata_by_cmdline (TrackerExtract *object, - const gchar *path, - const gchar *mime); +void tracker_extract_get_metadata_by_cmdline (TrackerExtract *object, + const gchar *path, + const gchar *mime, + TrackerSerializationFormat output_format); G_END_DECLS diff --git a/src/tracker-extract/tracker-main.c b/src/tracker-extract/tracker-main.c index bf6456d26..1aba0bded 100644 --- a/src/tracker-extract/tracker-main.c +++ b/src/tracker-extract/tracker-main.c @@ -96,7 +96,7 @@ static GOptionEntry entries[] = { N_("Force a module to be used for extraction (e.g. 
\"foo\" for \"foo.so\")"), N_("MODULE") }, { "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format_name, - N_("Output results format: 'sparql', or 'turtle'"), + N_("Output results format: 'sparql', 'turtle' or 'json'"), N_("FORMAT") }, { "version", 'V', 0, G_OPTION_ARG_NONE, &version, @@ -289,7 +289,7 @@ run_standalone (TrackerConfig *config) return EXIT_FAILURE; } - tracker_extract_get_metadata_by_cmdline (object, uri, mime_type); + tracker_extract_get_metadata_by_cmdline (object, uri, mime_type, output_format); g_object_unref (object); g_object_unref (file); diff --git a/src/tracker/tracker-extract.c b/src/tracker/tracker-extract.c index d4979f3fc..af219a55a 100644 --- a/src/tracker/tracker-extract.c +++ b/src/tracker/tracker-extract.c @@ -31,6 +31,7 @@ #include "tracker-extract.h" static gchar *verbosity; +static gchar *output_format = "turtle"; static gchar **filenames; #define EXTRACT_OPTIONS_ENABLED() \ @@ -40,6 +41,9 @@ static GOptionEntry entries[] = { { "verbosity", 'v', 0, G_OPTION_ARG_STRING, &verbosity, N_("Sets the logging verbosity to LEVEL ('debug', 'detailed', 'minimal', 'errors') for all processes"), N_("LEVEL") }, + { "output-format", 'o', 0, G_OPTION_ARG_STRING, &output_format, + N_("Output results format: 'sparql', 'turtle' or 'json-ld'"), + N_("FORMAT") }, { G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, &filenames, N_("FILE"), N_("FILE") }, @@ -48,7 +52,8 @@ static GOptionEntry entries[] = { static gint -extract_files (TrackerVerbosity verbosity) +extract_files (TrackerVerbosity verbosity, + char *output_format) { char **p; char *tracker_extract_path; @@ -60,7 +65,10 @@ extract_files (TrackerVerbosity verbosity) tracker_extract_path = g_build_filename(LIBEXECDIR, "tracker-extract", NULL); for (p = filenames; *p; p++) { - char *argv[] = {tracker_extract_path, "--verbosity", verbosity_str, "--file", *p, NULL}; + char *argv[] = {tracker_extract_path, + "--output-format", output_format, + "--verbosity", verbosity_str, + "--file", 
*p, NULL }; g_spawn_sync(NULL, argv, NULL, G_SPAWN_DEFAULT, NULL, NULL, NULL, NULL, NULL, &error); @@ -99,7 +107,7 @@ extract_run (void) } } - return extract_files (verbosity_level); + return extract_files (verbosity_level, output_format); } static int |