summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarlos Garnacho <carlosg@gnome.org>2022-07-11 20:25:12 +0200
committerCarlos Garnacho <carlosg@gnome.org>2023-01-10 13:02:31 +0100
commitddf6665bccd51343362124dd0b250988d64cb940 (patch)
tree96963a0f96c359cfd3dc473d20f53024f0d4650f
parent026b123bf488d510d8ce14d6cdb8aa195204b182 (diff)
downloadtracker-ddf6665bccd51343362124dd0b250988d64cb940.tar.gz
libtracker-sparql: Add JSON-LD serializer
This object converts an RDF TrackerSparqlCursor into a document in the JSON-LD format. This serializer makes an effort to buffer and produce compact JSON-LD (e.g. prefer nested objects), but the threshold is not too high in order to avoid too much memory overhead when serializing huge RDF datasets.
-rw-r--r--src/libtracker-sparql/meson.build1
-rw-r--r--src/libtracker-sparql/tracker-serializer-json-ld.c416
-rw-r--r--src/libtracker-sparql/tracker-serializer-json-ld.h36
3 files changed, 453 insertions, 0 deletions
diff --git a/src/libtracker-sparql/meson.build b/src/libtracker-sparql/meson.build
index 756ffb8ae..e5207b071 100644
--- a/src/libtracker-sparql/meson.build
+++ b/src/libtracker-sparql/meson.build
@@ -40,6 +40,7 @@ libtracker_sparql_sources = files(
'tracker-statement.c',
'tracker-serializer.c',
'tracker-serializer-json.c',
+ 'tracker-serializer-json-ld.c',
'tracker-serializer-trig.c',
'tracker-serializer-turtle.c',
'tracker-serializer-xml.c',
diff --git a/src/libtracker-sparql/tracker-serializer-json-ld.c b/src/libtracker-sparql/tracker-serializer-json-ld.c
new file mode 100644
index 000000000..cb7162831
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-json-ld.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) 2020, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg@gnome.org>
+ */
+
+/* Serialization of cursors to the JSON-LD format defined at:
+ * https://www.w3.org/TR/json-ld/
+ */
+
+#include "config.h"
+
+#include "tracker-serializer-json-ld.h"
+
+#include <json-glib/json-glib.h>
+
+/* Instance state of the JSON-LD serializer. */
+struct _TrackerSerializerJsonLD
+{
+ TrackerSerializer parent_instance;
+ /* Writes JsonNode trees as text into "data" */
+ JsonGenerator *generator;
+ /* Created lazily and released on close */
+ JsonBuilder *builder;
+ /* Maps a subject string to the JsonNode being built for it */
+ GHashTable *resources;
+ /* Nodes from "resources" that are still to be emitted as top-level
+  * objects, i.e. were not nested inside another resource */
+ GList *recent_resources;
+ /* Serialized text not yet handed out through read() */
+ GString *data;
+ /* Created lazily and released on close */
+ GPtrArray *vars;
+ /* Graph and subject of the most recently processed row */
+ gchar *cur_graph;
+ gchar *cur_subject;
+ /* JSON object that receives the properties of the current subject */
+ JsonObject *cur_resource;
+ /* Set when the stream is closed; read() returns 0 afterwards */
+ guint stream_closed : 1;
+ /* Set once the first row was processed */
+ guint cursor_started : 1;
+ /* Set once the cursor reported no more rows */
+ guint cursor_finished : 1;
+ /* Flag for whether the "@context" preamble was emitted */
+ guint context_printed : 1;
+ /* Whether ",\n" has to precede the next emitted top-level object */
+ guint needs_separator : 1;
+};
+
+G_DEFINE_TYPE (TrackerSerializerJsonLD, tracker_serializer_json_ld,
+ TRACKER_TYPE_SERIALIZER)
+
+/* Once the resources table holds more entries than this, the buffered
+ * objects are flushed with finish_objects() before the next subject
+ * is looked up.
+ */
+#define MAX_CACHED_RESOURCES 50
+
+/* GObject finalize handler: closes the stream and releases every piece
+ * of state owned by the serializer.
+ */
+static void
+tracker_serializer_json_ld_finalize (GObject *object)
+{
+	TrackerSerializerJsonLD *serializer_json_ld =
+		TRACKER_SERIALIZER_JSON_LD (object);
+
+	g_input_stream_close (G_INPUT_STREAM (object), NULL, NULL);
+
+	/* The list only borrows nodes owned by the hash table, so drop the
+	 * list links first and let the table unref the nodes themselves.
+	 * Neither is released by close(), and they are non-empty whenever
+	 * the cursor was not read to the end.
+	 */
+	g_clear_list (&serializer_json_ld->recent_resources, NULL);
+	g_clear_pointer (&serializer_json_ld->resources, g_hash_table_unref);
+
+	g_clear_pointer (&serializer_json_ld->cur_graph, g_free);
+	g_clear_pointer (&serializer_json_ld->cur_subject, g_free);
+
+	G_OBJECT_CLASS (tracker_serializer_json_ld_parent_class)->finalize (object);
+}
+
+/* Callback for tracker_namespace_manager_foreach(): adds one
+ * "key": "value" string member to the JsonBuilder passed as user data.
+ */
+static void
+generate_namespace_foreach (gpointer key,
+                            gpointer value,
+                            gpointer user_data)
+{
+	JsonBuilder *context_builder = user_data;
+	const gchar *member_name = key;
+	const gchar *member_value = value;
+
+	json_builder_set_member_name (context_builder, member_name);
+	json_builder_add_string_value (context_builder, member_value);
+}
+
+/* Flushes all buffered resources: every node that is still listed in
+ * recent_resources is written to the output buffer (comma separated),
+ * then the resources table and the list are emptied.
+ */
+static void
+finish_objects (TrackerSerializerJsonLD *serializer_json_ld)
+{
+	JsonGenerator *generator = serializer_json_ld->generator;
+	GString *out = serializer_json_ld->data;
+	GHashTableIter iter;
+	gpointer entry;
+
+	g_hash_table_iter_init (&iter, serializer_json_ld->resources);
+
+	while (g_hash_table_iter_next (&iter, NULL, &entry)) {
+		JsonNode *resource = entry;
+		gboolean is_root;
+
+		/* Nodes no longer in the list were nested into another
+		 * resource and get printed as part of that one.
+		 */
+		is_root = g_list_find (serializer_json_ld->recent_resources,
+		                       resource) != NULL;
+
+		if (is_root) {
+			if (serializer_json_ld->needs_separator)
+				g_string_append (out, ",\n");
+
+			json_generator_set_root (generator, resource);
+			json_generator_to_gstring (generator, out);
+			serializer_json_ld->needs_separator = TRUE;
+		}
+
+		/* Printed or not, the entry leaves the cache */
+		g_hash_table_iter_remove (&iter);
+	}
+
+	g_clear_list (&serializer_json_ld->recent_resources, NULL);
+}
+
+/*
+ * serialize_up_to_position:
+ * @serializer_json_ld: the serializer
+ * @pos: amount of pending output the caller wants to have available
+ * @cancellable: passed on to tracker_sparql_cursor_next()
+ * @error: set when iterating the cursor fails
+ *
+ * Lazily creates the serializer state, then reads rows from the cursor
+ * (column 0: subject, 1: predicate, 2: object, 3: graph) and appends
+ * JSON-LD text to the pending output buffer. Resources are buffered as
+ * JsonNode trees and only written out by finish_objects(), so the loop
+ * keeps going until the buffer holds more than @pos bytes at a subject
+ * boundary, or until the cursor is exhausted.
+ *
+ * The "{", "@context" and opening "@graph" array are written up front
+ * and exactly once, so that a cursor without rows still produces a
+ * well-formed document, and a call repeated after an error does not
+ * emit the opening brace twice.
+ *
+ * Returns: %TRUE on success (including end of results), %FALSE with
+ * @error set if tracker_sparql_cursor_next() reported an error.
+ */
+static gboolean
+serialize_up_to_position (TrackerSerializerJsonLD  *serializer_json_ld,
+                          gsize                     pos,
+                          GCancellable             *cancellable,
+                          GError                  **error)
+{
+	TrackerSparqlCursor *cursor;
+	TrackerNamespaceManager *namespaces;
+	GError *inner_error = NULL;
+	gboolean check_finish = FALSE;
+
+	if (!serializer_json_ld->data)
+		serializer_json_ld->data = g_string_new (NULL);
+	if (!serializer_json_ld->generator)
+		serializer_json_ld->generator = json_generator_new ();
+	if (!serializer_json_ld->builder)
+		serializer_json_ld->builder = json_builder_new ();
+	if (!serializer_json_ld->vars)
+		serializer_json_ld->vars = g_ptr_array_new_with_free_func (g_free);
+
+	if (!serializer_json_ld->resources) {
+		serializer_json_ld->resources = g_hash_table_new_full (g_str_hash, g_str_equal,
+		                                                       g_free, (GDestroyNotify) json_node_unref);
+	}
+
+	/* Enough output is pending already */
+	if (pos < serializer_json_ld->data->len)
+		return TRUE;
+
+	cursor = tracker_serializer_get_cursor (TRACKER_SERIALIZER (serializer_json_ld));
+	namespaces = tracker_serializer_get_namespaces (TRACKER_SERIALIZER (serializer_json_ld));
+
+	if (!serializer_json_ld->context_printed) {
+		JsonBuilder *builder;
+		JsonNode *context;
+
+		/* Build the "@context" object out of the known namespaces */
+		builder = json_builder_new ();
+		json_builder_begin_object (builder);
+		tracker_namespace_manager_foreach (namespaces,
+		                                   generate_namespace_foreach,
+		                                   builder);
+		json_builder_end_object (builder);
+
+		context = json_builder_get_root (builder);
+		json_generator_set_root (serializer_json_ld->generator, context);
+		g_object_unref (builder);
+		json_node_unref (context);
+
+		g_string_append (serializer_json_ld->data, "{\"@context\":");
+		json_generator_to_gstring (serializer_json_ld->generator,
+		                           serializer_json_ld->data);
+		g_string_append (serializer_json_ld->data,
+		                 ",\"@graph\":[");
+
+		serializer_json_ld->context_printed = TRUE;
+	}
+
+	while (!serializer_json_ld->cursor_finished) {
+		const gchar *graph, *subject, *predicate;
+		gboolean graph_changed, subject_changed;
+		TrackerSparqlValueType object_type;
+		JsonNode *value = NULL;
+		gchar *prop = NULL;
+
+		if (!tracker_sparql_cursor_next (cursor, cancellable, &inner_error)) {
+			if (inner_error) {
+				g_propagate_error (error, inner_error);
+				return FALSE;
+			}
+
+			/* End of results: flush what is buffered, then close
+			 * the named graph (if any) and the document.
+			 */
+			finish_objects (serializer_json_ld);
+			serializer_json_ld->cursor_finished = TRUE;
+
+			if (serializer_json_ld->cur_graph)
+				g_string_append (serializer_json_ld->data, "]}");
+
+			g_string_append (serializer_json_ld->data, "]}");
+
+			return TRUE;
+		}
+
+		subject = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+		predicate = tracker_sparql_cursor_get_string (cursor, 1, NULL);
+		object_type = tracker_sparql_cursor_get_value_type (cursor, 2);
+		graph = tracker_sparql_cursor_get_string (cursor, 3, NULL);
+
+		graph_changed = g_strcmp0 (graph, serializer_json_ld->cur_graph) != 0;
+		subject_changed = g_strcmp0 (subject, serializer_json_ld->cur_subject) != 0;
+
+		/* The first row always opens a new graph and subject */
+		if (!serializer_json_ld->cursor_started)
+			graph_changed = subject_changed = TRUE;
+
+		if (graph_changed) {
+			/* New/different graph */
+			if (serializer_json_ld->cursor_started) {
+				finish_objects (serializer_json_ld);
+				if (serializer_json_ld->cur_graph)
+					g_string_append (serializer_json_ld->data, "]}");
+
+				g_string_append (serializer_json_ld->data, ",\n");
+			}
+
+			if (graph) {
+				/* NOTE(review): the graph name is inserted
+				 * verbatim, without JSON string escaping.
+				 */
+				g_string_append_printf (serializer_json_ld->data,
+				                        "{\"@id\":\"%s\",\"@graph\":[",
+				                        graph);
+			}
+
+			g_clear_pointer (&serializer_json_ld->cur_graph, g_free);
+			serializer_json_ld->cur_graph = g_strdup (graph);
+			serializer_json_ld->needs_separator = FALSE;
+		}
+
+		if (subject_changed || graph_changed) {
+			JsonNode *resource_node;
+			JsonObject *object;
+
+			/* New/different subject */
+			if (serializer_json_ld->cursor_started && subject_changed)
+				check_finish = TRUE;
+
+			if (g_hash_table_size (serializer_json_ld->resources) > MAX_CACHED_RESOURCES)
+				finish_objects (serializer_json_ld);
+
+			resource_node = g_hash_table_lookup (serializer_json_ld->resources, subject);
+
+			if (!resource_node) {
+				resource_node = json_node_new (JSON_NODE_OBJECT);
+				object = json_object_new ();
+				json_object_set_string_member (object, "@id", subject);
+				json_node_set_object (resource_node, object);
+				json_object_unref (object);
+
+				g_hash_table_insert (serializer_json_ld->resources,
+				                     g_strdup (subject), resource_node);
+				serializer_json_ld->recent_resources =
+					g_list_prepend (serializer_json_ld->recent_resources,
+					                resource_node);
+			} else {
+				object = json_node_get_object (resource_node);
+			}
+
+			serializer_json_ld->cur_resource = object;
+
+			g_clear_pointer (&serializer_json_ld->cur_subject, g_free);
+			serializer_json_ld->cur_subject = g_strdup (subject);
+		}
+
+		if (g_strcmp0 (predicate, TRACKER_PREFIX_RDF "type") == 0) {
+			const gchar *type;
+			gchar *compressed;
+
+			/* rdf:type maps to the "@type" keyword */
+			type = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+			compressed = tracker_namespace_manager_compress_uri (namespaces, type);
+
+			prop = g_strdup ("@type");
+
+			value = json_node_alloc ();
+			json_node_init_string (value, compressed);
+			g_free (compressed);
+		} else {
+			JsonNode *referenced;
+			const gchar *res;
+
+			prop = tracker_namespace_manager_compress_uri (namespaces, predicate);
+
+			switch (object_type) {
+			case TRACKER_SPARQL_VALUE_TYPE_URI:
+			case TRACKER_SPARQL_VALUE_TYPE_BLANK_NODE:
+				res = tracker_sparql_cursor_get_string (cursor, 2, NULL);
+
+				referenced = g_hash_table_lookup (serializer_json_ld->resources,
+				                                  res);
+
+				if (referenced &&
+				    serializer_json_ld->cur_resource != json_node_get_object (referenced) &&
+				    g_list_find (serializer_json_ld->recent_resources, referenced)) {
+					/* This is still a "root" node, make it part of this tree */
+					serializer_json_ld->recent_resources =
+						g_list_remove (serializer_json_ld->recent_resources,
+						               referenced);
+					value = json_node_ref (referenced);
+				} else {
+					JsonObject *object;
+
+					/* Unknown object, or one already referenced elsewhere */
+					value = json_node_new (JSON_NODE_OBJECT);
+					object = json_object_new ();
+					json_object_set_string_member (object, "@id", res);
+					json_node_set_object (value, object);
+					json_object_unref (object);
+				}
+				break;
+			case TRACKER_SPARQL_VALUE_TYPE_STRING:
+			case TRACKER_SPARQL_VALUE_TYPE_DATETIME:
+				value = json_node_alloc ();
+				json_node_init_string (value, tracker_sparql_cursor_get_string (cursor, 2, NULL));
+				break;
+			case TRACKER_SPARQL_VALUE_TYPE_INTEGER:
+				value = json_node_alloc ();
+				json_node_init_int (value, tracker_sparql_cursor_get_integer (cursor, 2));
+				break;
+			case TRACKER_SPARQL_VALUE_TYPE_DOUBLE:
+				value = json_node_alloc ();
+				json_node_init_double (value, tracker_sparql_cursor_get_double (cursor, 2));
+				break;
+			case TRACKER_SPARQL_VALUE_TYPE_BOOLEAN:
+				value = json_node_alloc ();
+				json_node_init_boolean (value, tracker_sparql_cursor_get_boolean (cursor, 2));
+				break;
+			case TRACKER_SPARQL_VALUE_TYPE_UNBOUND:
+			default:
+				/* Nothing to store for this row */
+				break;
+			}
+		}
+
+		if (prop && value) {
+			JsonNode *prev;
+			JsonArray *array;
+
+			prev = json_object_get_member (serializer_json_ld->cur_resource,
+			                               prop);
+
+			if (!prev) {
+				/* First value of this property */
+				json_object_set_member (serializer_json_ld->cur_resource,
+				                        prop, value);
+			} else if (JSON_NODE_HOLDS_ARRAY (prev)) {
+				array = json_node_get_array (prev);
+				json_array_add_element (array, value);
+			} else if (!json_node_equal (prev, value)) {
+				/* Second distinct value: promote to an array. The
+				 * extra ref keeps the previous node alive while
+				 * the member is replaced.
+				 */
+				array = json_array_new ();
+				json_array_add_element (array, json_node_ref (prev));
+				json_array_add_element (array, value);
+
+				json_object_set_array_member (serializer_json_ld->cur_resource,
+				                              prop, array);
+			} else {
+				/* Same value as already stored: nothing took
+				 * ownership of it, so release it here.
+				 */
+				json_node_unref (value);
+			}
+		} else if (value) {
+			json_node_unref (value);
+		}
+
+		g_free (prop);
+		serializer_json_ld->cursor_started = TRUE;
+
+		/* Only stop at a subject boundary, once enough text is pending */
+		if (check_finish && serializer_json_ld->data->len > pos)
+			break;
+	}
+
+	return TRUE;
+}
+
+/* GInputStream read_fn: serializes more of the cursor as needed and
+ * copies up to @count pending bytes into @buffer, removing them from
+ * the pending buffer. Returns the number of bytes copied, 0 once the
+ * stream is closed or fully consumed, or -1 if serialization failed.
+ */
+static gssize
+tracker_serializer_json_ld_read (GInputStream  *istream,
+                                 gpointer       buffer,
+                                 gsize          count,
+                                 GCancellable  *cancellable,
+                                 GError       **error)
+{
+	TrackerSerializerJsonLD *self = TRACKER_SERIALIZER_JSON_LD (istream);
+	GString *pending;
+	gsize n_bytes;
+
+	if (self->stream_closed)
+		return 0;
+
+	/* Everything was serialized and handed out already */
+	if (self->cursor_finished && self->data->len == 0)
+		return 0;
+
+	if (!serialize_up_to_position (self, count, cancellable, error))
+		return -1;
+
+	pending = self->data;
+	n_bytes = count < pending->len ? count : pending->len;
+
+	memcpy (buffer, pending->str, n_bytes);
+	g_string_erase (pending, 0, n_bytes);
+
+	return n_bytes;
+}
+
+/* GInputStream close_fn: marks the stream as closed and releases the
+ * pending output buffer, the JSON generator/builder and the vars
+ * array. Always returns TRUE.
+ */
+static gboolean
+tracker_serializer_json_ld_close (GInputStream  *istream,
+                                  GCancellable  *cancellable,
+                                  GError       **error)
+{
+	TrackerSerializerJsonLD *self = TRACKER_SERIALIZER_JSON_LD (istream);
+	GString *pending = self->data;
+
+	self->data = NULL;
+	if (pending)
+		g_string_free (pending, TRUE);
+
+	g_clear_object (&self->generator);
+	g_clear_object (&self->builder);
+	g_clear_pointer (&self->vars, g_ptr_array_unref);
+	self->stream_closed = TRUE;
+
+	return TRUE;
+}
+
+/* Class initializer: installs the finalize handler and the
+ * GInputStream read/close implementations.
+ */
+static void
+tracker_serializer_json_ld_class_init (TrackerSerializerJsonLDClass *klass)
+{
+	GInputStreamClass *stream_class = G_INPUT_STREAM_CLASS (klass);
+	GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+
+	gobject_class->finalize = tracker_serializer_json_ld_finalize;
+
+	stream_class->close_fn = tracker_serializer_json_ld_close;
+	stream_class->read_fn = tracker_serializer_json_ld_read;
+}
+
+/* Instance initializer. Intentionally empty: the output buffer, JSON
+ * generator/builder and resource table are created lazily by
+ * serialize_up_to_position().
+ */
+static void
+tracker_serializer_json_ld_init (TrackerSerializerJsonLD *serializer)
+{
+}
diff --git a/src/libtracker-sparql/tracker-serializer-json-ld.h b/src/libtracker-sparql/tracker-serializer-json-ld.h
new file mode 100644
index 000000000..13325e00f
--- /dev/null
+++ b/src/libtracker-sparql/tracker-serializer-json-ld.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2022, Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg@gnome.org>
+ */
+
+#ifndef TRACKER_SERIALIZER_JSON_LD_H
+#define TRACKER_SERIALIZER_JSON_LD_H
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <libtracker-sparql/tracker-private.h>
+#include <libtracker-sparql/tracker-serializer.h>
+
+/* GType of the JSON-LD serializer */
+#define TRACKER_TYPE_SERIALIZER_JSON_LD (tracker_serializer_json_ld_get_type())
+
+/* Declares TrackerSerializerJsonLD as a final type deriving from
+ * TrackerSerializer.
+ */
+G_DECLARE_FINAL_TYPE (TrackerSerializerJsonLD,
+ tracker_serializer_json_ld,
+ TRACKER, SERIALIZER_JSON_LD,
+ TrackerSerializer)
+
+#endif /* TRACKER_SERIALIZER_JSON_LD_H */