From f0406061a98ed4be4a12ff597f58650d49e1075f Mon Sep 17 00:00:00 2001 From: Carlos Garnacho Date: Wed, 24 Mar 2021 12:20:55 +0100 Subject: libtracker-sparql/direct: Use TrackerBatch beneath update_array_async When using update_array_async(), we attempt to process the entire set of updates as a transaction, which involves dealing with it as a single SPARQL string (i.e. concatenated into a separate copy in memory). Since this single huge string may be duplicated for other purposes in libtracker-data internals (e.g. unescaping \u and \U sequences), the memory impact of dealing with it as a single string can get even worse. Use TrackerBatch underneath instead. This means queries are treated individually for parsing purposes, so any such string duplications happen over these smaller chunks, and memory does not peak as much with large sets of updates. Locally, this reduced the peak heap usage from 95MB to 85MB in tracker-miner-fs-3 when dealing with document metadata coming from tracker-extract-3. --- src/libtracker-sparql/direct/tracker-direct.c | 45 ++++++++++++++++----------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/libtracker-sparql/direct/tracker-direct.c b/src/libtracker-sparql/direct/tracker-direct.c index ff4642a9a..7f5131deb 100644 --- a/src/libtracker-sparql/direct/tracker-direct.c +++ b/src/libtracker-sparql/direct/tracker-direct.c @@ -847,6 +847,26 @@ tracker_direct_connection_update_finish (TrackerSparqlConnection *self, g_propagate_error (error, _translate_internal_error (inner_error)); } +static void +on_batch_finished (GObject *source, + GAsyncResult *result, + gpointer user_data) +{ + TrackerBatch *batch = TRACKER_BATCH (source); + GTask *task = user_data; + GError *error = NULL; + gboolean retval; + + retval = tracker_batch_execute_finish (batch, result, &error); + + if (retval) + g_task_return_boolean (task, TRUE); + else + g_task_return_error (task, error); + + g_object_unref (task); +} + static void 
tracker_direct_connection_update_array_async (TrackerSparqlConnection *self, gchar **updates, @@ -855,29 +875,18 @@ tracker_direct_connection_update_array_async (TrackerSparqlConnection *self, GAsyncReadyCallback callback, gpointer user_data) { - TrackerDirectConnectionPrivate *priv; - TrackerDirectConnection *conn; - TaskData *task_data; + TrackerBatch *batch; GTask *task; - gchar *concatenated; - gchar **array_copy; - - conn = TRACKER_DIRECT_CONNECTION (self); - priv = tracker_direct_connection_get_instance_private (conn); + gint i; - /* Make a NULL-terminated array and concatenate it */ - array_copy = g_new0 (gchar *, n_updates + 1); - memcpy (array_copy, updates, n_updates * sizeof (gchar *)); - concatenated = g_strjoinv ("\n", array_copy); - g_free (array_copy); + batch = tracker_sparql_connection_create_batch (self); - task_data = task_data_query_new (TASK_TYPE_UPDATE, concatenated, g_free); + for (i = 0; i < n_updates; i++) + tracker_batch_add_sparql (batch, updates[i]); task = g_task_new (self, cancellable, callback, user_data); - g_task_set_task_data (task, task_data, - (GDestroyNotify) task_data_free); - - g_thread_pool_push (priv->update_thread, task, NULL); + tracker_batch_execute_async (batch, cancellable, on_batch_finished, task); + g_object_unref (batch); } static gboolean -- cgit v1.2.1