diff options
author | Carlos Garnacho <carlosg@gnome.org> | 2022-03-05 22:22:04 +0000 |
---|---|---|
committer | Carlos Garnacho <carlosg@gnome.org> | 2022-03-05 22:22:04 +0000 |
commit | 2ec6eaf585ad63112fb3b4bba993fcfeb6afda4d (patch) | |
tree | 0e40937d16a38d366ca5418dec15644bac01a647 | |
parent | 3573dd0c24aacf71f10d17a81a7d1520b3b0c570 (diff) | |
parent | ebd4b50a28a8c32d981052d9f8283d24ab97d886 (diff) | |
download | tracker-2ec6eaf585ad63112fb3b4bba993fcfeb6afda4d.tar.gz |
Merge branch 'wip/carlosg/benchmark' into 'master'
Add benchmark utility
See merge request GNOME/tracker!491
-rw-r--r-- | src/libtracker-data/tracker-sparql.c | 10 | ||||
-rw-r--r-- | utils/benchmark/meson.build | 4 | ||||
-rw-r--r-- | utils/benchmark/tracker-benchmark.c | 474 | ||||
-rw-r--r-- | utils/meson.build | 1 |
4 files changed, 484 insertions, 5 deletions
diff --git a/src/libtracker-data/tracker-sparql.c b/src/libtracker-data/tracker-sparql.c
index d02686360..2397336e7 100644
--- a/src/libtracker-data/tracker-sparql.c
+++ b/src/libtracker-data/tracker-sparql.c
@@ -9513,7 +9513,7 @@ static gboolean
 translate_BlankNode (TrackerSparql *sparql,
                      GError **error)
 {
-	gint64 bnode_id = 0;
+	TrackerRowid bnode_id = 0;
 	TrackerVariable *var;
 
 	/* BlankNode ::= BLANK_NODE_LABEL | ANON
@@ -9531,11 +9531,12 @@ translate_BlankNode (TrackerSparql *sparql,
 		tracker_token_bnode_init (sparql->current_state->token, bnode_id);
 	} else if (_accept (sparql, RULE_TYPE_TERMINAL, TERMINAL_TYPE_BLANK_NODE_LABEL)) {
 		gchar *str;
-		gint64 *value;
 
 		str = _dup_last_string (sparql);
 
 		if (sparql->current_state->blank_node_map) {
+			TrackerRowid *value;
+
 			value = g_hash_table_lookup (sparql->current_state->blank_node_map, str);
 
 			if (value)
@@ -9548,10 +9549,9 @@ translate_BlankNode (TrackerSparql *sparql,
 			if (bnode_id == 0)
 				return FALSE;
 
-			value = g_new0 (gint64, 1);
-			*value = bnode_id;
 			g_hash_table_insert (sparql->current_state->blank_node_map,
-			                     g_strdup (str), value);
+			                     g_strdup (str),
+			                     tracker_rowid_copy (&bnode_id));
 		}
 
 		if (sparql->blank_nodes &&
diff --git a/utils/benchmark/meson.build b/utils/benchmark/meson.build
new file mode 100644
index 000000000..134dc67cc
--- /dev/null
+++ b/utils/benchmark/meson.build
@@ -0,0 +1,4 @@
+executable('tracker-benchmark',
+  'tracker-benchmark.c',
+  dependencies: [tracker_sparql_dep],
+  install: false)
diff --git a/utils/benchmark/tracker-benchmark.c b/utils/benchmark/tracker-benchmark.c
new file mode 100644
index 000000000..aede7d303
--- /dev/null
+++ b/utils/benchmark/tracker-benchmark.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright (C) 2022, Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Carlos Garnacho <carlosg@gnome.org>
+ */
+
+#include <libtracker-sparql/tracker-sparql.h>
+#include <locale.h>
+#include <glib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Values filled in from the command line, see entries[] */
+static gchar *database_path = NULL;
+static gint batch_size = 5000;
+static gint duration = 30;
+
+static GOptionEntry entries[] = {
+	{ "database", 'p', 0, G_OPTION_ARG_FILENAME, &database_path,
+	  "Location of the database",
+	  "FILE"
+	},
+	{ "batch-size", 'b', 0, G_OPTION_ARG_INT, &batch_size,
+	  "Update batch size",
+	  "SIZE"
+	},
+	{ "duration", 'd', 0, G_OPTION_ARG_INT, &duration,
+	  "Duration of individual benchmarks",
+	  "DURATION"
+	},
+	{ NULL }
+};
+
+/* Creates the data one benchmark iteration works on; the caller owns it */
+typedef gpointer (*DataCreateFunc) (void);
+
+/* Runs one benchmark, accumulating its results into the out arguments:
+ * total seconds in @elapsed, number of measured elements in @elems, and
+ * the fastest/slowest per-element time in @min/@max.
+ */
+typedef void (*BenchmarkFunc) (TrackerSparqlConnection *conn,
+                               DataCreateFunc data_func,
+                               double *elapsed,
+                               int *elems,
+                               double *min,
+                               double *max);
+
+enum {
+	UNIT_SEC,
+	UNIT_MSEC,
+	UNIT_USEC,
+};
+
+/* Picks the unit a time in seconds is reported in */
+static inline int
+get_unit (gdouble value)
+{
+	/* Below msec, report in usecs */
+	if (value < 0.001)
+		return UNIT_USEC;
+	else if (value < 1)
+		return UNIT_MSEC;
+
+	return UNIT_SEC;
+}
+
+/* Scales a time in seconds to the unit chosen by get_unit() */
+static gdouble
+transform_unit (gdouble value)
+{
+	int unit = get_unit (value);
+
+	switch (unit) {
+	case UNIT_USEC:
+		return value * G_USEC_PER_SEC;
+	case UNIT_MSEC:
+		return value * 1000;
+	case UNIT_SEC:
+		return value;
+	default:
+		g_assert_not_reached ();
+	}
+
+	/* Only reached if assertions are compiled out */
+	return value;
+}
+
+/* Returns the label of the unit chosen by get_unit() for @value */
+static const gchar *
+unit_string (gdouble value)
+{
+	int unit = get_unit (value);
+
+	switch (unit) {
+	case UNIT_USEC:
+		return "usec";
+	case UNIT_MSEC:
+		return "msec";
+	case UNIT_SEC:
+		return "sec";
+	default:
+		g_assert_not_reached ();
+	}
+
+	/* Only reached if assertions are compiled out */
+	return "sec";
+}
+
+/* DataCreateFunc returning a new TrackerResource typed rdfs:Resource */
+static inline gpointer
+create_resource (void)
+{
+	TrackerResource *resource;
+
+	resource = tracker_resource_new (NULL);
+	tracker_resource_set_uri (resource, "rdf:type", "rdfs:Resource");
+
+	return resource;
+}
+
+/* DataCreateFunc returning a newly allocated SPARQL query string */
+static inline gpointer
+create_query (void)
+{
+	return g_strdup ("SELECT ?u { ?u a rdfs:Resource } limit 1");
+}
+
+/* Creates a batch holding batch_size items obtained from @data_func,
+ * added as SPARQL update strings if @sparql is TRUE, or as resources
+ * otherwise.
+ */
+static inline TrackerBatch *
+create_batch (TrackerSparqlConnection *conn,
+              DataCreateFunc data_func,
+              gboolean sparql)
+{
+	TrackerBatch *batch;
+	TrackerResource *resource;
+	int i;
+
+	batch = tracker_sparql_connection_create_batch (conn);
+
+	for (i = 0; i < batch_size; i++) {
+		resource = data_func ();
+
+		if (sparql) {
+			gchar *update;
+
+			update = tracker_resource_print_sparql_update (resource,
+			                                               tracker_sparql_connection_get_namespace_manager (conn),
+			                                               NULL);
+			tracker_batch_add_sparql (batch, update);
+			g_free (update);
+		} else {
+			tracker_batch_add_resource (batch, NULL, resource);
+		}
+
+		g_object_unref (resource);
+	}
+
+	return batch;
+}
+
+/* Iterates and closes @cursor, reading the first column of every row.
+ * The return value is meaningless, it only exists to give the reads a
+ * visible effect.
+ */
+static int
+consume_cursor (TrackerSparqlCursor *cursor)
+{
+	GError *error = NULL;
+	int magic = 0;
+
+	while (tracker_sparql_cursor_next (cursor, NULL, &error)) {
+		const gchar *str;
+
+		/* Some bit fiddling so the loop is not optimized out */
+		str = tracker_sparql_cursor_get_string (cursor, 0, NULL);
+		magic ^= (str != NULL && str[0] == 'h');
+	}
+
+	/* Iteration also stops on failure, do not mistake that for the end */
+	g_assert_no_error (error);
+	tracker_sparql_cursor_close (cursor);
+
+	return magic;
+}
+
+/* Benchmarks batches of resources added with tracker_batch_add_resource() */
+static void
+benchmark_update_batch (TrackerSparqlConnection *conn,
+                        DataCreateFunc data_func,
+                        double *elapsed,
+                        int *elems,
+                        double *min,
+                        double *max)
+{
+	GTimer *timer;
+	GError *error = NULL;
+
+	timer = g_timer_new ();
+
+	while (*elapsed < duration) {
+		TrackerBatch *batch;
+		double batch_elapsed;
+
+		g_timer_reset (timer);
+		batch = create_batch (conn, data_func, FALSE);
+		tracker_batch_execute (batch, NULL, &error);
+		g_assert_no_error (error);
+		g_object_unref (batch);
+
+		batch_elapsed = g_timer_elapsed (timer, NULL);
+		*min = MIN (*min, batch_elapsed);
+		*max = MAX (*max, batch_elapsed);
+		*elapsed += batch_elapsed;
+		*elems += 1;
+	}
+
+	/* We count things by resources, not batches */
+	*min /= batch_size;
+	*max /= batch_size;
+	*elems *= batch_size;
+
+	g_timer_destroy (timer);
+}
+
+/* Benchmarks batches of updates added with tracker_batch_add_sparql() */
+static void
+benchmark_update_sparql (TrackerSparqlConnection *conn,
+                         DataCreateFunc data_func,
+                         double *elapsed,
+                         int *elems,
+                         double *min,
+                         double *max)
+{
+	GTimer *timer;
+	GError *error = NULL;
+
+	timer = g_timer_new ();
+
+	while (*elapsed < duration) {
+		TrackerBatch *batch;
+		double batch_elapsed;
+
+		g_timer_reset (timer);
+		batch = create_batch (conn, data_func, TRUE);
+		tracker_batch_execute (batch, NULL, &error);
+		g_assert_no_error (error);
+		g_object_unref (batch);
+
+		batch_elapsed = g_timer_elapsed (timer, NULL);
+		*min = MIN (*min, batch_elapsed);
+		*max = MAX (*max, batch_elapsed);
+		*elapsed += batch_elapsed;
+		*elems += 1;
+	}
+
+	/* We count things by resources, not batches */
+	*min /= batch_size;
+	*max /= batch_size;
+	*elems *= batch_size;
+
+	g_timer_destroy (timer);
+}
+
+/* Benchmarks repeated execution of a single prepared statement; preparing
+ * it is not part of the measurement.
+ */
+static void
+benchmark_query_statement (TrackerSparqlConnection *conn,
+                           DataCreateFunc data_func,
+                           double *elapsed,
+                           int *elems,
+                           double *min,
+                           double *max)
+{
+	TrackerSparqlStatement *stmt;
+	GTimer *timer;
+	GError *error = NULL;
+	gchar *query;
+
+	timer = g_timer_new ();
+	query = data_func ();
+	stmt = tracker_sparql_connection_query_statement (conn, query,
+	                                                  NULL, &error);
+	g_assert_no_error (error);
+	g_free (query);
+
+	while (*elapsed < duration) {
+		TrackerSparqlCursor *cursor;
+		double query_elapsed;
+
+		g_timer_reset (timer);
+		cursor = tracker_sparql_statement_execute (stmt, NULL, &error);
+		g_assert_no_error (error);
+		consume_cursor (cursor);
+		g_object_unref (cursor);
+
+		query_elapsed = g_timer_elapsed (timer, NULL);
+		*min = MIN (*min, query_elapsed);
+		*max = MAX (*max, query_elapsed);
+		*elapsed += query_elapsed;
+		*elems += 1;
+	}
+
+	g_object_unref (stmt);
+	g_timer_destroy (timer);
+}
+
+/* Benchmarks queries given as SPARQL strings on every iteration */
+static void
+benchmark_query_sparql (TrackerSparqlConnection *conn,
+                        DataCreateFunc data_func,
+                        double *elapsed,
+                        int *elems,
+                        double *min,
+                        double *max)
+{
+	GTimer *timer;
+	GError *error = NULL;
+	gchar *query;
+
+	timer = g_timer_new ();
+	query = data_func ();
+
+	while (*elapsed < duration) {
+		TrackerSparqlCursor *cursor;
+		double query_elapsed;
+
+		g_timer_reset (timer);
+		cursor = tracker_sparql_connection_query (conn, query,
+		                                          NULL, &error);
+		g_assert_no_error (error);
+		consume_cursor (cursor);
+		g_object_unref (cursor);
+
+		query_elapsed = g_timer_elapsed (timer, NULL);
+		*min = MIN (*min, query_elapsed);
+		*max = MAX (*max, query_elapsed);
+		*elapsed += query_elapsed;
+		*elems += 1;
+	}
+
+	g_timer_destroy (timer);
+	g_free (query);
+}
+
+/* The benchmarks to run, in the order they are reported */
+static const struct {
+	const gchar *desc;
+	BenchmarkFunc func;
+	DataCreateFunc data_func;
+} benchmarks[] = {
+	{ "Resource batch update (sync)", benchmark_update_batch, create_resource },
+	{ "SPARQL batch update (sync)", benchmark_update_sparql, create_resource },
+	{ "Prepared statement query (sync)", benchmark_query_statement, create_query },
+	{ "SPARQL query (sync)", benchmark_query_sparql, create_query },
+};
+
+/* Runs every benchmark on @conn and prints a table with the results */
+static void
+run_benchmarks (TrackerSparqlConnection *conn)
+{
+	guint i;
+	int max_len = 0;
+
+	/* The field width given to "%*s" must be an int */
+	for (i = 0; i < G_N_ELEMENTS (benchmarks); i++)
+		max_len = MAX (max_len, (int) strlen (benchmarks[i].desc));
+
+	g_print ("%*s\t\tElements\tElems/sec\tMin \tMax \tAvg\n",
+	         max_len, "Test");
+
+	for (i = 0; i < G_N_ELEMENTS (benchmarks); i++) {
+		double elapsed = 0, min = G_MAXDOUBLE, max = -G_MAXDOUBLE, adjusted, avg;
+		int elems = 0;
+
+		benchmarks[i].func (conn, benchmarks[i].data_func,
+		                    &elapsed, &elems, &min, &max);
+
+		if (elapsed > duration) {
+			/* To avoid explaining how long did the benchmark
+			 * actually take to run. Adjust the output to the
+			 * specified time limit.
+			 */
+			adjusted = elems * ((double) duration / elapsed);
+		} else {
+			adjusted = elems;
+		}
+
+		avg = elapsed / elems;
+		g_print ("%*s\t\t%.3f\t%.3f\t%.3f %s\t%.3f %s\t%3.3f %s\n",
+		         max_len, benchmarks[i].desc,
+		         adjusted,
+		         elems / elapsed,
+		         transform_unit (min), unit_string (min),
+		         transform_unit (max), unit_string (max),
+		         transform_unit (avg), unit_string (avg));
+	}
+}
+
+int
+main (int argc, char *argv[])
+{
+	TrackerSparqlConnection *conn;
+	GOptionContext *context;
+	GError *error = NULL;
+	GFile *db = NULL;
+
+	setlocale (LC_ALL, "");
+
+	context = g_option_context_new (NULL);
+	g_option_context_add_main_entries (context, entries, NULL);
+
+	if (!g_option_context_parse (context, &argc, &argv, &error)) {
+		g_printerr ("%s, %s\n", "Unrecognized options", error->message);
+		g_error_free (error);
+		g_option_context_free (context);
+		return EXIT_FAILURE;
+	}
+
+	g_option_context_free (context);
+
+	/* Otherwise benchmarks do not run at all, or results get divided by zero */
+	if (batch_size <= 0 || duration <= 0) {
+		g_printerr ("Batch size and duration must be greater than 0\n");
+		return EXIT_FAILURE;
+	}
+
+	g_print ("Batch size: %d, Individual test duration: %d sec\n",
+	         batch_size, duration);
+
+	if (database_path) {
+		if (g_file_test (database_path, G_FILE_TEST_EXISTS)) {
+			g_printerr ("Database path '%s' already exists\n", database_path);
+			return EXIT_FAILURE;
+		}
+
+		g_print ("Opening file database at '%s'…\n",
+		         database_path);
+		db = g_file_new_for_commandline_arg (database_path);
+	} else {
+		g_print ("Opening in-memory database…\n");
+	}
+
+	conn = tracker_sparql_connection_new (0, db,
+	                                      tracker_sparql_get_ontology_nepomuk (),
+	                                      NULL, &error);
+	g_assert_no_error (error);
+
+	run_benchmarks (conn);
+
+	g_object_unref (conn);
+	g_clear_object (&db);
+
+	return EXIT_SUCCESS;
+}
diff --git a/utils/meson.build b/utils/meson.build
index d571a582b..a13458910 100644
--- a/utils/meson.build
+++ b/utils/meson.build
@@ -1,3 +1,4 @@
+subdir('benchmark')
 subdir('mtp')
 subdir('tracker-resdump')
 subdir('trackertestutils')