diff options
author | Carlos Garnacho <carlosg@gnome.org> | 2023-01-22 14:47:06 +0100 |
---|---|---|
committer | Carlos Garnacho <carlosg@gnome.org> | 2023-01-23 15:50:06 +0100 |
commit | 3b0fd169216c7a18e5dfa65d9ba340f3e38f880b (patch) | |
tree | c5df75ea3013c4d7c8d73381c096ccfabb74ebb9 | |
parent | cfc4f6a89ba5e07c7bd8e3d836f43bf17b32b68f (diff) | |
download | tracker-3b0fd169216c7a18e5dfa65d9ba340f3e38f880b.tar.gz |
core: Fix possible FTS integrity corruption
From experimenting with FTS5, our usage patterns, and the 'integrity-check'
FTS command, it turns out the 'delete' command does not entirely like doing
partial updates on a row; it instead expects the command to be issued for
all table columns.
The only way to keep the FTS index happy is that we issue FTS deletes/inserts
for all columns in the FTS table whenever we update a rowid, and that the
select query on the external table "fts_view" is as-is for the given rowid.
For that we must move the COALESCE() that filters out rows with all-empty
text columns from the FTS updating queries to the fts_view view itself.
In order to automatically fix possible corruptions in existing databases,
and to have them pick up the fts_view change, we must trigger every kind of
FTS update we have: a pointless FTS field (nco:phoneNumber) was removed to trigger
an update on the fts view+table, and the parser version was bumped to ensure
the FTS index is rebuilt. These modifications will update existing databases
to fix any existing corruption.
Closes: https://gitlab.gnome.org/GNOME/tracker-miners/-/issues/253
-rw-r--r-- | src/libtracker-common/tracker-parser.h | 2 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-data-update.c | 38 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-db-interface-sqlite.c | 10 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-fts.c | 10 | ||||
-rw-r--r-- | src/ontologies/nepomuk/32-nco.ontology | 6 |
5 files changed, 40 insertions, 26 deletions
diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h index 7e1708b81..3c8271503 100644 --- a/src/libtracker-common/tracker-parser.h +++ b/src/libtracker-common/tracker-parser.h @@ -30,7 +30,7 @@ * at runtime, the former must be rebuilt for those to match perfectly * to avoid returning meaningless results on FTS searches. */ -#define TRACKER_PARSER_VERSION 4 +#define TRACKER_PARSER_VERSION 5 G_BEGIN_DECLS diff --git a/src/libtracker-sparql/core/tracker-data-update.c b/src/libtracker-sparql/core/tracker-data-update.c index 8f5ba7106..aef138b6f 100644 --- a/src/libtracker-sparql/core/tracker-data-update.c +++ b/src/libtracker-sparql/core/tracker-data-update.c @@ -1507,6 +1507,29 @@ static void resource_buffer_free (TrackerDataUpdateBufferResource *resource) g_slice_free (TrackerDataUpdateBufferResource, resource); } +GPtrArray * +get_fts_properties (TrackerData *data) +{ + TrackerOntologies *ontologies; + TrackerProperty **properties; + guint n_props, i; + GPtrArray *result; + + ontologies = tracker_data_manager_get_ontologies (data->manager); + properties = tracker_ontologies_get_properties (ontologies, &n_props); + + result = g_ptr_array_sized_new (8); + + for (i = 0; i < n_props; i++) { + if (tracker_property_get_fulltext_indexed (properties[i])) + g_ptr_array_add (result, (gpointer) tracker_property_get_name (properties[i])); + } + + g_ptr_array_add (result, NULL); + + return result; +} + void tracker_data_update_buffer_flush (TrackerData *data, GError **error) @@ -1517,7 +1540,6 @@ tracker_data_update_buffer_flush (TrackerData *data, GHashTableIter iter; GError *actual_error = NULL; const gchar *database; - GList *l; guint i; if (data->update_buffer.update_log->len == 0) @@ -1534,13 +1556,8 @@ tracker_data_update_buffer_flush (TrackerData *data, GPtrArray *properties; gboolean retval; - properties = g_ptr_array_sized_new (8); database = resource->graph->graph ? 
resource->graph->graph : "main"; - - for (l = resource->fts_properties; l; l = l->next) - g_ptr_array_add (properties, (gpointer) tracker_property_get_name (l->data)); - - g_ptr_array_add (properties, NULL); + properties = get_fts_properties (data); retval = tracker_db_interface_sqlite_fts_delete_text (iface, database, @@ -1567,13 +1584,8 @@ tracker_data_update_buffer_flush (TrackerData *data, GPtrArray *properties; gboolean retval; - properties = g_ptr_array_sized_new (8); database = resource->graph->graph ? resource->graph->graph : "main"; - - for (l = resource->fts_properties; l; l = l->next) - g_ptr_array_add (properties, (gpointer) tracker_property_get_name (l->data)); - - g_ptr_array_add (properties, NULL); + properties = get_fts_properties (data); retval = tracker_db_interface_sqlite_fts_update_text (iface, database, diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c index 13ff41d84..580775e19 100644 --- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c +++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c @@ -2297,12 +2297,11 @@ tracker_db_interface_sqlite_fts_create_update_query (TrackerDBInterface *db_int } query = g_strdup_printf ("INSERT INTO \"%s\".fts5 (ROWID, %s) " - "SELECT ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ? AND COALESCE(%s, NULL) IS NOT NULL", + "SELECT ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ?", database, props_str->str, props_str->str, - database, - props_str->str); + database); g_string_free (props_str, TRUE); return query; @@ -2362,12 +2361,11 @@ tracker_db_interface_sqlite_fts_create_delete_query (TrackerDBInterface *db_int } query = g_strdup_printf ("INSERT INTO \"%s\".fts5 (fts5, ROWID, %s) " - "SELECT 'delete', ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ? 
AND COALESCE(%s, NULL) IS NOT NULL", + "SELECT 'delete', ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ?", database, props_str->str, props_str->str, - database, - props_str->str); + database); g_string_free (props_str, TRUE); return query; diff --git a/src/libtracker-sparql/core/tracker-fts.c b/src/libtracker-sparql/core/tracker-fts.c index 8398b6e7a..8b0a9052c 100644 --- a/src/libtracker-sparql/core/tracker-fts.c +++ b/src/libtracker-sparql/core/tracker-fts.c @@ -80,7 +80,7 @@ tracker_fts_create_table (sqlite3 *db, GHashTable *grouped_columns, GError **error) { - GString *str, *from, *fts; + GString *str, *from, *fts, *column_names; gchar *index_table; GList *columns, *keys, *l; gint rc; @@ -99,6 +99,8 @@ tracker_fts_create_table (sqlite3 *db, g_string_append_printf (fts, "\"%s\".%s USING fts5(content=\"fts_view\", ", database, table_name); + column_names = g_string_new (NULL); + keys = g_hash_table_get_keys (tables); keys = g_list_sort (keys, (GCompareFunc) strcmp); @@ -120,7 +122,7 @@ tracker_fts_create_table (sqlite3 *db, g_string_append_printf (str, " AS \"%s\" ", (gchar *) columns->data); - g_string_append_printf (fts, "\"%s\", ", + g_string_append_printf (column_names, "\"%s\", ", (gchar *) columns->data); columns = columns->next; @@ -133,6 +135,8 @@ tracker_fts_create_table (sqlite3 *db, g_list_free (keys); + g_string_append_printf (from, "WHERE COALESCE (%s NULL) IS NOT NULL ", + column_names->str); g_string_append (from, "GROUP BY ROWID"); g_string_append (str, from->str); g_string_free (from, TRUE); @@ -143,6 +147,7 @@ tracker_fts_create_table (sqlite3 *db, if (rc != SQLITE_OK) goto error; + g_string_append (fts, column_names->str); g_string_append (fts, "tokenize=TrackerTokenizer)"); rc = sqlite3_exec(db, fts->str, NULL, NULL, NULL); @@ -158,6 +163,7 @@ tracker_fts_create_table (sqlite3 *db, error: g_string_free (fts, TRUE); + g_string_free (column_names, TRUE); if (rc != SQLITE_OK) { g_set_error (error, diff --git 
a/src/ontologies/nepomuk/32-nco.ontology b/src/ontologies/nepomuk/32-nco.ontology index f0d1286f1..8f0d8dd15 100644 --- a/src/ontologies/nepomuk/32-nco.ontology +++ b/src/ontologies/nepomuk/32-nco.ontology @@ -9,7 +9,7 @@ nco: a nrl:Namespace, nrl:Ontology ; nrl:prefix "nco" ; - nrl:lastModified "2016-02-28T21:30:00Z" . + nrl:lastModified "2023-01-23T11:30:00Z" . nco:Role a rdfs:Class ; rdfs:label "Role" ; @@ -340,9 +340,7 @@ nco:phoneNumber a rdf:Property ; nrl:maxCardinality 1 ; rdfs:domain nco:PhoneNumber ; rdfs:range xsd:string ; - nrl:indexed true ; - nrl:fulltextIndexed true ; - nrl:weight 5 . + nrl:indexed true . nco:nickname a rdf:Property ; rdfs:label "nickname" ; |