summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Carlos Garnacho <carlosg@gnome.org> 2023-01-22 14:47:06 +0100
committer: Carlos Garnacho <carlosg@gnome.org> 2023-01-23 15:50:06 +0100
commit: 3b0fd169216c7a18e5dfa65d9ba340f3e38f880b (patch)
tree: c5df75ea3013c4d7c8d73381c096ccfabb74ebb9
parent: cfc4f6a89ba5e07c7bd8e3d836f43bf17b32b68f (diff)
download: tracker-3b0fd169216c7a18e5dfa65d9ba340f3e38f880b.tar.gz
core: Fix possible FTS integrity corruption
From experimenting with FTS5, our usage patterns, and the 'integrity-check' FTS command, it turns out that the 'delete' command does not cope well with partial updates on a row; it instead expects the command to be issued for all table columns.

The only way to keep the FTS index happy is to issue FTS deletes/inserts for all columns in the FTS table whenever we update a rowid, and to make sure the select query on the external table "fts_view" returns the row as-is for the given rowid. For that we must move the COALESCE() that filters out rows with all-empty text columns from the FTS updating queries to the fts_view view itself.

In order to automatically fix possible corruption in existing databases, and to pick up the fts_view change, we must trigger every kind of FTS update we have: a pointless FTS field (nco:phoneNumber) was removed to trigger an update of the FTS view and table, and the parser version was bumped to ensure the FTS index is rebuilt. These modifications will update existing databases and fix any existing corruption.

Closes: https://gitlab.gnome.org/GNOME/tracker-miners/-/issues/253
-rw-r--r-- src/libtracker-common/tracker-parser.h | 2
-rw-r--r-- src/libtracker-sparql/core/tracker-data-update.c | 38
-rw-r--r-- src/libtracker-sparql/core/tracker-db-interface-sqlite.c | 10
-rw-r--r-- src/libtracker-sparql/core/tracker-fts.c | 10
-rw-r--r-- src/ontologies/nepomuk/32-nco.ontology | 6
5 files changed, 40 insertions, 26 deletions
diff --git a/src/libtracker-common/tracker-parser.h b/src/libtracker-common/tracker-parser.h
index 7e1708b81..3c8271503 100644
--- a/src/libtracker-common/tracker-parser.h
+++ b/src/libtracker-common/tracker-parser.h
@@ -30,7 +30,7 @@
* at runtime, the former must be rebuilt for those to match perfectly
* to avoid returning meaningless results on FTS searches.
*/
-#define TRACKER_PARSER_VERSION 4
+#define TRACKER_PARSER_VERSION 5
G_BEGIN_DECLS
diff --git a/src/libtracker-sparql/core/tracker-data-update.c b/src/libtracker-sparql/core/tracker-data-update.c
index 8f5ba7106..aef138b6f 100644
--- a/src/libtracker-sparql/core/tracker-data-update.c
+++ b/src/libtracker-sparql/core/tracker-data-update.c
@@ -1507,6 +1507,29 @@ static void resource_buffer_free (TrackerDataUpdateBufferResource *resource)
g_slice_free (TrackerDataUpdateBufferResource, resource);
}
+/*
+ * get_fts_properties:
+ * @data: the TrackerData whose data manager supplies the ontologies
+ *
+ * Builds the list of names of every ontology property that is flagged
+ * as full-text indexed. The FTS delete/update calls in this file pass
+ * this complete list so the FTS commands are issued for all FTS columns
+ * of a row, not only for the columns that changed.
+ *
+ * The name strings are stored exactly as returned by
+ * tracker_property_get_name() (they are not copied), and a trailing
+ * NULL element is appended after the last name.
+ *
+ * File-local helper: declared static because it is only used from this
+ * translation unit and has no prototype in a header.
+ *
+ * Returns: a newly created, NULL-terminated GPtrArray of property names.
+ */
+static GPtrArray *
+get_fts_properties (TrackerData *data)
+{
+	TrackerOntologies *ontologies;
+	TrackerProperty **properties;
+	guint n_props, i;
+	GPtrArray *result;
+
+	ontologies = tracker_data_manager_get_ontologies (data->manager);
+	properties = tracker_ontologies_get_properties (ontologies, &n_props);
+
+	result = g_ptr_array_sized_new (8);
+
+	for (i = 0; i < n_props; i++) {
+		if (tracker_property_get_fulltext_indexed (properties[i]))
+			g_ptr_array_add (result, (gpointer) tracker_property_get_name (properties[i]));
+	}
+
+	/* NULL sentinel marks the end of the name list */
+	g_ptr_array_add (result, NULL);
+
+	return result;
+}
+
void
tracker_data_update_buffer_flush (TrackerData *data,
GError **error)
@@ -1517,7 +1540,6 @@ tracker_data_update_buffer_flush (TrackerData *data,
GHashTableIter iter;
GError *actual_error = NULL;
const gchar *database;
- GList *l;
guint i;
if (data->update_buffer.update_log->len == 0)
@@ -1534,13 +1556,8 @@ tracker_data_update_buffer_flush (TrackerData *data,
GPtrArray *properties;
gboolean retval;
- properties = g_ptr_array_sized_new (8);
database = resource->graph->graph ? resource->graph->graph : "main";
-
- for (l = resource->fts_properties; l; l = l->next)
- g_ptr_array_add (properties, (gpointer) tracker_property_get_name (l->data));
-
- g_ptr_array_add (properties, NULL);
+ properties = get_fts_properties (data);
retval = tracker_db_interface_sqlite_fts_delete_text (iface,
database,
@@ -1567,13 +1584,8 @@ tracker_data_update_buffer_flush (TrackerData *data,
GPtrArray *properties;
gboolean retval;
- properties = g_ptr_array_sized_new (8);
database = resource->graph->graph ? resource->graph->graph : "main";
-
- for (l = resource->fts_properties; l; l = l->next)
- g_ptr_array_add (properties, (gpointer) tracker_property_get_name (l->data));
-
- g_ptr_array_add (properties, NULL);
+ properties = get_fts_properties (data);
retval = tracker_db_interface_sqlite_fts_update_text (iface,
database,
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
index 13ff41d84..580775e19 100644
--- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
+++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c
@@ -2297,12 +2297,11 @@ tracker_db_interface_sqlite_fts_create_update_query (TrackerDBInterface *db_int
}
query = g_strdup_printf ("INSERT INTO \"%s\".fts5 (ROWID, %s) "
- "SELECT ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ? AND COALESCE(%s, NULL) IS NOT NULL",
+ "SELECT ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ?",
database,
props_str->str,
props_str->str,
- database,
- props_str->str);
+ database);
g_string_free (props_str, TRUE);
return query;
@@ -2362,12 +2361,11 @@ tracker_db_interface_sqlite_fts_create_delete_query (TrackerDBInterface *db_int
}
query = g_strdup_printf ("INSERT INTO \"%s\".fts5 (fts5, ROWID, %s) "
- "SELECT 'delete', ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ? AND COALESCE(%s, NULL) IS NOT NULL",
+ "SELECT 'delete', ROWID, %s FROM \"%s\".fts_view WHERE ROWID = ?",
database,
props_str->str,
props_str->str,
- database,
- props_str->str);
+ database);
g_string_free (props_str, TRUE);
return query;
diff --git a/src/libtracker-sparql/core/tracker-fts.c b/src/libtracker-sparql/core/tracker-fts.c
index 8398b6e7a..8b0a9052c 100644
--- a/src/libtracker-sparql/core/tracker-fts.c
+++ b/src/libtracker-sparql/core/tracker-fts.c
@@ -80,7 +80,7 @@ tracker_fts_create_table (sqlite3 *db,
GHashTable *grouped_columns,
GError **error)
{
- GString *str, *from, *fts;
+ GString *str, *from, *fts, *column_names;
gchar *index_table;
GList *columns, *keys, *l;
gint rc;
@@ -99,6 +99,8 @@ tracker_fts_create_table (sqlite3 *db,
g_string_append_printf (fts, "\"%s\".%s USING fts5(content=\"fts_view\", ",
database, table_name);
+ column_names = g_string_new (NULL);
+
keys = g_hash_table_get_keys (tables);
keys = g_list_sort (keys, (GCompareFunc) strcmp);
@@ -120,7 +122,7 @@ tracker_fts_create_table (sqlite3 *db,
g_string_append_printf (str, " AS \"%s\" ",
(gchar *) columns->data);
- g_string_append_printf (fts, "\"%s\", ",
+ g_string_append_printf (column_names, "\"%s\", ",
(gchar *) columns->data);
columns = columns->next;
@@ -133,6 +135,8 @@ tracker_fts_create_table (sqlite3 *db,
g_list_free (keys);
+ g_string_append_printf (from, "WHERE COALESCE (%s NULL) IS NOT NULL ",
+ column_names->str);
g_string_append (from, "GROUP BY ROWID");
g_string_append (str, from->str);
g_string_free (from, TRUE);
@@ -143,6 +147,7 @@ tracker_fts_create_table (sqlite3 *db,
if (rc != SQLITE_OK)
goto error;
+ g_string_append (fts, column_names->str);
g_string_append (fts, "tokenize=TrackerTokenizer)");
rc = sqlite3_exec(db, fts->str, NULL, NULL, NULL);
@@ -158,6 +163,7 @@ tracker_fts_create_table (sqlite3 *db,
error:
g_string_free (fts, TRUE);
+ g_string_free (column_names, TRUE);
if (rc != SQLITE_OK) {
g_set_error (error,
diff --git a/src/ontologies/nepomuk/32-nco.ontology b/src/ontologies/nepomuk/32-nco.ontology
index f0d1286f1..8f0d8dd15 100644
--- a/src/ontologies/nepomuk/32-nco.ontology
+++ b/src/ontologies/nepomuk/32-nco.ontology
@@ -9,7 +9,7 @@
nco: a nrl:Namespace, nrl:Ontology ;
nrl:prefix "nco" ;
- nrl:lastModified "2016-02-28T21:30:00Z" .
+ nrl:lastModified "2023-01-23T11:30:00Z" .
nco:Role a rdfs:Class ;
rdfs:label "Role" ;
@@ -340,9 +340,7 @@ nco:phoneNumber a rdf:Property ;
nrl:maxCardinality 1 ;
rdfs:domain nco:PhoneNumber ;
rdfs:range xsd:string ;
- nrl:indexed true ;
- nrl:fulltextIndexed true ;
- nrl:weight 5 .
+ nrl:indexed true .
nco:nickname a rdf:Property ;
rdfs:label "nickname" ;