diff options
author | Sam Thursfield <sam@afuera.me.uk> | 2023-03-23 18:14:24 +0000 |
---|---|---|
committer | Sam Thursfield <sam@afuera.me.uk> | 2023-03-23 18:14:24 +0000 |
commit | ba6a9608f3b82d7708eae5b6477719668830e7e6 (patch) | |
tree | e90447c58310bfe6f6e135c4026e7f5c56503a7f | |
parent | 6409812d31697ba430ebb843133304b46991d637 (diff) | |
parent | 827ba585eec44cf3710a82406783f47bcb64f9c3 (diff) | |
download | tracker-ba6a9608f3b82d7708eae5b6477719668830e7e6.tar.gz |
Merge branch 'wip/carlosg/orderless-fts-terms' into 'master'
core: handle FTS search terms individually
See merge request https://gitlab.gnome.org/GNOME/tracker/-/merge_requests/585
-rw-r--r-- | src/libtracker-sparql/core/tracker-db-interface-sqlite.c | 62 | ||||
-rw-r--r-- | src/libtracker-sparql/core/tracker-sparql.c | 12 | ||||
-rw-r--r-- | tests/fts/fts3aa-4.out | 8 | ||||
-rw-r--r-- | tests/fts/fts3aa-4.rq | 1 | ||||
-rw-r--r-- | tests/fts/fts3aa-5.out | 4 | ||||
-rw-r--r-- | tests/fts/fts3aa-5.rq | 1 | ||||
-rw-r--r-- | tests/fts/fts3aa-6.out | 2 | ||||
-rw-r--r-- | tests/fts/fts3aa-6.rq | 1 | ||||
-rw-r--r-- | tests/fts/fts3aa-7.out | 8 | ||||
-rw-r--r-- | tests/fts/fts3aa-7.rq | 2 | ||||
-rw-r--r-- | tests/fts/fts3aa-8.out | 4 | ||||
-rw-r--r-- | tests/fts/fts3aa-8.rq | 2 | ||||
-rw-r--r-- | tests/fts/fts3aa-9.out | 1 | ||||
-rw-r--r-- | tests/fts/fts3aa-9.rq | 1 | ||||
-rw-r--r-- | tests/fts/tracker-fts-test.c | 2 |
15 files changed, 104 insertions, 7 deletions
diff --git a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c index 24c863616..2aa493ca9 100644 --- a/src/libtracker-sparql/core/tracker-db-interface-sqlite.c +++ b/src/libtracker-sparql/core/tracker-db-interface-sqlite.c @@ -1681,6 +1681,66 @@ function_sparql_print_value (sqlite3_context *context, } } +static void +function_sparql_fts_tokenize (sqlite3_context *context, + int argc, + sqlite3_value *argv[]) +{ + const gchar *fn = "SparqlFtsTokenizer helper"; + gchar *text; + const gchar *p; + gboolean in_quote = FALSE; + gboolean in_space = FALSE; + gboolean started = FALSE; + int n_output_quotes = 0; + gunichar ch; + GString *str; + int len; + gchar *result; + + if (argc > 1) { + result_context_function_error (context, fn, "Invalid argument count"); + return; + } + + text = g_strstrip (g_strdup (sqlite3_value_text (argv[0]))); + str = g_string_new (NULL); + p = text; + + while ((ch = g_utf8_get_char (p)) != 0) { + if (ch == '\"') { + n_output_quotes++; + in_quote = !in_quote; + } else if ((ch == ' ') != !!in_space) { + /* Ensure terms get independently quoted, unless + * they are within a explicitly quoted part of the text. + */ + if (!in_quote && started) { + g_string_append_c (str, '"'); + n_output_quotes++; + } + + in_space = ch == ' '; + } else if (!started) { + /* Not a quote, nor a space at the first char. Add the starting quote */ + g_string_append_c (str, '"'); + n_output_quotes++; + } + + g_string_append_unichar (str, ch); + started = TRUE; + p = g_utf8_next_char (p); + } + + if (n_output_quotes % 2 != 0) + g_string_append_c (str, '"'); + + len = str->len; + result = g_string_free (str, FALSE); + sqlite3_result_text (context, result, len, g_free); + g_free (text); +} + static int check_interrupt (void *user_data) { @@ -1757,6 +1817,8 @@ initialize_functions (TrackerDBInterface *db_interface) function_sparql_strlang }, { "SparqlPrintValue", 2, SQLITE_ANY | SQLITE_DETERMINISTIC, function_sparql_print_value }, + { "SparqlFtsTokenize", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, + function_sparql_fts_tokenize }, /* Numbers */ { "SparqlCeil", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, function_sparql_ceil }, diff --git a/src/libtracker-sparql/core/tracker-sparql.c b/src/libtracker-sparql/core/tracker-sparql.c index 500a9af44..745ed46b3 100644 --- a/src/libtracker-sparql/core/tracker-sparql.c +++ b/src/libtracker-sparql/core/tracker-sparql.c @@ -1711,11 +1711,11 @@ tracker_sparql_add_fts_subquery (TrackerSparql *sparql, if (tracker_sparql_find_graph (sparql, tracker_token_get_idstring (graph))) { _append_string_printf (sparql, "%s FROM \"%s\".\"fts5\" " - "WHERE fts5 = '\"' || REPLACE (", + "WHERE fts5 = SparqlFtsTokenize(", select_items->str, tracker_token_get_idstring (graph)); _append_literal_sql (sparql, binding); - _append_string (sparql, ", '\"', ' ') || '\"*'"); + _append_string (sparql, ") || '*' "); } else { _append_empty_select (sparql, n_properties); } @@ -1727,10 +1727,10 @@ tracker_sparql_add_fts_subquery (TrackerSparql *sparql, if (!sparql->policy.filter_unnamed_graph) { _append_string_printf (sparql, "%s, 0 FROM \"main\".\"fts5\" " - "WHERE fts5 = '\"' || REPLACE (", + "WHERE fts5 = SparqlFtsTokenize(", select_items->str); _append_literal_sql (sparql, binding); - _append_string (sparql, ", '\"', ' ') || '\"*'"); + _append_string (sparql, ") || '*' "); } else { _append_empty_select (sparql, n_properties); } @@ -1744,12 +1744,12 @@ tracker_sparql_add_fts_subquery (TrackerSparql *sparql, _append_string_printf (sparql, "UNION ALL %s, %" G_GINT64_FORMAT " AS graph " "FROM \"%s\".\"fts5\" " - "WHERE fts5 = '\"' || REPLACE (", + "WHERE fts5 = SparqlFtsTokenize(", select_items->str, *graph_id, (gchar *) graph_name); _append_literal_sql (sparql, binding); - _append_string (sparql, ", '\"', ' ') || '\"*'"); + _append_string (sparql, ") || '*' "); } g_hash_table_unref (graphs); diff --git a/tests/fts/fts3aa-4.out b/tests/fts/fts3aa-4.out new file mode 100644 index 000000000..4bdf28b05 --- /dev/null +++ b/tests/fts/fts3aa-4.out @@ -0,0 +1,8 @@ +"http://www.example.org/test#20" +"http://www.example.org/test#21" +"http://www.example.org/test#22" +"http://www.example.org/test#23" +"http://www.example.org/test#28" +"http://www.example.org/test#29" +"http://www.example.org/test#30" +"http://www.example.org/test#31" diff --git a/tests/fts/fts3aa-4.rq b/tests/fts/fts3aa-4.rq new file mode 100644 index 000000000..76f97168c --- /dev/null +++ b/tests/fts/fts3aa-4.rq @@ -0,0 +1 @@ +SELECT ?o WHERE { ?o fts:match "three five" } order by asc ?o diff --git a/tests/fts/fts3aa-5.out b/tests/fts/fts3aa-5.out new file mode 100644 index 000000000..a2f64826a --- /dev/null +++ b/tests/fts/fts3aa-5.out @@ -0,0 +1,4 @@ +"http://www.example.org/test#20" +"http://www.example.org/test#21" +"http://www.example.org/test#22" +"http://www.example.org/test#23" diff --git a/tests/fts/fts3aa-5.rq b/tests/fts/fts3aa-5.rq new file mode 100644 index 000000000..9dbb5f174 --- /dev/null +++ b/tests/fts/fts3aa-5.rq @@ -0,0 +1 @@ +SELECT ?o WHERE { ?o fts:match "\"three five\"" } order by ?o diff --git a/tests/fts/fts3aa-6.out b/tests/fts/fts3aa-6.out new file mode 100644 index 000000000..2e57c7ed9 --- /dev/null +++ b/tests/fts/fts3aa-6.out @@ -0,0 +1,2 @@ +"http://www.example.org/test#15" +"http://www.example.org/test#31" diff --git a/tests/fts/fts3aa-6.rq b/tests/fts/fts3aa-6.rq new file mode 100644 index 000000000..3482fe180 --- /dev/null +++ b/tests/fts/fts3aa-6.rq @@ -0,0 +1 @@ +SELECT ?o WHERE { ?o fts:match " \"two three\" four one" } order by ?o diff --git a/tests/fts/fts3aa-7.out b/tests/fts/fts3aa-7.out new file mode 100644 index 000000000..a082546ee --- /dev/null +++ b/tests/fts/fts3aa-7.out @@ -0,0 +1,8 @@ +"http://www.example.org/test#10" +"http://www.example.org/test#11" +"http://www.example.org/test#14" +"http://www.example.org/test#15" +"http://www.example.org/test#26" +"http://www.example.org/test#27" +"http://www.example.org/test#30" +"http://www.example.org/test#31" diff --git a/tests/fts/fts3aa-7.rq b/tests/fts/fts3aa-7.rq new file mode 100644 index 000000000..46aab7bda --- /dev/null +++ b/tests/fts/fts3aa-7.rq @@ -0,0 +1,2 @@ +# Match contains an unfinished quote +SELECT ?o WHERE { ?o fts:match "four \"two" } order by ?o diff --git a/tests/fts/fts3aa-8.out b/tests/fts/fts3aa-8.out new file mode 100644 index 000000000..9de74cce2 --- /dev/null +++ b/tests/fts/fts3aa-8.out @@ -0,0 +1,4 @@ +"http://www.example.org/test#10" +"http://www.example.org/test#11" +"http://www.example.org/test#26" +"http://www.example.org/test#27" diff --git a/tests/fts/fts3aa-8.rq b/tests/fts/fts3aa-8.rq new file mode 100644 index 000000000..0874c737c --- /dev/null +++ b/tests/fts/fts3aa-8.rq @@ -0,0 +1,2 @@ +# Match contains an unfinished quote +SELECT ?o WHERE { ?o fts:match " \"two four " } order by ?o diff --git a/tests/fts/fts3aa-9.out b/tests/fts/fts3aa-9.out new file mode 100644 index 000000000..feaf7fc17 --- /dev/null +++ b/tests/fts/fts3aa-9.out @@ -0,0 +1 @@ +"http://www.example.org/test#31" diff --git a/tests/fts/fts3aa-9.rq b/tests/fts/fts3aa-9.rq new file mode 100644 index 000000000..72e1c98ed --- /dev/null +++ b/tests/fts/fts3aa-9.rq @@ -0,0 +1 @@ +SELECT ?o WHERE { ?o fts:match "\"four five\" three \"one two\"" } order by ?o diff --git a/tests/fts/tracker-fts-test.c b/tests/fts/tracker-fts-test.c index b05da1aee..fd2308c4b 100644 --- a/tests/fts/tracker-fts-test.c +++ b/tests/fts/tracker-fts-test.c @@ -33,7 +33,7 @@ struct _TestInfo { }; const TestInfo tests[] = { - { "fts3aa", 3 }, + { "fts3aa", 9 }, { "fts3ae", 1 }, { "consistency/partial-update", 2 }, { "consistency/insert-or-replace", 2 }, |