summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarlos Garnacho <carlosg@gnome.org>2016-02-27 18:12:36 +0100
committerCarlos Garnacho <carlosg@gnome.org>2016-02-29 15:56:08 +0100
commit93c149a53bea1b8de79ae3b0086d7ffce62b5609 (patch)
tree614f1bc94e6d40417614f00ab0d289c684530660
parentb324a0fd4329f8a1de3a476a1788e6c4c94c50be (diff)
downloadtracker-wip/fts5.tar.gz
Update to FTS5wip/fts5
Our old stale copy of the FTS3/4 module is now deleted, replaced by a shinier FTS5 embedded module. If at configure time we detect that SQLite doesn't offer the FTS5 module, we will load our own, just as we used to do with FTS4. FTS5 brings a few differences in the ways it's meant to be extended; the tokenizer has been updated to cope with the differences. Also, FTS5 offers no offsets() builtin function, nor matchinfo(), which we used to implement ranking. It does, though, offer ways to implement additional functions, and builtin rank support which can be tweaked to achieve the same functional results as we had before. Other than that, the ways to interact with the FTS virtual table are roughly similar to those in FTS4; insertions and deletions have been updated to do things the FTS5 way. Since it's not worth bumping the database format (data is reproduced from the journal, so we drop some embedded data such as nie:plainTextContent), the nco:hobby property has been modified to no longer be fulltext indexed (AFAIK there are no users ever setting/accessing that), and the FTS properties change will trigger the regeneration of the FTS view and virtual tables, resulting in a seamless update to FTS5. However, we don't come away completely unscathed from the fts3_tokenizer() change. Since the older FTS3/4 tokenizer is not registered, we can't just drop the older FTS table. So it is left dangling and never accessed again, in favor of the newer fts5 table. This is obviously not a problem when creating the database from scratch. Along the way, a few bugs were found. Per-property weights in ranking were being given in a scrambled way (although stable across database generations). And deletion of FTS properties (or entire rows) could result in the tokens not being fully removed from the FTS table, resulting in confused searches. These are now fixed. Impact on users of tracker should be none. All the FTS Sparql-to-SQL translation has been updated to just use FTS5 syntax and tables.
-rw-r--r--configure.ac16
-rw-r--r--m4/sqlite-builtin-fts5.m4 (renamed from m4/sqlite-builtin-fts4.m4)12
-rw-r--r--src/libtracker-data/tracker-data-manager.c15
-rw-r--r--src/libtracker-data/tracker-data-update.c19
-rw-r--r--src/libtracker-data/tracker-db-interface-sqlite.c201
-rw-r--r--src/libtracker-data/tracker-db-interface-sqlite.h16
-rw-r--r--src/libtracker-data/tracker-sparql-expression.vala10
-rw-r--r--src/libtracker-data/tracker-sparql-pattern.vala19
-rw-r--r--src/libtracker-fts/Makefile.am25
-rw-r--r--src/libtracker-fts/fts3.c5350
-rw-r--r--src/libtracker-fts/fts3.h32
-rw-r--r--src/libtracker-fts/fts3Int.h560
-rw-r--r--src/libtracker-fts/fts3_aux.c474
-rw-r--r--src/libtracker-fts/fts3_expr.c1013
-rw-r--r--src/libtracker-fts/fts3_hash.c383
-rw-r--r--src/libtracker-fts/fts3_hash.h112
-rw-r--r--src/libtracker-fts/fts3_icu.c261
-rw-r--r--src/libtracker-fts/fts3_porter.c646
-rw-r--r--src/libtracker-fts/fts3_snippet.c1520
-rw-r--r--src/libtracker-fts/fts3_term.c373
-rw-r--r--src/libtracker-fts/fts3_test.c535
-rw-r--r--src/libtracker-fts/fts3_tokenizer.c488
-rw-r--r--src/libtracker-fts/fts3_tokenizer.h161
-rw-r--r--src/libtracker-fts/fts3_tokenizer1.c234
-rw-r--r--src/libtracker-fts/fts3_unicode.c393
-rw-r--r--src/libtracker-fts/fts3_unicode2.c366
-rw-r--r--src/libtracker-fts/fts3_write.c5402
-rw-r--r--src/libtracker-fts/fts5.c20402
-rw-r--r--src/libtracker-fts/fts5.h578
-rw-r--r--src/libtracker-fts/tracker-fts-tokenizer.c549
-rw-r--r--src/libtracker-fts/tracker-fts-tokenizer.h4
-rw-r--r--src/libtracker-fts/tracker-fts.c250
-rw-r--r--src/ontologies/32-nco.ontology3
33 files changed, 21571 insertions, 18851 deletions
diff --git a/configure.ac b/configure.ac
index 29d68d72e..2f071f52d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -820,19 +820,19 @@ fi
if test "x$have_tracker_fts" = "xyes"; then
AC_DEFINE(HAVE_TRACKER_FTS, [1], [Define to 1 if tracker FTS is compiled])
- AX_SQLITE_BUILTIN_FTS4
- if test "x$ax_cv_sqlite_builtin_fts4" = "xyes" ; then
- have_builtin_fts4="yes"
- AC_DEFINE(HAVE_BUILTIN_FTS, [], [Defined if Sqlite has FTS4 compiled in])
+ AX_SQLITE_BUILTIN_FTS5
+ if test "x$ax_cv_sqlite_builtin_fts5" = "xyes" ; then
+ have_builtin_fts5="yes"
+ AC_DEFINE(HAVE_BUILTIN_FTS, [], [Defined if Sqlite has FTS5 compiled in])
else
- have_builtin_fts4="no"
+ have_builtin_fts5="no"
fi
else
AC_DEFINE(HAVE_TRACKER_FTS, [0], [Define to 0 if tracker FTS is not compiled])
- have_builtin_fts4="disabled"
+ have_builtin_fts5="disabled"
fi
-AM_CONDITIONAL(HAVE_BUILTIN_FTS, test "$have_builtin_fts4" = "yes")
+AM_CONDITIONAL(HAVE_BUILTIN_FTS, test "$have_builtin_fts5" = "yes")
AM_CONDITIONAL(HAVE_TRACKER_FTS, test "$have_tracker_fts" = "yes")
####################################################################
@@ -2790,7 +2790,7 @@ Feature Support:
Unicode support library: $with_unicode_support
Build with Journal support: $have_tracker_journal
- Build with SQLite FTS support: $have_tracker_fts (built-in FTS: $have_builtin_fts4)
+ Build with SQLite FTS support: $have_tracker_fts (built-in FTS: $have_builtin_fts5)
Build with Stemming support: $have_libstemmer
Cache media art $have_libmediaart (libmediaart)
diff --git a/m4/sqlite-builtin-fts4.m4 b/m4/sqlite-builtin-fts5.m4
index bb0d31121..635f15dc7 100644
--- a/m4/sqlite-builtin-fts4.m4
+++ b/m4/sqlite-builtin-fts5.m4
@@ -1,4 +1,4 @@
-AC_DEFUN([AX_SQLITE_BUILTIN_FTS4],
+AC_DEFUN([AX_SQLITE_BUILTIN_FTS5],
[
AC_REQUIRE([AC_PROG_CC])
@@ -12,7 +12,7 @@ AC_DEFUN([AX_SQLITE_BUILTIN_FTS4],
AC_CHECK_HEADERS([sqlite3.h])
AC_CACHE_CHECK([whether SQLite3 has required FTS features],
- [ax_cv_sqlite_builtin_fts4],
+ [ax_cv_sqlite_builtin_fts5],
[
AC_RUN_IFELSE(
[AC_LANG_PROGRAM([[#include <sqlite3.h>]],
@@ -22,11 +22,11 @@ AC_DEFUN([AX_SQLITE_BUILTIN_FTS4],
if (rc!=SQLITE_OK) return -1;
rc = sqlite3_exec(db, "create table a(text)", 0, 0, 0);
if (rc!=SQLITE_OK) return -1;
- rc = sqlite3_exec(db, "create virtual table t using fts4(content='a',text)", 0, 0, 0);
+ rc = sqlite3_exec(db, "create virtual table t using fts5(content='a',text)", 0, 0, 0);
if (rc!=SQLITE_OK) return -1;]])],
- [ax_cv_sqlite_builtin_fts4=yes],
- [ax_cv_sqlite_builtin_fts4=no],
- [ax_cv_sqlite_builtin_fts4=no])])
+ [ax_cv_sqlite_builtin_fts5=yes],
+ [ax_cv_sqlite_builtin_fts5=no],
+ [ax_cv_sqlite_builtin_fts5=no])])
CFLAGS="$OLD_CFLAGS"
LDFLAGS="$OLD_LDFLAGS"
diff --git a/src/libtracker-data/tracker-data-manager.c b/src/libtracker-data/tracker-data-manager.c
index b7d2ffea2..5a811bd79 100644
--- a/src/libtracker-data/tracker-data-manager.c
+++ b/src/libtracker-data/tracker-data-manager.c
@@ -3642,14 +3642,11 @@ ontology_get_fts_properties (gboolean only_new,
}
static void
-rebuild_fts_tokens (TrackerDBInterface *iface,
- gboolean creating_db)
+rebuild_fts_tokens (TrackerDBInterface *iface)
{
- if (!creating_db) {
- g_debug ("Rebuilding FTS tokens, this may take a moment...");
- tracker_db_interface_sqlite_fts_rebuild_tokens (iface);
- g_debug ("FTS tokens rebuilt");
- }
+ g_debug ("Rebuilding FTS tokens, this may take a moment...");
+ tracker_db_interface_sqlite_fts_rebuild_tokens (iface);
+ g_debug ("FTS tokens rebuilt");
/* Update the stamp file */
tracker_db_manager_tokenizer_update ();
@@ -4636,9 +4633,9 @@ tracker_data_manager_init (TrackerDBManagerFlags flags,
tracker_db_manager_set_current_locale ();
#if HAVE_TRACKER_FTS
- rebuild_fts_tokens (iface, is_first_time_index);
+ rebuild_fts_tokens (iface);
} else if (!read_only && tracker_db_manager_get_tokenizer_changed ()) {
- rebuild_fts_tokens (iface, is_first_time_index);
+ rebuild_fts_tokens (iface);
#endif
}
diff --git a/src/libtracker-data/tracker-data-update.c b/src/libtracker-data/tracker-data-update.c
index fcf0cc594..9cdfcaf46 100644
--- a/src/libtracker-data/tracker-data-update.c
+++ b/src/libtracker-data/tracker-data-update.c
@@ -949,7 +949,6 @@ tracker_data_resource_buffer_flush (GError **error)
if (resource_buffer->fts_updated) {
TrackerProperty *prop;
GArray *values;
- gboolean create = resource_buffer->create;
GPtrArray *properties, *text;
properties = text = NULL;
@@ -982,8 +981,7 @@ tracker_data_resource_buffer_flush (GError **error)
tracker_db_interface_sqlite_fts_update_text (iface,
resource_buffer->id,
(const gchar **) properties->pdata,
- (const gchar **) text->pdata,
- create);
+ (const gchar **) text->pdata);
update_buffer.fts_ever_updated = TRUE;
g_ptr_array_free (properties, TRUE);
g_ptr_array_free (text, TRUE);
@@ -1465,17 +1463,26 @@ get_old_property_values (TrackerProperty *property,
if (tracker_property_get_fulltext_indexed (prop)
&& check_property_domain (prop)) {
const gchar *property_name;
+ GString *str;
gint i;
old_values = get_property_values (prop);
property_name = tracker_property_get_name (prop);
+ str = g_string_new (NULL);
/* delete old fts entries */
for (i = 0; i < old_values->len; i++) {
- tracker_db_interface_sqlite_fts_delete_text (iface,
- resource_buffer->id,
- property_name);
+ GValue *value = &g_array_index (old_values, GValue, i);
+ if (i != 0)
+ g_string_append_c (str, ',');
+ g_string_append (str, g_value_get_string (value));
}
+
+ tracker_db_interface_sqlite_fts_delete_text (iface,
+ resource_buffer->id,
+ property_name,
+ str->str);
+ g_string_free (str, TRUE);
}
}
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.c b/src/libtracker-data/tracker-db-interface-sqlite.c
index c4f011905..1bd455548 100644
--- a/src/libtracker-data/tracker-db-interface-sqlite.c
+++ b/src/libtracker-data/tracker-db-interface-sqlite.c
@@ -90,8 +90,7 @@ struct TrackerDBInterface {
gpointer busy_user_data;
gchar *busy_status;
- gchar *fts_insert_str;
- gchar *fts_delete_str;
+ gchar *fts_properties;
};
struct TrackerDBInterfaceClass {
@@ -1352,7 +1351,7 @@ tracker_db_interface_sqlite_fts_init (TrackerDBInterface *db_interface,
tracker_fts_init_db (db_interface->db, properties);
if (create &&
- !tracker_fts_create_table (db_interface->db, "fts",
+ !tracker_fts_create_table (db_interface->db, "fts5",
properties, multivalued)) {
g_warning ("FTS tables creation failed");
}
@@ -1360,33 +1359,18 @@ tracker_db_interface_sqlite_fts_init (TrackerDBInterface *db_interface,
fts_columns = _fts_create_properties (properties);
if (fts_columns) {
- GString *insert, *select, *delete, cols;
- gint i = 0;
-
- insert = g_string_new ("INSERT INTO fts (docid");
- select = g_string_new ("SELECT rowid");
- delete = g_string_new ("UPDATE fts SET docid=?");
-
- while (fts_columns[i]) {
- g_string_append_printf (insert, ", \"%s\"",
- fts_columns[i]);
- g_string_append_printf (select, ", \"%s\"",
- fts_columns[i]);
- g_string_append_printf (delete, ", \"%s\"=\"\"",
- fts_columns[i]);
- i++;
- }
-
- g_string_append (select, " FROM fts_view WHERE rowid=?");
- g_string_append (insert, ") ");
- g_string_append (insert, select->str);
+ GString *fts_properties;
+ gint i;
- g_string_free (select, TRUE);
- db_interface->fts_insert_str = g_string_free (insert, FALSE);
+ fts_properties = g_string_new (NULL);
- g_string_append (delete, " WHERE docid=?");
- db_interface->fts_delete_str = g_string_free (delete, FALSE);
+ for (i = 0; fts_columns[i] != NULL; i++) {
+ g_string_append_printf (fts_properties, ", \"%s\"",
+ fts_columns[i]);
+ }
+ db_interface->fts_properties = g_string_free (fts_properties,
+ FALSE);
g_strfreev (fts_columns);
}
#endif
@@ -1399,104 +1383,134 @@ tracker_db_interface_sqlite_fts_alter_table (TrackerDBInterface *db_interface,
GHashTable *properties,
GHashTable *multivalued)
{
- if (!tracker_fts_alter_table (db_interface->db, "fts", properties, multivalued)) {
+ if (!tracker_fts_alter_table (db_interface->db, "fts5", properties, multivalued)) {
g_critical ("Failed to update FTS columns");
}
}
+static gchar *
+tracker_db_interface_sqlite_fts_create_query (TrackerDBInterface *db_interface,
+ gboolean delete,
+ const gchar **properties)
+{
+ GString *insert_str, *values_str;
+ gint i;
+
+ insert_str = g_string_new ("INSERT INTO fts5 (");
+ values_str = g_string_new (NULL);
+
+ if (delete) {
+ g_string_append (insert_str, "fts5,");
+ g_string_append (values_str, "'delete',");
+ }
+
+ g_string_append (insert_str, "rowid");
+ g_string_append (values_str, "?");
+
+ for (i = 0; properties[i] != NULL; i++) {
+ g_string_append_printf (insert_str, ",\"%s\"", properties[i]);
+ g_string_append (values_str, ",?");
+ }
+
+ g_string_append_printf (insert_str, ") VALUES (%s)", values_str->str);
+ g_string_free (values_str, TRUE);
+
+ return g_string_free (insert_str, FALSE);
+}
+
+static gchar *
+tracker_db_interface_sqlite_fts_create_delete_all_query (TrackerDBInterface *db_interface)
+{
+ GString *insert_str;
+
+ insert_str = g_string_new (NULL);
+ g_string_append_printf (insert_str,
+ "INSERT INTO fts5 (fts5, rowid %s) "
+ "SELECT 'delete', rowid %s FROM fts_view "
+ "WHERE rowid = ?",
+ db_interface->fts_properties,
+ db_interface->fts_properties);
+ return g_string_free (insert_str, FALSE);
+}
+
gboolean
tracker_db_interface_sqlite_fts_update_text (TrackerDBInterface *db_interface,
int id,
const gchar **properties,
- const gchar **text,
- gboolean create)
+ const gchar **text)
{
TrackerDBStatement *stmt;
GError *error = NULL;
+ gchar *query;
+ gint i;
- if (!create) {
- stmt = tracker_db_interface_create_statement (db_interface,
- TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE,
- &error,
- "DELETE FROM fts WHERE docid=?");
-
- if (!stmt || error) {
- if (error) {
- g_warning ("Could not create FTS update statement: %s",
- error->message);
- g_error_free (error);
- }
- return FALSE;
- }
-
- tracker_db_statement_bind_int (stmt, 0, id);
- tracker_db_statement_execute (stmt, &error);
- g_object_unref (stmt);
-
- if (error) {
- g_warning ("Could not update FTS text: %s", error->message);
- g_error_free (error);
- return FALSE;
- }
- }
-
+ query = tracker_db_interface_sqlite_fts_create_query (db_interface,
+ FALSE, properties);
stmt = tracker_db_interface_create_statement (db_interface,
TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE,
&error,
- "%s",
- db_interface->fts_insert_str);
+ "%s", query);
+ g_free (query);
- if (!stmt || error) {
- if (error) {
- g_warning ("Could not create FTS insert statement: %s\n",
- error->message);
- g_error_free (error);
- }
- return FALSE;
- }
+ if (!stmt || error) {
+ if (error) {
+ g_warning ("Could not create FTS insert statement: %s\n",
+ error->message);
+ g_error_free (error);
+ }
+ return FALSE;
+ }
- tracker_db_statement_bind_int (stmt, 0, id);
- tracker_db_statement_execute (stmt, &error);
- g_object_unref (stmt);
+ tracker_db_statement_bind_int (stmt, 0, id);
+ for (i = 0; text[i] != NULL; i++) {
+ tracker_db_statement_bind_text (stmt, i + 1, text[i]);
+ }
- if (error) {
- g_warning ("Could not insert FTS text: %s", error->message);
- g_error_free (error);
- return FALSE;
- }
+ tracker_db_statement_execute (stmt, &error);
+ g_object_unref (stmt);
- return TRUE;
+ if (error) {
+ g_warning ("Could not insert FTS text: %s", error->message);
+ g_error_free (error);
+ return FALSE;
+ }
+
+ return TRUE;
}
gboolean
-tracker_db_interface_sqlite_fts_delete_text (TrackerDBInterface *db_interface,
- int id,
- const gchar *property)
+tracker_db_interface_sqlite_fts_delete_text (TrackerDBInterface *db_interface,
+ int rowid,
+ const gchar *property,
+ const gchar *old_text)
{
TrackerDBStatement *stmt;
GError *error = NULL;
+ const gchar *properties[] = { property, NULL };
+ gchar *query;
+ query = tracker_db_interface_sqlite_fts_create_query (db_interface,
+ TRUE, properties);
stmt = tracker_db_interface_create_statement (db_interface,
TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE,
&error,
- "UPDATE fts SET \"%s\" = '' WHERE docid = ?",
- property);
+ "%s", query);
+ g_free (query);
if (!stmt || error) {
- if (error) {
- g_warning ("Could not create FTS update statement: %s\n",
- error->message);
- g_error_free (error);
- }
+ g_warning ("Could not create FTS delete statement: %s",
+ error ? error->message : "No error given");
+ g_clear_error (&error);
return FALSE;
}
- tracker_db_statement_bind_int (stmt, 0, id);
+ tracker_db_statement_bind_int (stmt, 0, rowid);
+ tracker_db_statement_bind_text (stmt, 1, old_text);
tracker_db_statement_execute (stmt, &error);
g_object_unref (stmt);
if (error) {
- g_warning ("Could not execute FTS update: %s", error->message);
+ g_warning ("Could not delete FTS text: %s", error->message);
g_error_free (error);
return FALSE;
}
@@ -1510,12 +1524,15 @@ tracker_db_interface_sqlite_fts_delete_id (TrackerDBInterface *db_interface,
{
TrackerDBStatement *stmt;
GError *error = NULL;
+ gchar *query;
+ query = tracker_db_interface_sqlite_fts_create_delete_all_query (db_interface);
stmt = tracker_db_interface_create_statement (db_interface,
TRACKER_DB_STATEMENT_CACHE_TYPE_UPDATE,
&error,
- "%s",
- db_interface->fts_delete_str);
+ "%s", query);
+ g_free (query);
+
if (!stmt || error) {
if (error) {
g_warning ("Could not create FTS delete statement: %s",
@@ -1526,7 +1543,6 @@ tracker_db_interface_sqlite_fts_delete_id (TrackerDBInterface *db_interface,
}
tracker_db_statement_bind_int (stmt, 0, id);
- tracker_db_statement_bind_int (stmt, 1, id);
tracker_db_statement_execute (stmt, &error);
g_object_unref (stmt);
@@ -1542,7 +1558,7 @@ tracker_db_interface_sqlite_fts_delete_id (TrackerDBInterface *db_interface,
void
tracker_db_interface_sqlite_fts_rebuild_tokens (TrackerDBInterface *interface)
{
- tracker_fts_rebuild_tokens (interface->db, "fts");
+ tracker_fts_rebuild_tokens (interface->db, "fts5");
}
#endif
@@ -1592,8 +1608,7 @@ tracker_db_interface_sqlite_finalize (GObject *object)
db_interface = TRACKER_DB_INTERFACE (object);
close_database (db_interface);
- g_free (db_interface->fts_insert_str);
- g_free (db_interface->fts_delete_str);
+ g_free (db_interface->fts_properties);
g_message ("Closed sqlite3 database:'%s'", db_interface->filename);
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.h b/src/libtracker-data/tracker-db-interface-sqlite.h
index 18c020450..a7e265ef5 100644
--- a/src/libtracker-data/tracker-db-interface-sqlite.h
+++ b/src/libtracker-data/tracker-db-interface-sqlite.h
@@ -52,17 +52,17 @@ void tracker_db_interface_sqlite_wal_hook (TrackerD
void tracker_db_interface_sqlite_fts_alter_table (TrackerDBInterface *interface,
GHashTable *properties,
GHashTable *multivalued);
-int tracker_db_interface_sqlite_fts_update_text (TrackerDBInterface *interface,
- int id,
+gboolean tracker_db_interface_sqlite_fts_update_text (TrackerDBInterface *db_interface,
+ int id,
const gchar **properties,
- const gchar **text,
- gboolean create);
+ const gchar **text);
-gboolean tracker_db_interface_sqlite_fts_delete_text (TrackerDBInterface *db_interface,
- int id,
- const gchar *property);
+gboolean tracker_db_interface_sqlite_fts_delete_text (TrackerDBInterface *interface,
+ int rowid,
+ const gchar *property,
+ const gchar *old_text);
gboolean tracker_db_interface_sqlite_fts_delete_id (TrackerDBInterface *interface,
- int id);
+ int rowid);
void tracker_db_interface_sqlite_fts_update_commit (TrackerDBInterface *interface);
void tracker_db_interface_sqlite_fts_update_rollback (TrackerDBInterface *interface);
diff --git a/src/libtracker-data/tracker-sparql-expression.vala b/src/libtracker-data/tracker-sparql-expression.vala
index d69b908f7..91c6c10a0 100644
--- a/src/libtracker-data/tracker-sparql-expression.vala
+++ b/src/libtracker-data/tracker-sparql-expression.vala
@@ -709,7 +709,7 @@ class Tracker.Sparql.Expression : Object {
var variable = context.get_variable (v);
sql.append (variable.sql_expression);
- fts_sql = "tracker_offsets(offsets(\"fts\"),fts_property_names())";
+ fts_sql = "tracker_offsets(\"fts5\")";
return PropertyType.STRING;
} else if (uri == FTS_NS + "snippet") {
bool is_var;
@@ -718,7 +718,10 @@ class Tracker.Sparql.Expression : Object {
var variable = context.get_variable (v);
var fts = new StringBuilder ();
- fts.append_printf ("snippet(\"fts\"");
+ fts.append_printf ("snippet(\"fts5\"");
+
+ // lookup column
+ fts.append (", -1");
// "start match" text
if (accept (SparqlTokenType.COMMA)) {
@@ -741,9 +744,6 @@ class Tracker.Sparql.Expression : Object {
fts.append (", '...'");
}
- // lookup column
- fts.append (", -1");
-
// Approximate number of words in context
if (accept (SparqlTokenType.COMMA)) {
fts.append (", ");
diff --git a/src/libtracker-data/tracker-sparql-pattern.vala b/src/libtracker-data/tracker-sparql-pattern.vala
index ec9d5471e..73740b7c2 100644
--- a/src/libtracker-data/tracker-sparql-pattern.vala
+++ b/src/libtracker-data/tracker-sparql-pattern.vala
@@ -341,14 +341,14 @@ class Tracker.Sparql.Pattern : Object {
}
if (queries_fts_data && fts_subject != null) {
- // Ensure there's a docid to match on in FTS queries
+ // Ensure there's a rowid to match on in FTS queries
if (!first) {
sql.append (", ");
} else {
first = false;
}
- sql.append ("%s AS docid ".printf (fts_subject.sql_expression));
+ sql.append ("%s AS rowid ".printf (fts_subject.sql_expression));
}
// literals in select expressions need to be bound before literals in the where clause
@@ -459,9 +459,9 @@ class Tracker.Sparql.Pattern : Object {
str.append (fts_var);
}
- str.append (" FROM fts JOIN (");
+ str.append (" FROM fts5 JOIN (");
sql.prepend (str.str);
- sql.append_printf (") AS ranks USING (docid) WHERE fts %s".printf (match_str.str));
+ sql.append_printf (") AS ranks ON fts5.rowid=rowid WHERE fts5 %s".printf (match_str.str));
}
context = context.parent_context;
@@ -1349,7 +1349,7 @@ class Tracker.Sparql.Pattern : Object {
} else if (prop == null) {
if (current_predicate == "http://www.tracker-project.org/ontologies/fts#match") {
// fts:match
- db_table = "fts";
+ db_table = "fts5";
share_table = false;
is_fts_match = true;
fts_subject = context.get_variable (current_subject);
@@ -1477,7 +1477,7 @@ class Tracker.Sparql.Pattern : Object {
binding.table = table;
binding.type = subject_type;
if (is_fts_match) {
- binding.sql_db_column_name = "docid";
+ binding.sql_db_column_name = "rowid";
} else {
binding.sql_db_column_name = "ID";
}
@@ -1532,13 +1532,12 @@ class Tracker.Sparql.Pattern : Object {
binding.literal = object;
// binding.data_type = triple.object.type;
binding.table = table;
- binding.sql_db_column_name = "fts";
+ binding.sql_db_column_name = "fts5";
triple_context.bindings.append (binding);
- sql.append_printf ("\"%s\".\"docid\" AS \"ID\", ",
+ sql.append_printf ("\"%s\".\"rowid\" AS \"ID\", ",
binding.table.sql_query_tablename);
- sql.append_printf ("tracker_rank(matchinfo(\"%s\".\"fts\", 'cl'),fts_column_weights()) " +
- "AS \"%s_u_rank\", ",
+ sql.append_printf ("\"%s\".\"rank\" AS \"%s_u_rank\", ",
binding.table.sql_query_tablename,
context.get_variable (current_subject).name);
} else {
diff --git a/src/libtracker-fts/Makefile.am b/src/libtracker-fts/Makefile.am
index 8021c78b7..327dd03af 100644
--- a/src/libtracker-fts/Makefile.am
+++ b/src/libtracker-fts/Makefile.am
@@ -6,24 +6,9 @@ AM_CPPFLAGS = \
noinst_LTLIBRARIES = libtracker-fts.la
-fts4_sources = \
- fts3_aux.c \
- fts3.c \
- fts3_expr.c \
- fts3.h \
- fts3_hash.c \
- fts3_hash.h \
- fts3_icu.c \
- fts3Int.h \
- fts3_porter.c \
- fts3_snippet.c \
- fts3_term.c \
- fts3_tokenizer1.c \
- fts3_tokenizer.c \
- fts3_tokenizer.h \
- fts3_unicode.c \
- fts3_unicode2.c \
- fts3_write.c
+fts5_sources = \
+ fts5.c \
+ fts5.h
libtracker_fts_la_SOURCES = \
tracker-fts.c \
@@ -34,7 +19,7 @@ libtracker_fts_la_SOURCES = \
tracker-fts-tokenizer.h
if !HAVE_BUILTIN_FTS
- libtracker_fts_la_SOURCES += $(fts4_sources)
+ libtracker_fts_la_SOURCES += $(fts5_sources)
endif
libtracker_fts_la_LIBADD = \
@@ -42,7 +27,7 @@ libtracker_fts_la_LIBADD = \
$(BUILD_LIBS) \
$(LIBTRACKER_FTS_LIBS)
-EXTRA_DIST = $(fts4_sources)
+EXTRA_DIST = $(fts5_sources)
# Configuration / GSettings
gsettings_ENUM_NAMESPACE = org.freedesktop.Tracker
diff --git a/src/libtracker-fts/fts3.c b/src/libtracker-fts/fts3.c
deleted file mode 100644
index 3dc62ba8d..000000000
--- a/src/libtracker-fts/fts3.c
+++ /dev/null
@@ -1,5350 +0,0 @@
-/*
-** 2006 Oct 10
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This is an SQLite module implementing full-text search.
-*/
-
-/*
-** The code in this file is only compiled if:
-**
-** * The FTS3 module is being built as an extension
-** (in which case SQLITE_CORE is not defined), or
-**
-** * The FTS3 module is being built into the core of
-** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
-*/
-
-/* The full-text index is stored in a series of b+tree (-like)
-** structures called segments which map terms to doclists. The
-** structures are like b+trees in layout, but are constructed from the
-** bottom up in optimal fashion and are not updatable. Since trees
-** are built from the bottom up, things will be described from the
-** bottom up.
-**
-**
-**** Varints ****
-** The basic unit of encoding is a variable-length integer called a
-** varint. We encode variable-length integers in little-endian order
-** using seven bits per byte as follows:
-**
-** KEY:
-** A = 0xxxxxxx 7 bits of data and one flag bit
-** B = 1xxxxxxx 7 bits of data and one flag bit
-**
-** 7 bits - A
-** 14 bits - BA
-** 21 bits - BBA
-** and so on.
-**
-** This is similar in concept to how sqlite encodes "varints" but
-** the encoding is not the same. SQLite varints are big-endian
-** and are limited to 9 bytes in length whereas FTS3 varints are
-** little-endian and can be up to 10 bytes in length (in theory).
-**
-** Example encodings:
-**
-** 1: 0x01
-** 127: 0x7f
-** 128: 0x81 0x00
-**
-**
-**** Document lists ****
-** A doclist (document list) holds a docid-sorted list of hits for a
-** given term. Doclists hold docids and associated token positions.
-** A docid is the unique integer identifier for a single document.
-** A position is the index of a word within the document. The first
-** word of the document has a position of 0.
-**
-** FTS3 used to optionally store character offsets using a compile-time
-** option. But that functionality is no longer supported.
-**
-** A doclist is stored like this:
-**
-** array {
-** varint docid; (delta from previous doclist)
-** array { (position list for column 0)
-** varint position; (2 more than the delta from previous position)
-** }
-** array {
-** varint POS_COLUMN; (marks start of position list for new column)
-** varint column; (index of new column)
-** array {
-** varint position; (2 more than the delta from previous position)
-** }
-** }
-** varint POS_END; (marks end of positions for this document)
-** }
-**
-** Here, array { X } means zero or more occurrences of X, adjacent in
-** memory. A "position" is an index of a token in the token stream
-** generated by the tokenizer. Note that POS_END and POS_COLUMN occur
-** in the same logical place as the position element, and act as sentinels
-** ending a position list array. POS_END is 0. POS_COLUMN is 1.
-** The positions numbers are not stored literally but rather as two more
-** than the difference from the prior position, or just the position plus
-** 2 for the first position. Example:
-**
-** label: A B C D E F G H I J K
-** value: 123 5 9 1 1 14 35 0 234 72 0
-**
-** The 123 value is the first docid. For column zero in this document
-** there are two matches at positions 3 and 10 (5-2 and 9-2+3). The 1
-** at D signals the start of a new column; the 1 at E indicates that the
-** new column is column number 1. There are two positions at 12 and 45
-** (14-2 and 35-2+12). The 0 at H indicates the end-of-document. The
-** 234 at I is the delta to next docid (357). It has one position 70
-** (72-2) and then terminates with the 0 at K.
-**
-** A "position-list" is the list of positions for multiple columns for
-** a single docid. A "column-list" is the set of positions for a single
-** column. Hence, a position-list consists of one or more column-lists,
-** a document record consists of a docid followed by a position-list and
-** a doclist consists of one or more document records.
-**
-** A bare doclist omits the position information, becoming an
-** array of varint-encoded docids.
-**
-**** Segment leaf nodes ****
-** Segment leaf nodes store terms and doclists, ordered by term. Leaf
-** nodes are written using LeafWriter, and read using LeafReader (to
-** iterate through a single leaf node's data) and LeavesReader (to
-** iterate through a segment's entire leaf layer). Leaf nodes have
-** the format:
-**
-** varint iHeight; (height from leaf level, always 0)
-** varint nTerm; (length of first term)
-** char pTerm[nTerm]; (content of first term)
-** varint nDoclist; (length of term's associated doclist)
-** char pDoclist[nDoclist]; (content of doclist)
-** array {
-** (further terms are delta-encoded)
-** varint nPrefix; (length of prefix shared with previous term)
-** varint nSuffix; (length of unshared suffix)
-** char pTermSuffix[nSuffix];(unshared suffix of next term)
-** varint nDoclist; (length of term's associated doclist)
-** char pDoclist[nDoclist]; (content of doclist)
-** }
-**
-** Here, array { X } means zero or more occurrences of X, adjacent in
-** memory.
-**
-** Leaf nodes are broken into blocks which are stored contiguously in
-** the %_segments table in sorted order. This means that when the end
-** of a node is reached, the next term is in the node with the next
-** greater node id.
-**
-** New data is spilled to a new leaf node when the current node
-** exceeds LEAF_MAX bytes (default 2048). New data which itself is
-** larger than STANDALONE_MIN (default 1024) is placed in a standalone
-** node (a leaf node with a single term and doclist). The goal of
-** these settings is to pack together groups of small doclists while
-** making it efficient to directly access large doclists. The
-** assumption is that large doclists represent terms which are more
-** likely to be query targets.
-**
-** TODO(shess) It may be useful for blocking decisions to be more
-** dynamic. For instance, it may make more sense to have a 2.5k leaf
-** node rather than splitting into 2k and .5k nodes. My intuition is
-** that this might extend through 2x or 4x the pagesize.
-**
-**
-**** Segment interior nodes ****
-** Segment interior nodes store blockids for subtree nodes and terms
-** to describe what data is stored by each subtree. Interior
-** nodes are written using InteriorWriter, and read using
-** InteriorReader. InteriorWriters are created as needed when
-** SegmentWriter creates new leaf nodes, or when an interior node
-** itself grows too big and must be split. The format of interior
-** nodes:
-**
-** varint iHeight; (height from leaf level, always >0)
-** varint iBlockid; (block id of node's leftmost subtree)
-** optional {
-** varint nTerm; (length of first term)
-** char pTerm[nTerm]; (content of first term)
-** array {
-** (further terms are delta-encoded)
-** varint nPrefix; (length of shared prefix with previous term)
-** varint nSuffix; (length of unshared suffix)
-** char pTermSuffix[nSuffix]; (unshared suffix of next term)
-** }
-** }
-**
-** Here, optional { X } means an optional element, while array { X }
-** means zero or more occurrences of X, adjacent in memory.
-**
-** An interior node encodes n terms separating n+1 subtrees. The
-** subtree blocks are contiguous, so only the first subtree's blockid
-** is encoded. The subtree at iBlockid will contain all terms less
-** than the first term encoded (or all terms if no term is encoded).
-** Otherwise, for terms greater than or equal to pTerm[i] but less
-** than pTerm[i+1], the subtree for that term will be rooted at
-** iBlockid+i. Interior nodes only store enough term data to
-** distinguish adjacent children (if the rightmost term of the left
-** child is "something", and the leftmost term of the right child is
-** "wicked", only "w" is stored).
-**
-** New data is spilled to a new interior node at the same height when
-** the current node exceeds INTERIOR_MAX bytes (default 2048).
-** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing
-** interior nodes and making the tree too skinny. The interior nodes
-** at a given height are naturally tracked by interior nodes at
-** height+1, and so on.
-**
-**
-**** Segment directory ****
-** The segment directory in table %_segdir stores meta-information for
-** merging and deleting segments, and also the root node of the
-** segment's tree.
-**
-** The root node is the top node of the segment's tree after encoding
-** the entire segment, restricted to ROOT_MAX bytes (default 1024).
-** This could be either a leaf node or an interior node. If the top
-** node requires more than ROOT_MAX bytes, it is flushed to %_segments
-** and a new root interior node is generated (which should always fit
-** within ROOT_MAX because it only needs space for 2 varints, the
-** height and the blockid of the previous root).
-**
-** The meta-information in the segment directory is:
-** level - segment level (see below)
-** idx - index within level
-** - (level,idx uniquely identify a segment)
-** start_block - first leaf node
-** leaves_end_block - last leaf node
-** end_block - last block (including interior nodes)
-** root - contents of root node
-**
-** If the root node is a leaf node, then start_block,
-** leaves_end_block, and end_block are all 0.
-**
-**
-**** Segment merging ****
-** To amortize update costs, segments are grouped into levels and
-** merged in batches. Each increase in level represents exponentially
-** more documents.
-**
-** New documents (actually, document updates) are tokenized and
-** written individually (using LeafWriter) to a level 0 segment, with
-** incrementing idx. When idx reaches MERGE_COUNT (default 16), all
-** level 0 segments are merged into a single level 1 segment. Level 1
-** is populated like level 0, and eventually MERGE_COUNT level 1
-** segments are merged to a single level 2 segment (representing
-** MERGE_COUNT^2 updates), and so on.
-**
-** A segment merge traverses all segments at a given level in
-** parallel, performing a straightforward sorted merge. Since segment
-** leaf nodes are written in to the %_segments table in order, this
-** merge traverses the underlying sqlite disk structures efficiently.
-** After the merge, all segment blocks from the merged level are
-** deleted.
-**
-** MERGE_COUNT controls how often we merge segments. 16 seems to be
-** somewhat of a sweet spot for insertion performance. 32 and 64 show
-** very similar performance numbers to 16 on insertion, though they're
-** a tiny bit slower (perhaps due to more overhead in merge-time
-** sorting). 8 is about 20% slower than 16, 4 about 50% slower than
-** 16, 2 about 66% slower than 16.
-**
-** At query time, high MERGE_COUNT increases the number of segments
-** which need to be scanned and merged. For instance, with 100k docs
-** inserted:
-**
-** MERGE_COUNT segments
-** 16 25
-** 8 12
-** 4 10
-** 2 6
-**
-** This appears to have only a moderate impact on queries for very
-** frequent terms (which are somewhat dominated by segment merge
-** costs), and infrequent and non-existent terms still seem to be fast
-** even with many segments.
-**
-** TODO(shess) That said, it would be nice to have a better query-side
-** argument for MERGE_COUNT of 16. Also, it is possible/likely that
-** optimizations to things like doclist merging will swing the sweet
-** spot around.
-**
-**
-**
-**** Handling of deletions and updates ****
-** Since we're using a segmented structure, with no docid-oriented
-** index into the term index, we clearly cannot simply update the term
-** index when a document is deleted or updated. For deletions, we
-** write an empty doclist (varint(docid) varint(POS_END)), for updates
-** we simply write the new doclist. Segment merges overwrite older
-** data for a particular docid with newer data, so deletes or updates
-** will eventually overtake the earlier data and knock it out. The
-** query logic likewise merges doclists so that newer data knocks out
-** older data.
-*/
-
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE)
-# define SQLITE_CORE 1
-#endif
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-
-#include "fts3.h"
-#ifndef SQLITE_CORE
-# include "sqlite3ext.h"
- SQLITE_EXTENSION_INIT1
-#endif
-
-static int fts3EvalNext(Fts3Cursor *pCsr);
-static int fts3EvalStart(Fts3Cursor *pCsr);
-static int fts3TermSegReaderCursor(
- Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
-
-/*
-** Encode the 64-bit value v as an FTS3 varint into the buffer at p.
-**
-** The value is emitted seven bits at a time, least-significant group
-** first. Every byte except the last has its 0x80 bit set; the last byte
-** has it clear. The number of bytes stored (always at least 1, and
-** asserted to be no more than FTS3_VARINT_MAX) is returned.
-*/
-int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
-  unsigned char *aOut = (unsigned char *)p;
-  sqlite_uint64 uRem = v;
-  int nByte = 0;
-
-  for(;;){
-    unsigned char c = (unsigned char)(uRem & 0x7f);
-    uRem >>= 7;
-    if( uRem==0 ){
-      /* Final group: stored without the continuation bit. */
-      aOut[nByte++] = c;
-      break;
-    }
-    aOut[nByte++] = (unsigned char)(c | 0x80);
-  }
-
-  assert( nByte<=FTS3_VARINT_MAX );
-  return nByte;
-}
-
-/*
-** Decode an FTS3 varint from the buffer at p and store the value in *v.
-**
-** Bytes with the 0x80 bit set contribute their low seven bits; at most
-** FTS3_VARINT_MAX such continuation bytes are consumed. The byte that
-** follows them is added in unmasked. The return value is the number of
-** bytes consumed, which is always at least 1.
-*/
-int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){
-  const unsigned char *a = (const unsigned char *)p;
-  sqlite_uint64 uVal = 0;       /* Value accumulated so far */
-  sqlite_uint64 uScale = 1;     /* Weight of the next 7-bit group */
-  int n = 0;                    /* Bytes consumed so far */
-
-  while( n<FTS3_VARINT_MAX && (a[n] & 0x80)!=0 ){
-    uVal += uScale * (a[n] & 0x7f);
-    uScale <<= 7;
-    n++;
-  }
-  uVal += uScale * a[n];
-  n++;
-
-  *v = (sqlite_int64)uVal;
-  return n;
-}
-
-/*
-** Wrapper around sqlite3Fts3GetVarint() that stores the decoded value,
-** converted to int, in *pi. Returns whatever sqlite3Fts3GetVarint()
-** returned (the number of bytes consumed).
-*/
-int sqlite3Fts3GetVarint32(const char *p, int *pi){
-  sqlite_int64 iFull;
-  int nRead;
-
-  nRead = sqlite3Fts3GetVarint(p, &iFull);
-  *pi = (int)iFull;
-  return nRead;
-}
-
-/*
-** Return the number of 7-bit groups, and therefore bytes, needed to
-** encode v as a varint. The result is 1 for v==0.
-*/
-int sqlite3Fts3VarintLen(sqlite3_uint64 v){
-  int nByte = 1;
-
-  v >>= 7;
-  while( v!=0 ){
-    nByte++;
-    v >>= 7;
-  }
-  return nByte;
-}
-
-/*
-** Strip SQL-style quoting from the string z, in place.
-**
-** If z does not start with one of the characters [ ' " ` the string is
-** left untouched. Otherwise the opening quote is dropped, a doubled
-** closing-quote character inside the string is collapsed to a single
-** one, and the string is terminated at the first un-doubled closing
-** quote. For an opening '[' the closing character is ']'.
-**
-**     "abc"   becomes   abc
-**     'xyz'   becomes   xyz
-**     [pqr]   becomes   pqr
-**     `mno`   becomes   mno
-*/
-void sqlite3Fts3Dequote(char *z){
-  char cClose = z[0];           /* Closing quote character */
-  int iRead;                    /* Next input byte */
-  int iWrite = 0;               /* Next output byte */
-
-  if( cClose!='[' && cClose!='\'' && cClose!='"' && cClose!='`' ) return;
-  if( cClose=='[' ) cClose = ']';
-
-  for(iRead=1; ALWAYS(z[iRead]); ){
-    if( z[iRead]!=cClose ){
-      z[iWrite++] = z[iRead++];
-    }else if( z[iRead+1]==cClose ){
-      /* Doubled quote character: emit one copy, skip both. */
-      z[iWrite++] = cClose;
-      iRead += 2;
-    }else{
-      break;
-    }
-  }
-  z[iWrite] = '\0';
-}
-
-/*
-** Decode one varint at *pp, advance *pp past it, and add the decoded
-** value to *pVal.
-*/
-static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){
-  sqlite3_int64 iDelta;
-  int nByte = sqlite3Fts3GetVarint(*pp, &iDelta);
-
-  *pp += nByte;
-  *pVal += iDelta;
-}
-
-/*
-** Step backwards over one varint.
-**
-** On entry *pp points at the byte immediately after a varint that lies
-** inside the buffer starting at pStart. On return *pp points at the
-** first byte of that varint and *pVal holds its decoded value.
-*/
-static void fts3GetReverseVarint(
-  char **pp,
-  char *pStart,
-  sqlite3_int64 *pVal
-){
-  char *pScan = (*pp) - 2;
-  sqlite3_int64 iDecoded;
-
-  /* (*pp)[-1] is taken to be the final byte of the varint. Walk back
-  ** over any preceding bytes that carry the 0x80 continuation bit,
-  ** without going below pStart. */
-  while( pScan>=pStart && (*pScan & 0x80)!=0 ){
-    pScan--;
-  }
-  pScan++;
-  *pp = pScan;
-
-  sqlite3Fts3GetVarint(pScan, &iDecoded);
-  *pVal = iDecoded;
-}
-
-/*
-** Implementation of the xDisconnect() virtual table method.
-**
-** Finalizes every statement in aStmt[], frees the strings owned by the
-** Fts3Table, invokes the tokenizer's xDestroy, and finally frees the
-** Fts3Table itself. Always returns SQLITE_OK.
-*/
-static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
-  Fts3Table *pTab = (Fts3Table *)pVtab;
-  int iStmt = 0;
-
-  assert( pTab->nPendingData==0 );
-  assert( pTab->pSegments==0 );
-
-  /* Release the cached prepared statements. */
-  while( iStmt<SizeofArray(pTab->aStmt) ){
-    sqlite3_finalize(pTab->aStmt[iStmt]);
-    iStmt++;
-  }
-
-  sqlite3_free(pTab->zSegmentsTbl);
-  sqlite3_free(pTab->zReadExprlist);
-  sqlite3_free(pTab->zWriteExprlist);
-  sqlite3_free(pTab->zContentTbl);
-  sqlite3_free(pTab->zLanguageid);
-
-  /* The tokenizer is torn down through its own module's destructor. */
-  pTab->pTokenizer->pModule->xDestroy(pTab->pTokenizer);
-
-  sqlite3_free(pTab);
-  return SQLITE_OK;
-}
-
-/*
-** Format an SQL string from zFormat and the trailing arguments using
-** sqlite3_vmprintf(), run it with sqlite3_exec(), and store the result
-** code in *pRc. If the string cannot be allocated, *pRc is set to
-** SQLITE_NOMEM.
-**
-** Nothing happens if *pRc is already non-zero on entry.
-*/
-static void fts3DbExec(
-  int *pRc,              /* IN/OUT: result code */
-  sqlite3 *db,           /* Database handle to execute against */
-  const char *zFormat,   /* printf-style SQL template */
-  ...                    /* Values for the template */
-){
-  if( *pRc==0 ){
-    va_list ap;
-    char *zSql;
-
-    va_start(ap, zFormat);
-    zSql = sqlite3_vmprintf(zFormat, ap);
-    va_end(ap);
-
-    if( zSql ){
-      *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
-      sqlite3_free(zSql);
-    }else{
-      *pRc = SQLITE_NOMEM;
-    }
-  }
-}
-
-/*
-** Implementation of the xDestroy() virtual table method.
-**
-** Drops the %_segments, %_segdir, %_docsize and %_stat shadow tables,
-** plus %_content when the table has no zContentTbl set. If all of the
-** DROP statements succeed the table is disconnected via
-** fts3DisconnectMethod() and its result returned; otherwise the first
-** error code is returned and the Fts3Table is left allocated.
-*/
-static int fts3DestroyMethod(sqlite3_vtab *pVtab){
-  Fts3Table *pTab = (Fts3Table *)pVtab;
-  sqlite3 *db = pTab->db;
-  const char *zDb = pTab->zDb;
-  const char *zName = pTab->zName;
-  int rc = SQLITE_OK;
-
-  /* fts3DbExec() is a no-op once rc is non-zero, so the first failure
-  ** short-circuits the remaining statements. */
-  if( pTab->zContentTbl==0 ){
-    fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, zName);
-  }
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb, zName);
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, zName);
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, zName);
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, zName);
-
-  if( rc!=SQLITE_OK ){
-    return rc;
-  }
-  return fts3DisconnectMethod(pVtab);
-}
-
-
-/*
-** Declare the schema of FTS table p to SQLite with
-** sqlite3_declare_vtab().
-**
-** The declared table has one column per entry of p->azColumn[],
-** followed by three HIDDEN columns: one named after the table itself,
-** "docid", and the language-id column (p->zLanguageid, or "__langid"
-** when that is NULL).
-**
-** This is a no-op if *pRc is not SQLITE_OK on entry. Otherwise *pRc is
-** overwritten with the outcome: SQLITE_NOMEM if a string could not be
-** built, else the result of sqlite3_declare_vtab().
-*/
-static void fts3DeclareVtab(int *pRc, Fts3Table *p){
-  int rc;                       /* Result stored in *pRc */
-  int iCol;                     /* Column iterator */
-  char *zSql;                   /* Full CREATE TABLE statement */
-  char *zCols;                  /* Quoted, comma-terminated user columns */
-  const char *zLangCol;         /* Name of the hidden language-id column */
-
-  if( *pRc!=SQLITE_OK ) return;
-
-  zLangCol = p->zLanguageid;
-  if( zLangCol==0 ) zLangCol = "__langid";
-  sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
-
-  /* Build the user column list. %z frees the previous string, so a
-  ** failed allocation simply leaves zCols==0 and ends the loop. */
-  zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
-  iCol = 1;
-  while( zCols && iCol<p->nColumn ){
-    zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[iCol]);
-    iCol++;
-  }
-
-  zSql = sqlite3_mprintf(
-      "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)",
-      zCols, p->zName, zLangCol
-  );
-  if( zCols && zSql ){
-    rc = sqlite3_declare_vtab(p->db, zSql);
-  }else{
-    rc = SQLITE_NOMEM;
-  }
-
-  sqlite3_free(zSql);
-  sqlite3_free(zCols);
-  *pRc = rc;
-}
-
-/*
-** Issue "CREATE TABLE IF NOT EXISTS" for the %_stat shadow table of p
-** through fts3DbExec(). If *pRc is SQLITE_OK afterwards, p->bHasStat
-** is set to 1.
-*/
-void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
-  static const char zCreateStat[] =
-      "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
-      "(id INTEGER PRIMARY KEY, value BLOB);";
-
-  fts3DbExec(pRc, p->db, zCreateStat, p->zDb, p->zName);
-  if( *pRc==SQLITE_OK ){
-    p->bHasStat = 1;
-  }
-}
-
-/*
-** Create the shadow tables backing FTS table p. This is done as part
-** of the vtab xCreate() method.
-**
-**   %_content   created only when p->zContentTbl is NULL; holds docid,
-**               one 'c<N><name>' column per user column and, when
-**               p->zLanguageid is set, a "langid" column
-**   %_segments  always created
-**   %_segdir    always created
-**   %_docsize   created when p->bHasDocsize is set
-**   %_stat      created when p->bHasStat is set
-**
-** Returns SQLITE_OK on success, SQLITE_NOMEM if the column list could
-** not be built, or the first error reported by a CREATE statement.
-** Every statement goes through fts3DbExec(), which becomes a no-op
-** once rc is non-zero.
-*/
-static int fts3CreateTables(Fts3Table *p){
-  int rc = SQLITE_OK;             /* Return code */
-  int i;                          /* Iterator variable */
-  sqlite3 *db = p->db;            /* The database connection */
-
-  if( p->zContentTbl==0 ){
-    char *zContentCols;           /* Columns of %_content table */
-
-    /* Create a list of user columns for the content table. %z frees
-    ** the previous string, so on OOM zContentCols simply becomes 0. */
-    zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
-    for(i=0; zContentCols && i<p->nColumn; i++){
-      char *z = p->azColumn[i];
-      zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
-    }
-    /* The language-id column of %_content is always the literal
-    ** "langid", regardless of the value of p->zLanguageid, so the
-    ** format takes no extra argument. */
-    if( p->zLanguageid && zContentCols ){
-      zContentCols = sqlite3_mprintf("%z, langid", zContentCols);
-    }
-    if( zContentCols==0 ) rc = SQLITE_NOMEM;
-
-    /* Create the content table */
-    fts3DbExec(&rc, db,
-        "CREATE TABLE %Q.'%q_content'(%s)",
-        p->zDb, p->zName, zContentCols
-    );
-    sqlite3_free(zContentCols);
-  }
-
-  /* Create other tables */
-  fts3DbExec(&rc, db,
-      "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
-      p->zDb, p->zName
-  );
-  fts3DbExec(&rc, db,
-      "CREATE TABLE %Q.'%q_segdir'("
-        "level INTEGER,"
-        "idx INTEGER,"
-        "start_block INTEGER,"
-        "leaves_end_block INTEGER,"
-        "end_block INTEGER,"
-        "root BLOB,"
-        "PRIMARY KEY(level, idx)"
-      ");",
-      p->zDb, p->zName
-  );
-  if( p->bHasDocsize ){
-    fts3DbExec(&rc, db,
-        "CREATE TABLE %Q.'%q_docsize'(docid INTEGER PRIMARY KEY, size BLOB);",
-        p->zDb, p->zName
-    );
-  }
-  assert( p->bHasStat==p->bFts4 );
-  if( p->bHasStat ){
-    sqlite3Fts3CreateStatTable(&rc, p);
-  }
-  return rc;
-}
-
-/*
-** Query "PRAGMA <db>.page_size" and store the result in p->nPgsz.
-**
-** If preparing the PRAGMA fails with SQLITE_AUTH, a page size of 1024
-** is assumed and the error is cleared.
-**
-** This is a no-op if *pRc is not SQLITE_OK on entry. Otherwise *pRc is
-** overwritten with the outcome (SQLITE_NOMEM, the prepare error, or
-** the result of finalizing the statement).
-*/
-static void fts3DatabasePageSize(int *pRc, Fts3Table *p){
-  int rc;                       /* Result stored in *pRc */
-  char *zSql;                   /* Text of the PRAGMA statement */
-  sqlite3_stmt *pStmt;          /* Compiled PRAGMA statement */
-
-  if( *pRc!=SQLITE_OK ) return;
-
-  zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb);
-  if( zSql==0 ){
-    rc = SQLITE_NOMEM;
-  }else{
-    rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0);
-    switch( rc ){
-      case SQLITE_OK:
-        sqlite3_step(pStmt);
-        p->nPgsz = sqlite3_column_int(pStmt, 0);
-        rc = sqlite3_finalize(pStmt);
-        break;
-      case SQLITE_AUTH:
-        /* Not allowed to run the PRAGMA: fall back to 1024 bytes. */
-        p->nPgsz = 1024;
-        rc = SQLITE_OK;
-        break;
-      default:
-        break;
-    }
-  }
-
-  assert( p->nPgsz>0 || rc!=SQLITE_OK );
-  sqlite3_free(zSql);
-  *pRc = rc;
-}
-
-/*
-** Test whether column specification z has the "special" form
-**
-**   <key>=<value>
-**
-** If z contains no '=' character, 0 is returned and the outputs are
-** not written. Otherwise 1 is returned, *pnKey is set to the number of
-** bytes before the first '=', and *pzValue is set to a copy of the
-** text after it, allocated with sqlite3_mprintf() and passed through
-** sqlite3Fts3Dequote(). *pzValue is NULL if that allocation failed.
-*/
-static int fts3IsSpecialColumn(
-  const char *z,
-  int *pnKey,
-  char **pzValue
-){
-  const char *zEq = strchr(z, '=');
-  char *zCopy;
-
-  if( zEq==0 ) return 0;
-
-  *pnKey = (int)(zEq - z);
-  zCopy = sqlite3_mprintf("%s", zEq + 1);
-  if( zCopy ){
-    sqlite3Fts3Dequote(zCopy);
-  }
-  *pzValue = zCopy;
-  return 1;
-}
-
-/*
-** Append printf()-style formatted text to the string *pz.
-**
-** The old *pz is always freed and replaced by the new string. If any
-** allocation fails, *pz becomes NULL and *pRc is set to SQLITE_NOMEM.
-** This is a no-op if *pRc is not SQLITE_OK on entry.
-*/
-static void fts3Appendf(
-  int *pRc,              /* IN/OUT: error code */
-  char **pz,             /* IN/OUT: string being built */
-  const char *zFormat,   /* printf-style format of the text to append */
-  ...                    /* Values for the format */
-){
-  va_list ap;
-  char *zNew;
-
-  if( *pRc!=SQLITE_OK ) return;
-
-  va_start(ap, zFormat);
-  zNew = sqlite3_vmprintf(zFormat, ap);
-  va_end(ap);
-
-  /* With an existing prefix, build prefix+suffix and drop the suffix. */
-  if( zNew && *pz ){
-    char *zJoined = sqlite3_mprintf("%s%s", *pz, zNew);
-    sqlite3_free(zNew);
-    zNew = zJoined;
-  }
-  if( zNew==0 ) *pRc = SQLITE_NOMEM;
-
-  sqlite3_free(*pz);
-  *pz = zNew;
-}
-
-/*
-** Return a copy of zInput wrapped in double quotes, with every double
-** quote inside it doubled. For example the input
-**
-**     un "zip"
-**
-** yields
-**
-**     "un ""zip"""
-**
-** The result is obtained from sqlite3_malloc() and must be released by
-** the caller with sqlite3_free(). NULL is returned if the allocation
-** fails.
-*/
-static char *fts3QuoteId(char const *zInput){
-  /* Worst case: every input byte doubled, two quotes, one terminator. */
-  int nAlloc = 2 + (int)strlen(zInput)*2 + 1;
-  char *zOut = sqlite3_malloc(nAlloc);
-
-  if( zOut ){
-    const char *zIn;
-    int iOut = 0;
-
-    zOut[iOut++] = '"';
-    for(zIn=zInput; *zIn; zIn++){
-      if( *zIn=='"' ) zOut[iOut++] = '"';
-      zOut[iOut++] = *zIn;
-    }
-    zOut[iOut++] = '"';
-    zOut[iOut] = '\0';
-  }
-  return zOut;
-}
-
-/*
-** Build the expression list and FROM clause of a SELECT that reads one
-** row of content, i.e. the text that follows "SELECT " in
-**
-**   SELECT <returned string>
-**
-** Without an external content table (p->zContentTbl==0) the string is
-**
-**   docid,F(x.'c0<col0>'),F(x.'c1<col1>')... FROM '<db>'.'<name>_content' AS x
-**
-** where F is empty if zFunc is NULL, and otherwise zFunc quoted as an
-** identifier by fts3QuoteId(). When p->zLanguageid is set, the column
-** x.'langid' is appended to the list.
-**
-** With an external content table the string is
-**
-**   rowid, x.'<col0>', x.'<col1>'... FROM '<db>'.'<content-table>' AS x
-**
-** with the quoted p->zLanguageid column appended when set; zFunc is
-** not used in this case.
-**
-** The returned buffer comes from the SQLite allocator and must be
-** freed by the caller. The string is assembled with fts3Appendf(), so
-** nothing is appended while *pRc is not SQLITE_OK, and an allocation
-** failure sets *pRc to SQLITE_NOMEM and yields NULL.
-*/
-static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
-  char *zList = 0;              /* String under construction */
-  const char *zTbl;             /* Table named in the FROM clause */
-  const char *zSuffix;          /* Suffix appended to zTbl */
-  int iCol;
-
-  if( p->zContentTbl ){
-    fts3Appendf(pRc, &zList, "rowid");
-    for(iCol=0; iCol<p->nColumn; iCol++){
-      fts3Appendf(pRc, &zList, ", x.'%q'", p->azColumn[iCol]);
-    }
-    if( p->zLanguageid ){
-      fts3Appendf(pRc, &zList, ", x.%Q", p->zLanguageid);
-    }
-    zTbl = p->zContentTbl;
-    zSuffix = "";
-  }else{
-    char *zQuoted = 0;          /* Quoted zFunc, if any (owned here) */
-    char *zWrap = "";           /* Function name placed before each column */
-
-    if( zFunc ){
-      zQuoted = fts3QuoteId(zFunc);
-      zWrap = zQuoted;
-    }
-    fts3Appendf(pRc, &zList, "docid");
-    for(iCol=0; iCol<p->nColumn; iCol++){
-      fts3Appendf(pRc, &zList, ",%s(x.'c%d%q')", zWrap, iCol, p->azColumn[iCol]);
-    }
-    if( p->zLanguageid ){
-      fts3Appendf(pRc, &zList, ", x.%Q", "langid");
-    }
-    sqlite3_free(zQuoted);
-    zTbl = p->zName;
-    zSuffix = "_content";
-  }
-
-  fts3Appendf(pRc, &zList, " FROM '%q'.'%q%s' AS x", p->zDb, zTbl, zSuffix);
-  return zList;
-}
-
-/*
-** Build the list of bound-parameter placeholders used to write one row
-** of content: one "?" for the docid, one per user column and, when
-** p->zLanguageid is set, one more for the language id.
-**
-** If zFunc is not NULL each user-column placeholder is wrapped in a
-** call to zFunc, quoted as an identifier by fts3QuoteId(). With three
-** user columns and zFunc "zip" the result is
-**
-**   ?,"zip"(?),"zip"(?),"zip"(?)
-**
-** The returned buffer comes from the SQLite allocator and must be
-** freed by the caller. The string is assembled with fts3Appendf(), so
-** nothing is appended while *pRc is not SQLITE_OK, and an allocation
-** failure sets *pRc to SQLITE_NOMEM and yields NULL.
-*/
-static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
-  char *zList = 0;              /* String under construction */
-  char *zQuoted = 0;            /* Quoted zFunc, if any (owned here) */
-  char *zWrap = "";             /* Function name placed before each "(?)" */
-  int nLeft;                    /* User columns still to emit */
-
-  if( zFunc ){
-    zQuoted = fts3QuoteId(zFunc);
-    zWrap = zQuoted;
-  }
-
-  fts3Appendf(pRc, &zList, "?");
-  for(nLeft=p->nColumn; nLeft>0; nLeft--){
-    fts3Appendf(pRc, &zList, ",%s(?)", zWrap);
-  }
-  if( p->zLanguageid ){
-    fts3Appendf(pRc, &zList, ", ?");
-  }
-
-  sqlite3_free(zQuoted);
-  return zList;
-}
-
-/*
-** Parse a non-negative decimal integer at *pp.
-**
-** On success the value is stored in *pnOut, *pp is advanced to the
-** first byte after the last digit, and SQLITE_OK is returned.
-**
-** SQLITE_ERROR is returned, leaving *pp and *pnOut untouched, if *pp
-** does not begin with a decimal digit or if the value does not fit in
-** a 32-bit signed integer.
-**
-** This function is used when parsing the "prefix=" FTS4 parameter.
-*/
-static int fts3GobbleInt(const char **pp, int *pnOut){
-  const char *p;                  /* Iterator pointer */
-  sqlite3_int64 iVal = 0;         /* Value accumulated so far */
-
-  for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
-    /* Accumulate in 64 bits and stop as soon as the value leaves the
-    ** int range, so a long run of digits cannot overflow (signed
-    ** overflow is undefined behaviour in C). */
-    iVal = iVal*10 + (p[0] - '0');
-    if( iVal>0x7FFFFFFF ) return SQLITE_ERROR;
-  }
-  if( p==*pp ) return SQLITE_ERROR;
-  *pnOut = (int)iVal;
-  *pp = p;
-  return SQLITE_OK;
-}
-
-/*
-** Allocate the array of Fts3Index structures for an FTS table.
-**
-** Entry 0 is always present and is left zeroed. When zParam (the value
-** of the "prefix=" option, or NULL) is a non-empty string, one further
-** entry is allocated for each comma-separated field and its nPrefix
-** member is set to the integer parsed from that field.
-**
-** *apIndex and *pnIndex are set as soon as the allocation has been
-** attempted, before any parsing. The return value is SQLITE_OK on
-** success, SQLITE_NOMEM if the allocation failed, or SQLITE_ERROR if a
-** field does not start with a decimal integer.
-**
-** Whatever the outcome, the caller is responsible for passing
-** *apIndex to sqlite3_free().
-*/
-static int fts3PrefixParameter(
-  const char *zParam,             /* ABC in prefix=ABC parameter to parse */
-  int *pnIndex,                   /* OUT: size of *apIndex[] array */
-  struct Fts3Index **apIndex      /* OUT: Array of indexes for this table */
-){
-  struct Fts3Index *aNew;         /* The new array */
-  int nEntry = 1;                 /* Entries in aNew[] */
-
-  /* One extra entry for the first field, plus one per comma. */
-  if( zParam && zParam[0] ){
-    const char *z;
-    nEntry = 2;
-    for(z=zParam; *z; z++){
-      if( *z==',' ) nEntry++;
-    }
-  }
-
-  aNew = sqlite3_malloc(sizeof(struct Fts3Index) * nEntry);
-  *apIndex = aNew;
-  *pnIndex = nEntry;
-  if( aNew==0 ){
-    return SQLITE_NOMEM;
-  }
-  memset(aNew, 0, sizeof(struct Fts3Index) * nEntry);
-
-  if( zParam ){
-    const char *zCsr = zParam;
-    int iIdx;
-    /* The zCsr++ in the loop header steps over the byte that follows
-    ** each parsed integer. */
-    for(iIdx=1; iIdx<nEntry; iIdx++, zCsr++){
-      int nPrefix;
-      int rcInt = fts3GobbleInt(&zCsr, &nPrefix);
-      if( rcInt ) return SQLITE_ERROR;
-      aNew[iIdx].nPrefix = nPrefix;
-    }
-  }
-
-  return SQLITE_OK;
-}
-
-/*
-** This function is called when initializing an FTS4 table that uses the
-** content=xxx option. It determines the number of and names of the columns
-** of the new FTS4 table.
-**
-** The third argument passed to this function is the value passed to the
-** config=xxx option (i.e. "xxx"). This function queries the database for
-** a table of that name. If found, the output variables are populated
-** as follows:
-**
-** *pnCol: Set to the number of columns table xxx has,
-**
-** *pnStr: Set to the total amount of space required to store a copy
-** of each columns name, including the nul-terminator.
-**
-** *pazCol: Set to point to an array of *pnCol strings. Each string is
-** the name of the corresponding column in table xxx. The array
-** and its contents are allocated using a single allocation. It
-** is the responsibility of the caller to free this allocation
-** by eventually passing the *pazCol value to sqlite3_free().
-**
-** If the table cannot be found, an error code is returned and the output
-** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is
-** returned (and the output variables are undefined).
-*/
-static int fts3ContentColumns(
-  sqlite3 *db,                    /* Database handle */
-  const char *zDb,                /* Name of db (i.e. "main", "temp" etc.) */
-  const char *zTbl,               /* Name of content table */
-  const char ***pazCol,           /* OUT: Malloc'd array of column names */
-  int *pnCol,                     /* OUT: Size of array *pazCol */
-  int *pnStr                      /* OUT: Bytes of string content */
-){
-  int rc = SQLITE_OK;             /* Return code */
-  char *zSql;                     /* "SELECT *" statement on zTbl */
-  sqlite3_stmt *pStmt = 0;        /* Compiled version of zSql */
-
-  /* Preparing "SELECT *" is enough to learn the column names; the statement
-  ** is never stepped. */
-  zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
-  if( !zSql ){
-    rc = SQLITE_NOMEM;
-  }else{
-    rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
-  }
-  sqlite3_free(zSql);
-
-  if( rc==SQLITE_OK ){
-    const char **azCol = 0;       /* Output array */
-    int nStr = 0;                 /* Size of all column names (incl. 0x00) */
-    int nCol;                     /* Number of table columns */
-    int i;                        /* Used to iterate through columns */
-
-    /* Loop through the returned columns. Set nStr to the number of bytes of
-    ** space required to store a copy of each column name, including the
-    ** nul-terminator byte. sqlite3_column_name() returns NULL if a malloc
-    ** fails, so treat a NULL name as SQLITE_NOMEM instead of passing it
-    ** to strlen(). */
-    nCol = sqlite3_column_count(pStmt);
-    for(i=0; rc==SQLITE_OK && i<nCol; i++){
-      const char *zCol = sqlite3_column_name(pStmt, i);
-      if( zCol==0 ){
-        rc = SQLITE_NOMEM;
-      }else{
-        nStr += (int)strlen(zCol) + 1;
-      }
-    }
-
-    /* Allocate the array to return: nCol pointers followed by nStr bytes
-    ** of string data, all in a single allocation. */
-    if( rc==SQLITE_OK ){
-      azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr);
-      if( azCol==0 ) rc = SQLITE_NOMEM;
-    }
-
-    /* Populate the array. The names are copied into the space that
-    ** immediately follows the pointer array. */
-    if( rc==SQLITE_OK ){
-      char *p = (char *)&azCol[nCol];
-      for(i=0; i<nCol; i++){
-        const char *zCol = sqlite3_column_name(pStmt, i);
-        int n;
-        if( zCol==0 ){
-          /* OOM while re-fetching a name: discard the partial result. */
-          sqlite3_free((void *)azCol);
-          azCol = 0;
-          rc = SQLITE_NOMEM;
-          break;
-        }
-        n = (int)strlen(zCol)+1;
-        memcpy(p, zCol, n);
-        azCol[i] = p;
-        p += n;
-      }
-    }
-    sqlite3_finalize(pStmt);
-
-    /* Set the output variables. On failure *pazCol is set to NULL so that
-    ** the caller may pass it to sqlite3_free() unconditionally. */
-    *pnCol = nCol;
-    *pnStr = nStr;
-    *pazCol = azCol;
-  }
-
-  return rc;
-}
-
-/*
-** This function is the implementation of both the xConnect and xCreate
-** methods of the FTS3 virtual table.
-**
-** The argv[] array contains the following:
-**
-** argv[0] -> module name ("fts3" or "fts4")
-** argv[1] -> database name
-** argv[2] -> table name
-** argv[...] -> "column name" and other module argument fields.
-*/
-static int fts3InitVtab(
-  int isCreate,                   /* True for xCreate, false for xConnect */
-  sqlite3 *db,                    /* The SQLite database connection */
-  void *pAux,                     /* Hash table containing tokenizers */
-  int argc,                       /* Number of elements in argv array */
-  const char * const *argv,       /* xCreate/xConnect argument array */
-  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
-  char **pzErr                    /* Write any error message here */
-){
-  Fts3Hash *pHash = (Fts3Hash *)pAux;
-  Fts3Table *p = 0;               /* Pointer to allocated vtab */
-  int rc = SQLITE_OK;             /* Return code */
-  int i;                          /* Iterator variable */
-  int nByte;                      /* Size of allocation used for *p */
-  int iCol;                       /* Column index */
-  int nString = 0;                /* Bytes required to hold all column names */
-  int nCol = 0;                   /* Number of columns in the FTS table */
-  char *zCsr;                     /* Space for holding column names */
-  int nDb;                        /* Bytes required to hold database name */
-  int nName;                      /* Bytes required to hold table name */
-  int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
-  const char **aCol;              /* Array of column names */
-  sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */
-
-  int nIndex;                     /* Size of aIndex[] array */
-  struct Fts3Index *aIndex = 0;   /* Array of indexes for this table */
-
-  /* The results of parsing supported FTS4 key=value options: */
-  int bNoDocsize = 0;             /* True to omit %_docsize table */
-  int bDescIdx = 0;               /* True to store descending indexes */
-  char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
-  char *zCompress = 0;            /* compress=? parameter (or NULL) */
-  char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
-  char *zContent = 0;             /* content=? parameter (or NULL) */
-  char *zLanguageid = 0;          /* languageid=? parameter (or NULL) */
-
-  assert( strlen(argv[0])==4 );
-  assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
-       || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
-  );
-
-  nDb = (int)strlen(argv[1]) + 1;
-  nName = (int)strlen(argv[2]) + 1;
-
-  /* argc-2 slots: one per possible column argument plus one spare, so that
-  ** the default "content" column below always has a slot. */
-  aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) );
-  if( !aCol ) return SQLITE_NOMEM;
-  memset((void *)aCol, 0, sizeof(const char *) * (argc-2));
-
-  /* Loop through all of the arguments passed by the user to the FTS3/4
-  ** module (i.e. all the column names and special arguments). This loop
-  ** does the following:
-  **
-  **   + Figures out the number of columns the FTSX table will have, and
-  **     the number of bytes of space that must be allocated to store copies
-  **     of the column names.
-  **
-  **   + If there is a tokenizer specification included in the arguments,
-  **     initializes the tokenizer pTokenizer.
-  */
-  for(i=3; rc==SQLITE_OK && i<argc; i++){
-    char const *z = argv[i];
-    int nKey;
-    char *zVal;
-
-    /* Check if this is a tokenizer specification */
-    if( !pTokenizer
-     && strlen(z)>8
-     && 0==sqlite3_strnicmp(z, "tokenize", 8)
-     && 0==sqlite3Fts3IsIdChar(z[8])
-    ){
-      rc = sqlite3Fts3InitTokenizer(pHash, &z[9], &pTokenizer, pzErr);
-    }
-
-    /* Check if it is an FTS4 special argument. */
-    else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){
-      struct Fts4Option {
-        const char *zOpt;
-        int nOpt;
-      } aFts4Opt[] = {
-        { "matchinfo",   9 },     /* 0 -> MATCHINFO */
-        { "prefix",      6 },     /* 1 -> PREFIX */
-        { "compress",    8 },     /* 2 -> COMPRESS */
-        { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
-        { "order",       5 },     /* 4 -> ORDER */
-        { "content",     7 },     /* 5 -> CONTENT */
-        { "languageid", 10 }      /* 6 -> LANGUAGEID */
-      };
-
-      int iOpt;
-      if( !zVal ){
-        rc = SQLITE_NOMEM;
-      }else{
-        for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){
-          struct Fts4Option *pOp = &aFts4Opt[iOpt];
-          if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){
-            break;
-          }
-        }
-        if( iOpt==SizeofArray(aFts4Opt) ){
-          *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
-          rc = SQLITE_ERROR;
-        }else{
-          /* For options that keep the value, ownership of zVal moves to the
-          ** matching z* variable and zVal is cleared so that it is not
-          ** freed below. */
-          switch( iOpt ){
-            case 0:               /* MATCHINFO */
-              if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){
-                *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal);
-                rc = SQLITE_ERROR;
-              }
-              bNoDocsize = 1;
-              break;
-
-            case 1:               /* PREFIX */
-              sqlite3_free(zPrefix);
-              zPrefix = zVal;
-              zVal = 0;
-              break;
-
-            case 2:               /* COMPRESS */
-              sqlite3_free(zCompress);
-              zCompress = zVal;
-              zVal = 0;
-              break;
-
-            case 3:               /* UNCOMPRESS */
-              sqlite3_free(zUncompress);
-              zUncompress = zVal;
-              zVal = 0;
-              break;
-
-            case 4:               /* ORDER */
-              if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3))
-               && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4))
-              ){
-                *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
-                rc = SQLITE_ERROR;
-              }
-              bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
-              break;
-
-            case 5:               /* CONTENT */
-              sqlite3_free(zContent);
-              zContent = zVal;
-              zVal = 0;
-              break;
-
-            case 6:               /* LANGUAGEID */
-              assert( iOpt==6 );
-              sqlite3_free(zLanguageid);
-              zLanguageid = zVal;
-              zVal = 0;
-              break;
-          }
-        }
-        sqlite3_free(zVal);
-      }
-    }
-
-    /* Otherwise, the argument is a column name. */
-    else {
-      nString += (int)(strlen(z) + 1);
-      aCol[nCol++] = z;
-    }
-  }
-
-  /* If a content=xxx option was specified, the following:
-  **
-  **   1. Ignore any compress= and uncompress= options.
-  **
-  **   2. If no column names were specified as part of the CREATE VIRTUAL
-  **      TABLE statement, use all columns from the content table.
-  */
-  if( rc==SQLITE_OK && zContent ){
-    sqlite3_free(zCompress);
-    sqlite3_free(zUncompress);
-    zCompress = 0;
-    zUncompress = 0;
-    if( nCol==0 ){
-      sqlite3_free((void*)aCol);
-      aCol = 0;
-      rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
-
-      /* If a languageid= option was specified, remove the language id
-      ** column from the aCol[] array. */
-      if( rc==SQLITE_OK && zLanguageid ){
-        int j;
-        for(j=0; j<nCol; j++){
-          if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
-            int k;
-            /* Shift the remaining entries down by one. The loop stops at
-            ** nCol-1 so that aCol[k+1] never reads past the last valid
-            ** entry of the pointer array. */
-            for(k=j; k<nCol-1; k++) aCol[k] = aCol[k+1];
-            nCol--;
-            break;
-          }
-        }
-      }
-    }
-  }
-  if( rc!=SQLITE_OK ) goto fts3_init_out;
-
-  /* No columns at all: fall back to a single column named "content"
-  ** (7 characters plus the nul-terminator). */
-  if( nCol==0 ){
-    assert( nString==0 );
-    aCol[0] = "content";
-    nString = 8;
-    nCol = 1;
-  }
-
-  if( pTokenizer==0 ){
-    rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
-    if( rc!=SQLITE_OK ) goto fts3_init_out;
-  }
-  assert( pTokenizer );
-
-  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
-  if( rc==SQLITE_ERROR ){
-    assert( zPrefix );
-    *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
-  }
-  if( rc!=SQLITE_OK ) goto fts3_init_out;
-
-  /* Allocate and populate the Fts3Table structure. Everything lives in a
-  ** single allocation, laid out in the order listed here. */
-  nByte = sizeof(Fts3Table) +                  /* Fts3Table */
-          nCol * sizeof(char *) +              /* azColumn */
-          nIndex * sizeof(struct Fts3Index) +  /* aIndex */
-          nName +                              /* zName */
-          nDb +                                /* zDb */
-          nString;                             /* Space for azColumn strings */
-  p = (Fts3Table*)sqlite3_malloc(nByte);
-  if( p==0 ){
-    rc = SQLITE_NOMEM;
-    goto fts3_init_out;
-  }
-  memset(p, 0, nByte);
-  p->db = db;
-  p->nColumn = nCol;
-  p->nPendingData = 0;
-  p->azColumn = (char **)&p[1];
-  p->pTokenizer = pTokenizer;
-  p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
-  p->bHasDocsize = (isFts4 && bNoDocsize==0);
-  p->bHasStat = isFts4;
-  p->bFts4 = isFts4;
-  p->bDescIdx = bDescIdx;
-  p->bAutoincrmerge = 0xff;   /* 0xff means setting unknown */
-  /* The table object takes over zContent and zLanguageid; clear the locals
-  ** so that the cleanup code below does not free them. */
-  p->zContentTbl = zContent;
-  p->zLanguageid = zLanguageid;
-  zContent = 0;
-  zLanguageid = 0;
-  TESTONLY( p->inTransaction = -1 );
-  TESTONLY( p->mxSavepoint = -1 );
-
-  p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
-  memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
-  p->nIndex = nIndex;
-  for(i=0; i<nIndex; i++){
-    fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1);
-  }
-
-  /* Fill in the zName and zDb fields of the vtab structure. */
-  zCsr = (char *)&p->aIndex[nIndex];
-  p->zName = zCsr;
-  memcpy(zCsr, argv[2], nName);
-  zCsr += nName;
-  p->zDb = zCsr;
-  memcpy(zCsr, argv[1], nDb);
-  zCsr += nDb;
-
-  /* Fill in the azColumn array */
-  for(iCol=0; iCol<nCol; iCol++){
-    char *z;
-    int n = 0;
-    z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n);
-    memcpy(zCsr, z, n);
-    zCsr[n] = '\0';
-    sqlite3Fts3Dequote(zCsr);
-    p->azColumn[iCol] = zCsr;
-    zCsr += n+1;
-    assert( zCsr <= &((char *)p)[nByte] );
-  }
-
-  /* compress= and uncompress= must be specified together or not at all. */
-  if( (zCompress==0)!=(zUncompress==0) ){
-    char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
-    rc = SQLITE_ERROR;
-    *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss);
-  }
-  p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc);
-  p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc);
-  if( rc!=SQLITE_OK ) goto fts3_init_out;
-
-  /* If this is an xCreate call, create the underlying tables in the
-  ** database. TODO: For xConnect(), it could verify that said tables exist.
-  */
-  if( isCreate ){
-    rc = fts3CreateTables(p);
-  }
-
-  /* Check to see if a legacy fts3 table has been "upgraded" by the
-  ** addition of a %_stat table so that it can use incremental merge.
-  */
-  if( !isFts4 && !isCreate ){
-    int rc2 = SQLITE_OK;
-    fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2",
-               p->zDb, p->zName);
-    if( rc2==SQLITE_OK ) p->bHasStat = 1;
-  }
-
-  /* Figure out the page-size for the database. This is required in order to
-  ** estimate the cost of loading large doclists from the database.  */
-  fts3DatabasePageSize(&rc, p);
-  p->nNodeSize = p->nPgsz-35;
-
-  /* Declare the table schema to SQLite. */
-  fts3DeclareVtab(&rc, p);
-
-fts3_init_out:
-  /* Release all temporaries. Pointers whose ownership moved to *p were set
-  ** to NULL above, so freeing them here is a no-op. */
-  sqlite3_free(zPrefix);
-  sqlite3_free(aIndex);
-  sqlite3_free(zCompress);
-  sqlite3_free(zUncompress);
-  sqlite3_free(zContent);
-  sqlite3_free(zLanguageid);
-  sqlite3_free((void *)aCol);
-  if( rc!=SQLITE_OK ){
-    if( p ){
-      fts3DisconnectMethod((sqlite3_vtab *)p);
-    }else if( pTokenizer ){
-      pTokenizer->pModule->xDestroy(pTokenizer);
-    }
-  }else{
-    assert( p->pSegments==0 );
-    *ppVTab = &p->base;
-  }
-  return rc;
-}
-
-/*
-** The xConnect() and xCreate() methods for the virtual table. All the
-** work is done in function fts3InitVtab().
-*/
-static int fts3ConnectMethod(
-  sqlite3 *db,                    /* Database connection */
-  void *pAux,                     /* Pointer to tokenizer hash table */
-  int argc,                       /* Number of elements in argv array */
-  const char * const *argv,       /* xCreate/xConnect argument array */
-  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
-  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
-){
-  /* xConnect: forward everything to fts3InitVtab() with isCreate false. */
-  const int isCreate = 0;
-  int rc = fts3InitVtab(isCreate, db, pAux, argc, argv, ppVtab, pzErr);
-  return rc;
-}
-static int fts3CreateMethod(
-  sqlite3 *db,                    /* Database connection */
-  void *pAux,                     /* Pointer to tokenizer hash table */
-  int argc,                       /* Number of elements in argv array */
-  const char * const *argv,       /* xCreate/xConnect argument array */
-  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
-  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
-){
-  /* xCreate: forward everything to fts3InitVtab() with isCreate true. */
-  const int isCreate = 1;
-  int rc = fts3InitVtab(isCreate, db, pAux, argc, argv, ppVtab, pzErr);
-  return rc;
-}
-
-/*
-** Implementation of the xBestIndex method for FTS3 tables. There
-** are three possible strategies, in order of preference:
-**
-** 1. Direct lookup by rowid or docid.
-** 2. Full-text search using a MATCH operator on a non-docid column.
-** 3. Linear scan of %_content table.
-*/
-static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
-  Fts3Table *pTab = (Fts3Table *)pVTab;
-  int iDocidCol = pTab->nColumn + 1;   /* Column number of the docid column */
-  int iLangidCol = pTab->nColumn + 2;  /* Column number of the langid column */
-  int iBest = -1;                      /* Index of constraint to use */
-  int iLangid = -1;                    /* Index of langid=x constraint */
-  int ii;                              /* Used to iterate constraints */
-
-  /* Start from the most expensive plan, a full table scan, and then look
-  ** through the usable constraints for something cheaper. */
-  pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
-  pInfo->estimatedCost = 500000;
-
-  for(ii=0; ii<pInfo->nConstraint; ii++){
-    struct sqlite3_index_constraint *pC = &pInfo->aConstraint[ii];
-    int isEq;
-    int isMatch;
-
-    if( !pC->usable ) continue;
-    isEq = (pC->op==SQLITE_INDEX_CONSTRAINT_EQ);
-    isMatch = (pC->op==SQLITE_INDEX_CONSTRAINT_MATCH);
-
-    /* Equality on rowid or docid: direct lookup with cost 1.0. Only taken
-    ** when no other constraint has been selected yet. */
-    if( isEq && iBest<0 && (pC->iColumn<0 || pC->iColumn==iDocidCol) ){
-      pInfo->idxNum = FTS3_DOCID_SEARCH;
-      pInfo->estimatedCost = 1.0;
-      iBest = ii;
-    }
-
-    /* MATCH on one of columns 0..nColumn: full-text search. This replaces
-    ** any rowid/docid lookup chosen above, because selecting the lookup
-    ** instead would lead to an "unable to use function MATCH in the
-    ** requested context" error. */
-    if( isMatch && pC->iColumn>=0 && pC->iColumn<=pTab->nColumn ){
-      pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pC->iColumn;
-      pInfo->estimatedCost = 2.0;
-      iBest = ii;
-    }
-
-    /* Equality on the langid column is remembered separately. */
-    if( isEq && pC->iColumn==iLangidCol ){
-      iLangid = ii;
-    }
-  }
-
-  /* The chosen constraint becomes argument 1 and is marked omit; a langid
-  ** constraint, if any, becomes argument 2. */
-  if( iBest>=0 ){
-    pInfo->aConstraintUsage[iBest].argvIndex = 1;
-    pInfo->aConstraintUsage[iBest].omit = 1;
-  }
-  if( iLangid>=0 ){
-    pInfo->aConstraintUsage[iLangid].argvIndex = 2;
-  }
-
-  /* A single ORDER BY term on rowid/docid is consumed here, in either
-  ** direction; the direction is passed on through idxStr. */
-  if( pInfo->nOrderBy==1 ){
-    struct sqlite3_index_orderby *pTerm = &pInfo->aOrderBy[0];
-    if( pTerm->iColumn<0 || pTerm->iColumn==iDocidCol ){
-      pInfo->idxStr = pTerm->desc ? "DESC" : "ASC";
-      pInfo->orderByConsumed = 1;
-    }
-  }
-
-  assert( pTab->pSegments==0 );
-  return SQLITE_OK;
-}
-
-/*
-** Implementation of xOpen method.
-*/
-static int fts3OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
-  Fts3Cursor *pNew;               /* Newly allocated cursor */
-
-  UNUSED_PARAMETER(pVTab);
-
-  /* Allocate space for one Fts3Cursor. *ppCsr is written before the result
-  ** is checked, so it is set to NULL when SQLITE_NOMEM is returned. On
-  ** success the new cursor is zeroed. */
-  pNew = (Fts3Cursor *)sqlite3_malloc(sizeof(Fts3Cursor));
-  *ppCsr = (sqlite3_vtab_cursor *)pNew;
-  if( pNew==0 ) return SQLITE_NOMEM;
-
-  memset(pNew, 0, sizeof(Fts3Cursor));
-  return SQLITE_OK;
-}
-
-/*
-** Close the cursor. For additional information see the documentation
-** on the xClose method of the virtual table interface.
-*/
-static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
-  Fts3Cursor *pFts = (Fts3Cursor *)pCursor;
-
-  assert( ((Fts3Table *)pFts->base.pVtab)->pSegments==0 );
-
-  /* Release everything hanging off the cursor before the cursor itself. */
-  sqlite3_finalize(pFts->pStmt);
-  sqlite3Fts3ExprFree(pFts->pExpr);
-  sqlite3Fts3FreeDeferredTokens(pFts);
-  sqlite3_free(pFts->aDoclist);
-  sqlite3_free(pFts->aMatchinfo);
-
-  assert( ((Fts3Table *)pFts->base.pVtab)->pSegments==0 );
-  sqlite3_free(pFts);
-  return SQLITE_OK;
-}
-
-/*
-** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then
-** compose and prepare an SQL statement of the form:
-**
-** "SELECT <columns> FROM %_content WHERE rowid = ?"
-**
-** (or the equivalent for a content=xxx table) and set pCsr->pStmt to
-** it. If an error occurs, return an SQLite error code.
-**
-** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK.
-*/
-static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){
-  Fts3Table *pTab;                /* Table that owns this cursor */
-  char *zQuery;                   /* Text of the SELECT statement */
-  int rc;                         /* Result of preparing zQuery */
-
-  /* The statement is prepared once and then reused on later calls. */
-  if( pCsr->pStmt ){
-    *ppStmt = pCsr->pStmt;
-    return SQLITE_OK;
-  }
-
-  pTab = (Fts3Table *)pCsr->base.pVtab;
-  zQuery = sqlite3_mprintf("SELECT %s WHERE rowid = ?", pTab->zReadExprlist);
-  if( zQuery==0 ) return SQLITE_NOMEM;   /* *ppStmt is left untouched */
-
-  rc = sqlite3_prepare_v2(pTab->db, zQuery, -1, &pCsr->pStmt, 0);
-  sqlite3_free(zQuery);
-
-  *ppStmt = pCsr->pStmt;
-  return rc;
-}
-
-/*
-** Position the pCsr->pStmt statement so that it is on the row
-** of the %_content table that contains the last match. Return
-** SQLITE_OK on success.
-*/
-static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
-  sqlite3_stmt *pStmt = 0;        /* Receives pCsr->pStmt; not used further */
-  int rc;                         /* Return code */
-
-  /* Nothing to do if the statement is already positioned. */
-  if( !pCsr->isRequireSeek ) return SQLITE_OK;
-
-  rc = fts3CursorSeekStmt(pCsr, &pStmt);
-  if( rc==SQLITE_OK ){
-    sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
-    pCsr->isRequireSeek = 0;
-    if( sqlite3_step(pCsr->pStmt)==SQLITE_ROW ){
-      return SQLITE_OK;
-    }
-
-    /* No row was returned. If resetting the statement reports no error and
-    ** this is not a content=xxx table, then the %_content table is missing
-    ** a row that is present in the full-text index: the data structures
-    ** are corrupt. */
-    rc = sqlite3_reset(pCsr->pStmt);
-    if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
-      rc = FTS_CORRUPT_VTAB;
-      pCsr->isEof = 1;
-    }
-  }
-
-  /* Report any failure through the SQL function context, if one was given. */
-  if( rc!=SQLITE_OK && pContext ){
-    sqlite3_result_error_code(pContext, rc);
-  }
-  return rc;
-}
-
-/*
-** This function is used to process a single interior node when searching
-** a b-tree for a term or term prefix. The node data is passed to this
-** function via the zNode/nNode parameters. The term to search for is
-** passed in zTerm/nTerm.
-**
-** If piFirst is not NULL, then this function sets *piFirst to the blockid
-** of the child node that heads the sub-tree that may contain the term.
-**
-** If piLast is not NULL, then *piLast is set to the right-most child node
-** that heads a sub-tree that may contain a term for which zTerm/nTerm is
-** a prefix.
-**
-** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
-*/
-static int fts3ScanInteriorNode(
-  const char *zTerm,              /* Term to select leaves for */
-  int nTerm,                      /* Size of term zTerm in bytes */
-  const char *zNode,              /* Buffer containing segment interior node */
-  int nNode,                      /* Size of buffer at zNode */
-  sqlite3_int64 *piFirst,         /* OUT: Selected child node */
-  sqlite3_int64 *piLast           /* OUT: Selected child node */
-){
-  int rc = SQLITE_OK;             /* Return code */
-  const char *zCsr = zNode;       /* Cursor to iterate through node */
-  const char *zEnd = &zCsr[nNode];/* End of interior node buffer */
-  char *zBuffer = 0;              /* Buffer to load terms into */
-  int nAlloc = 0;                 /* Size of allocated buffer */
-  int nBuffer = 0;                /* Size of the term currently in zBuffer */
-  int isFirstTerm = 1;            /* True when processing first term on page */
-  sqlite3_int64 iChild;           /* Block id of child node to descend to */
-
-  /* Skip over the 'height' varint that occurs at the start of every
-  ** interior node. Then load the blockid of the left-child of the b-tree
-  ** node into variable iChild.
-  **
-  ** Even if the data structure on disk is corrupted, this (reading two
-  ** varints from the buffer) does not risk an overread. If zNode is a
-  ** root node, then the buffer comes from a SELECT statement. SQLite does
-  ** not make this guarantee explicitly, but in practice there are always
-  ** either more than 20 bytes of allocated space following the nNode bytes of
-  ** contents, or two zero bytes. Or, if the node is read from the %_segments
-  ** table, then there are always 20 bytes of zeroed padding following the
-  ** nNode bytes of content (see sqlite3Fts3ReadBlock() for details).
-  */
-  zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
-  zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
-  if( zCsr>zEnd ){
-    return FTS_CORRUPT_VTAB;
-  }
-
-  while( zCsr<zEnd && (piFirst || piLast) ){
-    int cmp;                      /* memcmp() result */
-    int nSuffix;                  /* Size of term suffix */
-    int nPrefix = 0;              /* Size of term prefix */
-
-    /* Load the next term on the node into zBuffer. Use realloc() to expand
-    ** the size of zBuffer if required. Every term except the first is
-    ** stored as a prefix length (bytes shared with the previous term)
-    ** followed by a suffix. */
-    if( !isFirstTerm ){
-      zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix);
-    }
-    isFirstTerm = 0;
-    zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
-
-    /* Reject corrupt records:
-    **
-    **   + a prefix longer than the previous term would make the memcmp()
-    **     below read bytes of zBuffer that were never written;
-    **   + an empty suffix cannot produce a term larger than the previous
-    **     one, and on the first term would leave zBuffer unallocated;
-    **   + the suffix must lie entirely within the node. Lengths are
-    **     compared, rather than forming &zCsr[nSuffix], so that a huge
-    **     nSuffix cannot overflow the pointer.
-    */
-    if( nPrefix<0 || nPrefix>nBuffer
-     || nSuffix<=0 || nSuffix>(zEnd - zCsr)
-    ){
-      rc = FTS_CORRUPT_VTAB;
-      goto finish_scan;
-    }
-    if( nPrefix+nSuffix>nAlloc ){
-      char *zNew;
-      nAlloc = (nPrefix+nSuffix) * 2;
-      zNew = (char *)sqlite3_realloc(zBuffer, nAlloc);
-      if( !zNew ){
-        rc = SQLITE_NOMEM;
-        goto finish_scan;
-      }
-      zBuffer = zNew;
-    }
-    assert( zBuffer );
-    memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
-    nBuffer = nPrefix + nSuffix;
-    zCsr += nSuffix;
-
-    /* Compare the term we are searching for with the term just loaded from
-    ** the interior node. If the specified term is greater than or equal
-    ** to the term from the interior node, then all terms on the sub-tree
-    ** headed by node iChild are smaller than zTerm. No need to search
-    ** iChild.
-    **
-    ** If the interior node term is larger than the specified term, then
-    ** the tree headed by iChild may contain the specified term.
-    */
-    cmp = memcmp(zTerm, zBuffer, (nBuffer>nTerm ? nTerm : nBuffer));
-    if( piFirst && (cmp<0 || (cmp==0 && nBuffer>nTerm)) ){
-      *piFirst = iChild;
-      piFirst = 0;
-    }
-
-    if( piLast && cmp<0 ){
-      *piLast = iChild;
-      piLast = 0;
-    }
-
-    iChild++;
-  };
-
-  /* Any output not yet assigned selects the right-most child. */
-  if( piFirst ) *piFirst = iChild;
-  if( piLast ) *piLast = iChild;
-
- finish_scan:
-  sqlite3_free(zBuffer);
-  return rc;
-}
-
-
-/*
-** The buffer pointed to by argument zNode (size nNode bytes) contains an
-** interior node of a b-tree segment. The zTerm buffer (size nTerm bytes)
-** contains a term. This function searches the sub-tree headed by the zNode
-** node for the range of leaf nodes that may contain the specified term
-** or terms for which the specified term is a prefix.
-**
-** If piLeaf is not NULL, then *piLeaf is set to the blockid of the
-** left-most leaf node in the tree that may contain the specified term.
-** If piLeaf2 is not NULL, then *piLeaf2 is set to the blockid of the
-** right-most leaf node that may contain a term for which the specified
-** term is a prefix.
-**
-** It is possible that the range of returned leaf nodes does not contain
-** the specified term or any terms for which it is a prefix. However, if the
-** segment does contain any such terms, they are stored within the identified
-** range. Because this function only inspects interior segment nodes (and
-** never loads leaf nodes into memory), it is not possible to be sure.
-**
-** If an error occurs, an error code other than SQLITE_OK is returned.
-*/
-static int fts3SelectLeaf(
-  Fts3Table *p,                   /* Virtual table handle */
-  const char *zTerm,              /* Term to select leaves for */
-  int nTerm,                      /* Size of term zTerm in bytes */
-  const char *zNode,              /* Buffer containing segment interior node */
-  int nNode,                      /* Size of buffer at zNode */
-  sqlite3_int64 *piLeaf,          /* Selected leaf node */
-  sqlite3_int64 *piLeaf2          /* Selected leaf node */
-){
-  int rc;                         /* Return code */
-  int iHeight;                    /* Height of this node in tree */
-  char *zChild = 0;               /* Blob read from %_segments table */
-  int nChild;                     /* Size of zChild in bytes */
-
-  assert( piLeaf || piLeaf2 );
-
-  /* The node begins with its height. Scan it for the child (or children)
-  ** that may contain the term. */
-  sqlite3Fts3GetVarint32(zNode, &iHeight);
-  rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2);
-  assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) );
-
-  /* Stop on error, or when the children of this node are not themselves
-  ** interior nodes. */
-  if( rc!=SQLITE_OK || iHeight<=1 ){
-    return rc;
-  }
-
-  /* Both ends of the range were requested and they fall in different
-  ** children: descend the left-hand child for *piLeaf only, and leave
-  ** *piLeaf2 to the descent below. */
-  if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){
-    rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zChild, &nChild, 0);
-    if( rc==SQLITE_OK ){
-      rc = fts3SelectLeaf(p, zTerm, nTerm, zChild, nChild, piLeaf, 0);
-    }
-    sqlite3_free(zChild);
-    zChild = 0;
-    piLeaf = 0;
-  }
-
-  /* Descend into the remaining child for whichever outputs are still set. */
-  if( rc==SQLITE_OK ){
-    sqlite3_int64 iBlock = piLeaf ? *piLeaf : *piLeaf2;
-    rc = sqlite3Fts3ReadBlock(p, iBlock, &zChild, &nChild, 0);
-  }
-  if( rc==SQLITE_OK ){
-    rc = fts3SelectLeaf(p, zTerm, nTerm, zChild, nChild, piLeaf, piLeaf2);
-  }
-  sqlite3_free(zChild);
-
-  return rc;
-}
-
-/*
-** This function is used to create delta-encoded serialized lists of FTS3
-** varints. Each call to this function appends a single varint to a list.
-*/
-static void fts3PutDeltaVarint(
-  char **pp,                      /* IN/OUT: Output pointer */
-  sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
-  sqlite3_int64 iVal              /* Write this value to the list */
-){
-  /* Only the difference from the previous value is stored. */
-  sqlite3_int64 iDelta = iVal - *piPrev;
-
-  assert( iDelta > 0 || (*piPrev==0 && iVal==0) );
-  *pp += sqlite3Fts3PutVarint(*pp, iDelta);
-  *piPrev = iVal;
-}
-
-/*
-** When this function is called, *ppPoslist is assumed to point to the
-** start of a position-list. After it returns, *ppPoslist points to the
-** first byte after the position-list.
-**
-** A position list is list of positions (delta encoded) and columns for
-** a single document record of a doclist. So, in other words, this
-** routine advances *ppPoslist so that it points to the next docid in
-** the doclist, or to the first byte past the end of the doclist.
-**
-** If pp is not NULL, then the contents of the position list are copied
-** to *pp. *pp is set to point to the first byte past the last byte copied
-** before this function returns.
-*/
-static void fts3PoslistCopy(char **pp, char **ppPoslist){
-  char *zStart = *ppPoslist;      /* First byte of the position-list */
-  char *zEnd = zStart;            /* Scans forward to the terminator */
-  char prevHi = 0;                /* 0x80 bit of the previous byte */
-
-  /* The list ends at a 0x00 byte (POS_END) that is not the tail of a
-  ** multi-byte varint, i.e. one not directly preceded by a byte with the
-  ** 0x80 bit set. Move zEnd forward until it rests on such a byte. */
-  while( (*zEnd | prevHi)!=0 ){
-    prevHi = *zEnd & 0x80;
-    zEnd++;
-    testcase( prevHi!=0 && (*zEnd)==0 );
-  }
-  zEnd++;                         /* Step over the POS_END terminator */
-
-  /* Optionally copy the list, terminator included, and advance *pp. */
-  if( pp ){
-    int nCopy = (int)(zEnd - zStart);
-    memcpy(*pp, zStart, nCopy);
-    *pp += nCopy;
-  }
-  *ppPoslist = zEnd;
-}
-
-/*
-** When this function is called, *ppPoslist is assumed to point to the
-** start of a column-list. After it returns, *ppPoslist points to the
-** terminator (POS_COLUMN or POS_END) byte of the column-list.
-**
-** A column-list is list of delta-encoded positions for a single column
-** within a single document within a doclist.
-**
-** The column-list is terminated either by a POS_COLUMN varint (1) or
-** a POS_END varint (0). This routine leaves *ppPoslist pointing to
-** the POS_COLUMN or POS_END that terminates the column-list.
-**
-** If pp is not NULL, then the contents of the column-list are copied
-** to *pp. *pp is set to point to the first byte past the last byte copied
-** before this function returns. The POS_COLUMN or POS_END terminator
-** is not copied into *pp.
-*/
-static void fts3ColumnlistCopy(char **pp, char **ppPoslist){
-  char *zStart = *ppPoslist;      /* First byte of the column-list */
-  char *zEnd = zStart;            /* Scans forward to the terminator */
-  char prevHi = 0;                /* 0x80 bit of the previous byte */
-
-  /* The list ends at a 0x00 or 0x01 byte that is not part of a multi-byte
-  ** varint. Masking with 0xFE treats both terminator values alike. */
-  while( (0xFE & (*zEnd | prevHi))!=0 ){
-    prevHi = *zEnd & 0x80;
-    zEnd++;
-    testcase( prevHi!=0 && ((*zEnd)&0xfe)==0 );
-  }
-
-  /* Optionally copy the list, without its terminator, and advance *pp. */
-  if( pp ){
-    int nCopy = (int)(zEnd - zStart);
-    memcpy(*pp, zStart, nCopy);
-    *pp += nCopy;
-  }
-  *ppPoslist = zEnd;              /* Left pointing at the terminator */
-}
-
-/*
-** Value used to signify the end of a position-list. This is safe because
-** it is not possible to have a document with 2^31 terms.
-*/
-#define POSITION_LIST_END 0x7fffffff
-
-/*
-** This function is used to help parse position-lists. When this function is
-** called, *pp may point to the start of the next varint in the position-list
-** being parsed, or it may point to 1 byte past the end of the position-list
-** (in which case **pp will be a terminator byte, either POS_END (0) or
-** POS_COLUMN (1)).
-**
-** If *pp points past the end of the current position-list, set *pi to
-** POSITION_LIST_END and return. Otherwise, read the next varint from *pp,
-** increment the current value of *pi by the value read, and set *pp to
-** point to the next value before returning.
-**
-** Before calling this routine *pi must be initialized to the value of
-** the previous position, or zero if we are reading the first position
-** in the position-list. Because positions are delta-encoded, the value
-** of the previous position is needed in order to compute the value of
-** the next position.
-*/
-static void fts3ReadNextPos(
-  char **pp,                      /* IN/OUT: Pointer into position-list buffer */
-  sqlite3_int64 *pi               /* IN/OUT: Value read from position-list */
-){
-  /* A byte of 0x00 or 0x01 here is a terminator: the list is exhausted. */
-  if( ((**pp)&0xFE)==0 ){
-    *pi = POSITION_LIST_END;
-    return;
-  }
-
-  /* Otherwise add the next delta to *pi, then subtract 2 from the result. */
-  fts3GetDeltaVarint(pp, pi);
-  *pi -= 2;
-}
-
-/*
-** If parameter iCol is not 0, write a POS_COLUMN (1) byte followed by
-** the value of iCol encoded as a varint to *pp. This will start a new
-** column list.
-**
-** Set *pp to point to the byte just after the last byte written before
-** returning (do not modify it if iCol==0). Return the total number of bytes
-** written (0 if iCol==0).
-*/
-static int fts3PutColNumber(char **pp, int iCol){
-  char *zOut;                     /* Output pointer */
-  int nWrite;                     /* Number of bytes written */
-
-  /* Column 0 has no header: write nothing and leave *pp alone. */
-  if( iCol==0 ) return 0;
-
-  /* One 0x01 marker byte followed by the column number as a varint. */
-  zOut = *pp;
-  nWrite = 1 + sqlite3Fts3PutVarint(&zOut[1], iCol);
-  *zOut = 0x01;
-  *pp = &zOut[nWrite];
-  return nWrite;
-}
-
-/*
-** Compute the union of two position lists. The output written
-** into *pp contains all positions of both *pp1 and *pp2 in sorted
-** order and with any duplicates removed. All pointers are
-** updated appropriately. The caller is responsible for insuring
-** that there is enough space in *pp to hold the complete output.
-*/
-static void fts3PoslistMerge(
- char **pp, /* Output buffer */
- char **pp1, /* Left input list */
- char **pp2 /* Right input list */
-){
- char *p = *pp;
- char *p1 = *pp1;
- char *p2 = *pp2;
-
- /* Loop until both inputs are positioned on a zero (POS_END) byte. */
- while( *p1 || *p2 ){
- int iCol1; /* The current column index in pp1 */
- int iCol2; /* The current column index in pp2 */
-
- /* Work out which column each input is currently positioned on: the
- ** number following a POS_COLUMN byte, POSITION_LIST_END at the end of
- ** the list, or 0 when the input points directly at position data. */
- if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1);
- else if( *p1==POS_END ) iCol1 = POSITION_LIST_END;
- else iCol1 = 0;
-
- if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2);
- else if( *p2==POS_END ) iCol2 = POSITION_LIST_END;
- else iCol2 = 0;
-
- if( iCol1==iCol2 ){
- sqlite3_int64 i1 = 0; /* Last position from pp1 */
- sqlite3_int64 i2 = 0; /* Last position from pp2 */
- sqlite3_int64 iPrev = 0;
- /* Write the column header once to the output, then skip the same
- ** number of bytes (n) in both inputs. */
- int n = fts3PutColNumber(&p, iCol1);
- p1 += n;
- p2 += n;
-
- /* At this point, both p1 and p2 point to the start of column-lists
- ** for the same column (the column with index iCol1 and iCol2).
- ** A column-list is a list of non-negative delta-encoded varints, each
- ** incremented by 2 before being stored. Each list is terminated by a
- ** POS_END (0) or POS_COLUMN (1). The following block merges the two lists
- ** and writes the results to buffer p. p is left pointing to the byte
- ** after the list written. No terminator (POS_END or POS_COLUMN) is
- ** written to the output.
- */
- fts3GetDeltaVarint(&p1, &i1);
- fts3GetDeltaVarint(&p2, &i2);
- do {
- /* Emit the smaller of the two current values. iPrev is then reduced by
- ** 2 so that the next delta written is 2 larger than the plain
- ** difference (matching the "incremented by 2" encoding above). */
- fts3PutDeltaVarint(&p, &iPrev, (i1<i2) ? i1 : i2);
- iPrev -= 2;
- /* Advance whichever input supplied the value; on a tie advance both,
- ** so the value is written only once. */
- if( i1==i2 ){
- fts3ReadNextPos(&p1, &i1);
- fts3ReadNextPos(&p2, &i2);
- }else if( i1<i2 ){
- fts3ReadNextPos(&p1, &i1);
- }else{
- fts3ReadNextPos(&p2, &i2);
- }
- }while( i1!=POSITION_LIST_END || i2!=POSITION_LIST_END );
- }else if( iCol1<iCol2 ){
- /* Only the left input has data for the smaller column: copy it over. */
- p1 += fts3PutColNumber(&p, iCol1);
- fts3ColumnlistCopy(&p, &p1);
- }else{
- /* Only the right input has data for the smaller column: copy it over. */
- p2 += fts3PutColNumber(&p, iCol2);
- fts3ColumnlistCopy(&p, &p2);
- }
- }
-
- /* Terminate the output list, then leave each input pointer one byte past
- ** the terminator it stopped on. */
- *p++ = POS_END;
- *pp = p;
- *pp1 = p1 + 1;
- *pp2 = p2 + 1;
-}
-
-/*
-** This function is used to merge two position lists into one. When it is
-** called, *pp1 and *pp2 must both point to position lists. A position-list is
-** the part of a doclist that follows each document id. For example, if a row
-** contains:
-**
-** 'a b c'|'x y z'|'a b b a'
-**
-** Then the position list for this row for token 'b' would consist of:
-**
-** 0x02 0x01 0x02 0x03 0x03 0x00
-**
-** When this function returns, both *pp1 and *pp2 are left pointing to the
-** byte following the 0x00 terminator of their respective position lists.
-**
-** If isSaveLeft is 0, an entry is added to the output position list for
-** each position in *pp2 for which there exists one or more positions in
-** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
-** when the *pp1 token appears before the *pp2 token, but not more than nToken
-** slots before it.
-**
-** e.g. nToken==1 searches for adjacent positions.
-**
-** If isSaveLeft is non-zero, the matching position from *pp1 is written to
-** the output instead of the one from *pp2. If isExact is non-zero, only
-** pairs with pos(*pp2)==pos(*pp1)+nToken are accepted. The two flags are
-** never both set (see the assert() at the top of the function).
-**
-** Returns 1 if at least one position was written; the output is then
-** terminated with 0x00 and *pp is advanced past the terminator. Returns 0,
-** leaving *pp unchanged, if nothing was written.
-*/
-static int fts3PoslistPhraseMerge(
- char **pp, /* IN/OUT: Preallocated output buffer */
- int nToken, /* Maximum difference in token positions */
- int isSaveLeft, /* Save the left position */
- int isExact, /* If *pp1 is exactly nTokens before *pp2 */
- char **pp1, /* IN/OUT: Left input list */
- char **pp2 /* IN/OUT: Right input list */
-){
- char *p = *pp;
- char *p1 = *pp1;
- char *p2 = *pp2;
- int iCol1 = 0;
- int iCol2 = 0;
-
- /* Never set both isSaveLeft and isExact for the same invocation. */
- assert( isSaveLeft==0 || isExact==0 );
-
- assert( p!=0 && *p1!=0 && *p2!=0 );
- if( *p1==POS_COLUMN ){
- p1++;
- p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
- }
- if( *p2==POS_COLUMN ){
- p2++;
- p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
- }
-
- while( 1 ){
- if( iCol1==iCol2 ){
- char *pSave = p; /* Where to rewind to if this column yields no output */
- sqlite3_int64 iPrev = 0;
- sqlite3_int64 iPos1 = 0;
- sqlite3_int64 iPos2 = 0;
-
- if( iCol1 ){
- *p++ = POS_COLUMN;
- p += sqlite3Fts3PutVarint(p, iCol1);
- }
-
- assert( *p1!=POS_END && *p1!=POS_COLUMN );
- assert( *p2!=POS_END && *p2!=POS_COLUMN );
- fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
- fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
-
- while( 1 ){
- if( iPos2==iPos1+nToken
- || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken)
- ){
- sqlite3_int64 iSave;
- iSave = isSaveLeft ? iPos1 : iPos2;
- fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
- pSave = 0; /* Output produced for this column: keep the column header */
- assert( p );
- }
- if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
- if( (*p2&0xFE)==0 ) break; /* Next byte is 0x00 or 0x01: right column list exhausted */
- fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2;
- }else{
- if( (*p1&0xFE)==0 ) break; /* Next byte is 0x00 or 0x01: left column list exhausted */
- fts3GetDeltaVarint(&p1, &iPos1); iPos1 -= 2;
- }
- }
-
- if( pSave ){
- assert( pp && p );
- p = pSave; /* Nothing matched in this column: discard the column header */
- }
-
- fts3ColumnlistCopy(0, &p1);
- fts3ColumnlistCopy(0, &p2);
- assert( (*p1&0xFE)==0 && (*p2&0xFE)==0 );
- if( 0==*p1 || 0==*p2 ) break;
-
- p1++;
- p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
- p2++;
- p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
- }
-
- /* Advance pointer p1 or p2 (whichever corresponds to the smaller of
- ** iCol1 and iCol2) so that it points to either the 0x00 that marks the
- ** end of the position list, or the 0x01 that precedes the next
- ** column-number in the position list.
- */
- else if( iCol1<iCol2 ){
- fts3ColumnlistCopy(0, &p1);
- if( 0==*p1 ) break;
- p1++;
- p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
- }else{
- fts3ColumnlistCopy(0, &p2);
- if( 0==*p2 ) break;
- p2++;
- p2 += sqlite3Fts3GetVarint32(p2, &iCol2);
- }
- }
-
- fts3PoslistCopy(0, &p2);
- fts3PoslistCopy(0, &p1);
- *pp1 = p1;
- *pp2 = p2;
- if( *pp==p ){
- return 0; /* Output pointer never moved: nothing was written */
- }
- *p++ = 0x00;
- *pp = p;
- return 1;
-}
-
-/*
-** Merge two position-lists as required by the NEAR operator. The argument
-** position lists correspond to the left and right phrases of an expression
-** like:
-**
-** "phrase 1" NEAR "phrase number 2"
-**
-** Position list *pp1 corresponds to the left-hand side of the NEAR
-** expression and *pp2 to the right. As usual, the indexes in the position
-** lists are the offsets of the last token in each phrase (tokens "1" and "2"
-** in the example above).
-**
-** The output position list - written to *pp - is a copy of *pp2 with those
-** entries that are not sufficiently NEAR entries in *pp1 removed.
-**
-** aTmp is scratch space that receives the output of two
-** fts3PoslistPhraseMerge() passes; the second is written immediately after
-** the first.
-**
-** Returns 1 if at least one of the two passes produced output (which is then
-** merged or copied into *pp), or 0 if neither did.
-*/
-static int fts3PoslistNearMerge(
- char **pp, /* Output buffer */
- char *aTmp, /* Temporary buffer space */
- int nRight, /* Maximum difference in token positions */
- int nLeft, /* Maximum difference in token positions */
- char **pp1, /* IN/OUT: Left input list */
- char **pp2 /* IN/OUT: Right input list */
-){
- char *p1 = *pp1;
- char *p2 = *pp2;
-
- char *pTmp1 = aTmp;
- char *pTmp2;
- char *aTmp2;
- int res = 1;
-
- fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
- aTmp2 = pTmp2 = pTmp1; /* Second intermediate list starts where the first ended */
- *pp1 = p1; /* Rewind both inputs for the second pass */
- *pp2 = p2;
- fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); /* Roles swapped, isSaveLeft set */
- if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
- fts3PoslistMerge(pp, &aTmp, &aTmp2);
- }else if( pTmp1!=aTmp ){
- fts3PoslistCopy(pp, &aTmp);
- }else if( pTmp2!=aTmp2 ){
- fts3PoslistCopy(pp, &aTmp2);
- }else{
- res = 0;
- }
-
- return res;
-}
-
-/*
-** An instance of this structure is used to merge together the (potentially
-** large number of) doclists for each term that matches a prefix query.
-** See function fts3TermSelectMerge() for details.
-**
-** Entry i of aaOutput[] and entry i of anOutput[] describe the same buffer.
-** A NULL aaOutput[] entry marks a slot that currently holds no doclist.
-*/
-typedef struct TermSelect TermSelect;
-struct TermSelect {
- char *aaOutput[16]; /* Malloc'd output buffers */
- int anOutput[16]; /* Size each output buffer in bytes */
-};
-
-/*
-** This function is used to read a single varint from a buffer. Parameter
-** pEnd points 1 byte past the end of the buffer. When this function is
-** called, if *pp points to pEnd or greater, then the end of the buffer
-** has been reached. In this case *pp is set to 0 and the function returns.
-** *pVal is left unmodified in that case.
-**
-** If *pp does not point to or past pEnd, then a single varint is read
-** from *pp. *pp is then set to point 1 byte past the end of the read varint.
-**
-** If bDescIdx is false, the value read is added to *pVal before returning.
-** If it is true, the value read is subtracted from *pVal before this
-** function returns.
-*/
-static void fts3GetDeltaVarint3(
- char **pp, /* IN/OUT: Point to read varint from */
- char *pEnd, /* End of buffer */
- int bDescIdx, /* True if docids are descending */
- sqlite3_int64 *pVal /* IN/OUT: Integer value */
-){
- if( *pp>=pEnd ){
- *pp = 0; /* NULL tells the caller the buffer is exhausted */
- }else{
- sqlite3_int64 iVal;
- *pp += sqlite3Fts3GetVarint(*pp, &iVal);
- if( bDescIdx ){
- *pVal -= iVal;
- }else{
- *pVal += iVal;
- }
- }
-}
-
-/*
-** This function is used to write a single varint to a buffer. The varint
-** is written to *pp. Before returning, *pp is set to point 1 byte past the
-** end of the value written.
-**
-** If *pbFirst is zero when this function is called, the value written to
-** the buffer is that of parameter iVal. (The code computes iVal-*piPrev in
-** this case, and asserts that *piPrev is 0.)
-**
-** If *pbFirst is non-zero when this function is called, then the value
-** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
-** (if bDescIdx is non-zero). The code asserts that this delta is positive.
-**
-** Before returning, this function always sets *pbFirst to 1 and *piPrev
-** to the value of parameter iVal.
-*/
-static void fts3PutDeltaVarint3(
- char **pp, /* IN/OUT: Output pointer */
- int bDescIdx, /* True for descending docids */
- sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */
- int *pbFirst, /* IN/OUT: True after first int written */
- sqlite3_int64 iVal /* Write this value to the list */
-){
- sqlite3_int64 iWrite;
- if( bDescIdx==0 || *pbFirst==0 ){
- iWrite = iVal - *piPrev;
- }else{
- iWrite = *piPrev - iVal;
- }
- assert( *pbFirst || *piPrev==0 );
- assert( *pbFirst==0 || iWrite>0 );
- *pp += sqlite3Fts3PutVarint(*pp, iWrite);
- *piPrev = iVal;
- *pbFirst = 1;
-}
-
-
-/*
-** This macro is used by various functions that merge doclists. The two
-** arguments are 64-bit docid values. If the value of the stack variable
-** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2).
-** Otherwise, (i2-i1).
-**
-** Using this makes it easier to write code that can merge doclists that are
-** sorted in either ascending or descending order.
-**
-** The expansion refers to a variable named bDescDoclist that must be in
-** scope at the point of use. The macro arguments are not individually
-** parenthesized in the expansion, so only simple operands (such as plain
-** variables) should be passed.
-*/
-#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
-
-/*
-** This function does an "OR" merge of two doclists (output contains all
-** positions contained in either argument doclist). If the docids in the
-** input doclists are sorted in ascending order, parameter bDescDoclist
-** should be false. If they are sorted in descending order, it should be
-** passed a non-zero value.
-**
-** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
-** containing the output doclist and SQLITE_OK is returned. In this case
-** *pnOut is set to the number of bytes in the output doclist.
-**
-** If an error occurs, an SQLite error code is returned. Callers should not
-** rely on the output values in this case (the only failure path in this
-** function is SQLITE_NOMEM, reached after both outputs have been zeroed).
-*/
-static int fts3DoclistOrMerge(
- int bDescDoclist, /* True if arguments are desc */
- char *a1, int n1, /* First doclist */
- char *a2, int n2, /* Second doclist */
- char **paOut, int *pnOut /* OUT: Malloc'd doclist */
-){
- sqlite3_int64 i1 = 0;
- sqlite3_int64 i2 = 0;
- sqlite3_int64 iPrev = 0;
- char *pEnd1 = &a1[n1];
- char *pEnd2 = &a2[n2];
- char *p1 = a1;
- char *p2 = a2;
- char *p;
- char *aOut;
- int bFirstOut = 0;
-
- *paOut = 0;
- *pnOut = 0;
-
- /* Allocate space for the output. Both the input and output doclists
- ** are delta encoded. If they are in ascending order (bDescDoclist==0),
- ** then the first docid in each list is simply encoded as a varint. For
- ** each subsequent docid, the varint stored is the difference between the
- ** current and previous docid (a positive number - since the list is in
- ** ascending order).
- **
- ** The first docid written to the output is therefore encoded using the
- ** same number of bytes as it is in whichever of the input lists it is
- ** read from. And each subsequent docid read from the same input list
- ** consumes either the same or less bytes as it did in the input (since
- ** the difference between it and the previous value in the output must
- ** be a positive value less than or equal to the delta value read from
- ** the input list). The same argument applies to all but the first docid
- ** read from the 'other' list. And to the contents of all position lists
- ** that will be copied and merged from the input to the output.
- **
- ** However, if the first docid copied to the output is a negative number,
- ** then the encoding of the first docid from the 'other' input list may
- ** be larger in the output than it was in the input (since the delta value
- ** may be a larger positive integer than the actual docid).
- **
- ** The space required to store the output is therefore the sum of the
- ** sizes of the two inputs, plus enough space for exactly one of the input
- ** docids to grow.
- **
- ** A symmetric argument may be made if the doclists are in descending
- ** order.
- */
- aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1);
- if( !aOut ) return SQLITE_NOMEM;
-
- p = aOut;
- fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); /* First docid: always added to the initial 0 */
- fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
- while( p1 || p2 ){ /* A NULL pointer means that input is exhausted */
- sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
-
- if( p2 && p1 && iDiff==0 ){
- fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
- fts3PoslistMerge(&p, &p1, &p2); /* Same docid in both: union the position lists */
- fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
- }else if( !p2 || (p1 && iDiff<0) ){
- fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
- fts3PoslistCopy(&p, &p1);
- fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
- }else{
- fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
- fts3PoslistCopy(&p, &p2);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
- }
- }
-
- *paOut = aOut;
- *pnOut = (int)(p-aOut);
- assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 );
- return SQLITE_OK;
-}
-
-/*
-** This function does a "phrase" merge of two doclists. In a phrase merge,
-** the output contains a copy of each position from the right-hand input
-** doclist for which there is a position in the left-hand input doclist
-** exactly nDist tokens before it.
-**
-** If the docids in the input doclists are sorted in ascending order,
-** parameter bDescDoclist should be false. If they are sorted in descending
-** order, it should be passed a non-zero value.
-**
-** The right-hand input doclist is overwritten by this function. On return
-** *pnRight holds the number of bytes of output written to aRight.
-*/
-static void fts3DoclistPhraseMerge(
- int bDescDoclist, /* True if arguments are desc */
- int nDist, /* Distance from left to right (1=adjacent) */
- char *aLeft, int nLeft, /* Left doclist */
- char *aRight, int *pnRight /* IN/OUT: Right/output doclist */
-){
- sqlite3_int64 i1 = 0;
- sqlite3_int64 i2 = 0;
- sqlite3_int64 iPrev = 0;
- char *pEnd1 = &aLeft[nLeft];
- char *pEnd2 = &aRight[*pnRight];
- char *p1 = aLeft;
- char *p2 = aRight;
- char *p;
- int bFirstOut = 0;
- char *aOut = aRight; /* Output is written in place over the right-hand input */
-
- assert( nDist>0 );
-
- p = aOut;
- fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
- fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
-
- while( p1 && p2 ){ /* Stop as soon as either input is exhausted */
- sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
- if( iDiff==0 ){
- char *pSave = p;
- sqlite3_int64 iPrevSave = iPrev;
- int bFirstOutSave = bFirstOut;
-
- fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
- if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
- p = pSave; /* No phrase match in this doc: undo the docid just written */
- iPrev = iPrevSave;
- bFirstOut = bFirstOutSave;
- }
- fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
- }else if( iDiff<0 ){
- fts3PoslistCopy(0, &p1);
- fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
- }else{
- fts3PoslistCopy(0, &p2);
- fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
- }
- }
-
- *pnRight = (int)(p - aOut);
-}
-
-/*
-** Argument pList points to a position list nList bytes in size. This
-** function checks to see if the position list contains any entries for
-** a token in position 0 (of any column). If so, it writes argument iDelta
-** to the output buffer pOut, followed by a position list consisting only
-** of the entries from pList at position 0, and terminated by an 0x00 byte.
-** The value returned is the number of bytes written to pOut (if any).
-**
-** iDelta is written at most once, ahead of the first position-0 entry
-** found. If no such entry exists nothing is written and 0 is returned.
-*/
-int sqlite3Fts3FirstFilter(
- sqlite3_int64 iDelta, /* Varint that may be written to pOut */
- char *pList, /* Position list (no 0x00 term) */
- int nList, /* Size of pList in bytes */
- char *pOut /* Write output here */
-){
- int nOut = 0;
- int bWritten = 0; /* True once iDelta has been written */
- char *p = pList;
- char *pEnd = &pList[nList];
-
- if( *p!=0x01 ){ /* List does not begin with a column marker */
- if( *p==0x02 ){ /* First stored value is 0x02, taken here to mean position 0 */
- nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
- pOut[nOut++] = 0x02;
- bWritten = 1;
- }
- fts3ColumnlistCopy(0, &p);
- }
-
- while( p<pEnd && *p==0x01 ){
- sqlite3_int64 iCol;
- p++;
- p += sqlite3Fts3GetVarint(p, &iCol);
- if( *p==0x02 ){
- if( bWritten==0 ){
- nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
- bWritten = 1;
- }
- pOut[nOut++] = 0x01; /* Re-emit the column marker and column number */
- nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
- pOut[nOut++] = 0x02;
- }
- fts3ColumnlistCopy(0, &p);
- }
- if( bWritten ){
- pOut[nOut++] = 0x00;
- }
-
- return nOut;
-}
-
-
-/*
-** Merge all doclists in the TermSelect.aaOutput[] array into a single
-** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
-** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
-**
-** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
-** the responsibility of the caller to free any doclists left in the
-** TermSelect.aaOutput[] array.
-**
-** If every slot is empty, aaOutput[0] is set to NULL and anOutput[0] to 0.
-*/
-static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
- char *aOut = 0;
- int nOut = 0;
- int i;
-
- /* Loop through the doclists in the aaOutput[] array. Merge them all
- ** into a single doclist.
- */
- for(i=0; i<SizeofArray(pTS->aaOutput); i++){
- if( pTS->aaOutput[i] ){
- if( !aOut ){
- aOut = pTS->aaOutput[i]; /* First non-empty slot seeds the accumulator */
- nOut = pTS->anOutput[i];
- pTS->aaOutput[i] = 0;
- }else{
- int nNew;
- char *aNew;
-
- int rc = fts3DoclistOrMerge(p->bDescIdx,
- pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew
- );
- if( rc!=SQLITE_OK ){
- sqlite3_free(aOut); /* Accumulator is dropped; remaining slots are left for the caller */
- return rc;
- }
-
- sqlite3_free(pTS->aaOutput[i]);
- sqlite3_free(aOut);
- pTS->aaOutput[i] = 0;
- aOut = aNew;
- nOut = nNew;
- }
- }
- }
-
- pTS->aaOutput[0] = aOut;
- pTS->anOutput[0] = nOut;
- return SQLITE_OK;
-}
-
-/*
-** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
-** as the first argument. The merge is an "OR" merge (see function
-** fts3DoclistOrMerge() for details).
-**
-** This function is called with the doclist for each term that matches
-** a queried prefix. It merges all these doclists into one, the doclist
-** for the specified prefix. Since there can be a very large number of
-** doclists to merge, the merging is done pair-wise using the TermSelect
-** object.
-**
-** The aDoclist buffer itself is never freed by this function: on the first
-** call its contents are copied, and on later calls it is merged into newly
-** allocated buffers.
-**
-** This function returns SQLITE_OK if the merge is successful, or an
-** SQLite error code (SQLITE_NOMEM) if an error occurs.
-*/
-static int fts3TermSelectMerge(
- Fts3Table *p, /* FTS table handle */
- TermSelect *pTS, /* TermSelect object to merge into */
- char *aDoclist, /* Pointer to doclist */
- int nDoclist /* Size of aDoclist in bytes */
-){
- if( pTS->aaOutput[0]==0 ){
- /* If this is the first term selected, copy the doclist to the output
- ** buffer using memcpy(). */
- pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
- pTS->anOutput[0] = nDoclist;
- if( pTS->aaOutput[0] ){
- memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
- }else{
- return SQLITE_NOMEM;
- }
- }else{
- char *aMerge = aDoclist;
- int nMerge = nDoclist;
- int iOut;
-
- for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
- if( pTS->aaOutput[iOut]==0 ){
- assert( iOut>0 ); /* Slot 0 is known to be occupied on this branch */
- pTS->aaOutput[iOut] = aMerge; /* Park the merged result in the first free slot */
- pTS->anOutput[iOut] = nMerge;
- break;
- }else{
- char *aNew;
- int nNew;
-
- int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge,
- pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew
- );
- if( rc!=SQLITE_OK ){
- if( aMerge!=aDoclist ) sqlite3_free(aMerge); /* Only free buffers this function allocated */
- return rc;
- }
-
- if( aMerge!=aDoclist ) sqlite3_free(aMerge);
- sqlite3_free(pTS->aaOutput[iOut]);
- pTS->aaOutput[iOut] = 0;
-
- aMerge = aNew;
- nMerge = nNew;
- if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
- pTS->aaOutput[iOut] = aMerge; /* Last slot: store the result here rather than carrying on */
- pTS->anOutput[iOut] = nMerge;
- }
- }
- }
- }
- return SQLITE_OK;
-}
-
-/*
-** Append SegReader object pNew to the end of the pCsr->apSegment[] array.
-**
-** The array is grown by 16 entries whenever pCsr->nSegment is a multiple
-** of 16. Returns SQLITE_OK on success. If the reallocation fails, pNew is
-** passed to sqlite3Fts3SegReaderFree() and SQLITE_NOMEM is returned; the
-** existing array is left in place.
-*/
-static int fts3SegReaderCursorAppend(
- Fts3MultiSegReader *pCsr,
- Fts3SegReader *pNew
-){
- if( (pCsr->nSegment%16)==0 ){
- Fts3SegReader **apNew;
- int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*);
- apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte);
- if( !apNew ){
- sqlite3Fts3SegReaderFree(pNew);
- return SQLITE_NOMEM;
- }
- pCsr->apSegment = apNew;
- }
- pCsr->apSegment[pCsr->nSegment++] = pNew;
- return SQLITE_OK;
-}
-
-/*
-** Add seg-reader objects to the Fts3MultiSegReader object passed as the
-** 9th argument (pCsr).
-**
-** Unlike sqlite3Fts3SegReaderCursor(), this function does not zero *pCsr
-** first, so it can be used to add further segments to an existing cursor.
-**
-** This function returns SQLITE_OK if successful, or an SQLite error code
-** otherwise.
-*/
-static int fts3SegReaderCursor(
- Fts3Table *p, /* FTS3 table handle */
- int iLangid, /* Language id */
- int iIndex, /* Index to search (from 0 to p->nIndex-1) */
- int iLevel, /* Level of segments to scan */
- const char *zTerm, /* Term to query for */
- int nTerm, /* Size of zTerm in bytes */
- int isPrefix, /* True for a prefix search */
- int isScan, /* True to scan from zTerm to EOF */
- Fts3MultiSegReader *pCsr /* Cursor object to populate */
-){
- int rc = SQLITE_OK; /* Error code */
- sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */
- int rc2; /* Result of sqlite3_reset() */
-
- /* If iLevel is less than 0 and this is not a scan, include a seg-reader
- ** for the pending-terms. If this is a scan, then this call must be being
- ** made by an fts4aux module, not an FTS table. In this case calling
- ** Fts3SegReaderPending might segfault, as the data structures used by
- ** fts4aux are not completely populated. So it's easiest to filter these
- ** calls out here.
- **
- ** NOTE(review): the test below is on p->aIndex rather than on isScan;
- ** presumably aIndex is NULL for the fts4aux case - confirm. */
- if( iLevel<0 && p->aIndex ){
- Fts3SegReader *pSeg = 0;
- rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg);
- if( rc==SQLITE_OK && pSeg ){
- rc = fts3SegReaderCursorAppend(pCsr, pSeg);
- }
- }
-
- if( iLevel!=FTS3_SEGCURSOR_PENDING ){
- if( rc==SQLITE_OK ){
- rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
- }
-
- while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
- Fts3SegReader *pSeg = 0;
-
- /* Read the values returned by the SELECT into local variables. */
- sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
- sqlite3_int64 iLeavesEndBlock = sqlite3_column_int64(pStmt, 2);
- sqlite3_int64 iEndBlock = sqlite3_column_int64(pStmt, 3);
- int nRoot = sqlite3_column_bytes(pStmt, 4);
- char const *zRoot = sqlite3_column_blob(pStmt, 4);
-
- /* If zTerm is not NULL, and this segment is not stored entirely on its
- ** root node, the range of leaves scanned can be reduced. Do this. */
- if( iStartBlock && zTerm ){
- sqlite3_int64 *pi = (isPrefix ? &iLeavesEndBlock : 0);
- rc = fts3SelectLeaf(p, zTerm, nTerm, zRoot, nRoot, &iStartBlock, pi);
- if( rc!=SQLITE_OK ) goto finished;
- if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock;
- }
-
- rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1,
- (isPrefix==0 && isScan==0),
- iStartBlock, iLeavesEndBlock,
- iEndBlock, zRoot, nRoot, &pSeg
- );
- if( rc!=SQLITE_OK ) goto finished;
- rc = fts3SegReaderCursorAppend(pCsr, pSeg);
- }
- }
-
- finished: /* Reached on both the normal and the error paths */
- rc2 = sqlite3_reset(pStmt);
- if( rc==SQLITE_DONE ) rc = rc2; /* Normal end of iteration: report the reset result instead */
-
- return rc;
-}
-
-/*
-** Set up a cursor object for iterating through a full-text index or a
-** single level therein.
-**
-** *pCsr is zeroed before being populated, so any previous contents are
-** discarded without being released. Returns the result of
-** fts3SegReaderCursor().
-*/
-int sqlite3Fts3SegReaderCursor(
- Fts3Table *p, /* FTS3 table handle */
- int iLangid, /* Language-id to search */
- int iIndex, /* Index to search (from 0 to p->nIndex-1) */
- int iLevel, /* Level of segments to scan */
- const char *zTerm, /* Term to query for */
- int nTerm, /* Size of zTerm in bytes */
- int isPrefix, /* True for a prefix search */
- int isScan, /* True to scan from zTerm to EOF */
- Fts3MultiSegReader *pCsr /* Cursor object to populate */
-){
- assert( iIndex>=0 && iIndex<p->nIndex );
- assert( iLevel==FTS3_SEGCURSOR_ALL
- || iLevel==FTS3_SEGCURSOR_PENDING
- || iLevel>=0
- );
- assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
- assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
- assert( isPrefix==0 || isScan==0 ); /* The two modes are mutually exclusive */
-
- memset(pCsr, 0, sizeof(Fts3MultiSegReader));
- return fts3SegReaderCursor(
- p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
- );
-}
-
-/*
-** In addition to its current configuration, have the Fts3MultiSegReader
-** passed as the 5th argument (pCsr) also scan the doclist for term
-** zTerm/nTerm.
-**
-** The extra segments are taken from index 0, all levels
-** (FTS3_SEGCURSOR_ALL), with both isPrefix and isScan set to 0.
-**
-** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
-*/
-static int fts3SegReaderCursorAddZero(
- Fts3Table *p, /* FTS virtual table handle */
- int iLangid,
- const char *zTerm, /* Term to scan doclist of */
- int nTerm, /* Number of bytes in zTerm */
- Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
-){
- return fts3SegReaderCursor(p,
- iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
- );
-}
-
-/*
-** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
-** if isPrefix is true, to scan the doclist for all terms for which
-** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
-** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
-** an SQLite error code.
-**
-** For a prefix search, a prefix index whose nPrefix equals nTerm is tried
-** first, then one whose nPrefix equals nTerm+1 (combined with a scan of
-** index 0 for the exact term). If neither exists, index 0 is used.
-**
-** It is the responsibility of the caller to free this object by eventually
-** passing it to fts3SegReaderCursorFree()
-**
-** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
-** Output parameter *ppSegcsr is set to 0 if an error occurs.
-**
-** NOTE(review): as written, *ppSegcsr is 0 only when the allocation fails;
-** if a later step fails the allocated object is still stored in *ppSegcsr.
-** Confirm how callers handle that case.
-*/
-static int fts3TermSegReaderCursor(
- Fts3Cursor *pCsr, /* Virtual table cursor handle */
- const char *zTerm, /* Term to query for */
- int nTerm, /* Size of zTerm in bytes */
- int isPrefix, /* True for a prefix search */
- Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */
-){
- Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */
- int rc = SQLITE_NOMEM; /* Return code */
-
- pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
- if( pSegcsr ){
- int i;
- int bFound = 0; /* True once an index has been found */
- Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
-
- if( isPrefix ){
- for(i=1; bFound==0 && i<p->nIndex; i++){
- if( p->aIndex[i].nPrefix==nTerm ){
- bFound = 1;
- rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
- i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
- );
- pSegcsr->bLookup = 1; /* Exact-length prefix index: opened as a non-prefix lookup */
- }
- }
-
- for(i=1; bFound==0 && i<p->nIndex; i++){
- if( p->aIndex[i].nPrefix==nTerm+1 ){
- bFound = 1;
- rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
- i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
- );
- if( rc==SQLITE_OK ){
- rc = fts3SegReaderCursorAddZero(
- p, pCsr->iLangid, zTerm, nTerm, pSegcsr
- );
- }
- }
- }
- }
-
- if( bFound==0 ){
- rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
- 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
- );
- pSegcsr->bLookup = !isPrefix;
- }
- }
-
- *ppSegcsr = pSegcsr;
- return rc;
-}
-
-/*
-** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
-**
-** The object is first passed to sqlite3Fts3SegReaderFinish() and then the
-** structure itself is released with sqlite3_free().
-*/
-static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
- sqlite3Fts3SegReaderFinish(pSegcsr);
- sqlite3_free(pSegcsr);
-}
-
-/*
-** This function retrieves the doclist for the specified term (or term
-** prefix) from the database.
-**
-** On success SQLITE_OK is returned and *ppOut/*pnOut are set to the merged
-** doclist buffer and its size. On error an SQLite error code is returned,
-** any intermediate buffers are freed and *ppOut/*pnOut are not written.
-**
-** In both cases the seg-reader cursor held in pTok->pSegcsr is freed and
-** pTok->pSegcsr is set to 0 before returning.
-**
-** NOTE(review): the column filter flag is set whenever iColumn<p->nColumn,
-** which is also true for a negative iColumn; confirm what value callers
-** pass to mean "all columns".
-*/
-static int fts3TermSelect(
- Fts3Table *p, /* Virtual table handle */
- Fts3PhraseToken *pTok, /* Token to query for */
- int iColumn, /* Column to query (or -ve for all columns) */
- int *pnOut, /* OUT: Size of buffer at *ppOut */
- char **ppOut /* OUT: Malloced result buffer */
-){
- int rc; /* Return code */
- Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */
- TermSelect tsc; /* Object for pair-wise doclist merging */
- Fts3SegFilter filter; /* Segment term filter configuration */
-
- pSegcsr = pTok->pSegcsr;
- memset(&tsc, 0, sizeof(TermSelect));
-
- filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
- | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
- | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
- | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
- filter.iCol = iColumn;
- filter.zTerm = pTok->z;
- filter.nTerm = pTok->n;
-
- rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
- while( SQLITE_OK==rc
- && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr))
- ){
- rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
- }
-
- if( rc==SQLITE_OK ){
- rc = fts3TermSelectFinishMerge(p, &tsc);
- }
- if( rc==SQLITE_OK ){
- *ppOut = tsc.aaOutput[0]; /* Ownership of the merged buffer passes to the caller */
- *pnOut = tsc.anOutput[0];
- }else{
- int i;
- for(i=0; i<SizeofArray(tsc.aaOutput); i++){
- sqlite3_free(tsc.aaOutput[i]);
- }
- }
-
- fts3SegReaderCursorFree(pSegcsr);
- pTok->pSegcsr = 0;
- return rc;
-}
-
-/*
-** This function counts the total number of docids in the doclist stored
-** in buffer aList[], size nList bytes.
-**
-** The doclist is assumed to contain a position-list following each docid:
-** after skipping each docid varint the function always skips a position
-** list using fts3PoslistCopy(). (There is no isPoslist argument.)
-**
-** If aList is NULL, 0 is returned.
-*/
-static int fts3DoclistCountDocids(char *aList, int nList){
- int nDoc = 0; /* Return value */
- if( aList ){
- char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */
- char *p = aList; /* Cursor */
- while( p<aEnd ){
- nDoc++;
- while( (*p++)&0x80 ); /* Skip docid varint */
- fts3PoslistCopy(0, &p); /* Skip over position list */
- }
- }
-
- return nDoc;
-}
-
-/*
-** Advance the cursor to the next row in the %_content table that
-** matches the search criteria. For a MATCH search, this will be
-** the next row that matches. For a full-table scan, this will be
-** simply the next row in the %_content table. For a docid lookup,
-** this routine simply sets the EOF flag.
-**
-** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
-** even if we reach end-of-file. The fts3EofMethod() will be called
-** subsequently to determine whether or not an EOF was hit.
-**
-** For docid lookups and full-table scans the cursor's statement is stepped
-** directly; when it yields no further row the statement is reset and the
-** result of that reset is returned. All other searches are delegated to
-** fts3EvalNext().
-*/
-static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
- int rc;
- Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
- if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){
- if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
- pCsr->isEof = 1;
- rc = sqlite3_reset(pCsr->pStmt);
- }else{
- pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); /* Column 0 supplies the current rowid */
- rc = SQLITE_OK;
- }
- }else{
- rc = fts3EvalNext((Fts3Cursor *)pCursor);
- }
- assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
- return rc;
-}
-
-/*
-** This is the xFilter interface for the virtual table. See
-** the virtual table xFilter method documentation for additional
-** information.
-**
-** If idxNum==FTS3_FULLSCAN_SEARCH then do a full table scan against
-** the %_content table.
-**
-** If idxNum==FTS3_DOCID_SEARCH then do a docid lookup for a single entry
-** in the %_content table.
-**
-** If idxNum>=FTS3_FULLTEXT_SEARCH then use the full text index. The
-** column on the left-hand side of the MATCH operator is column
-** number idxNum-FTS3_FULLTEXT_SEARCH, 0 indexed. argv[0] is the right-hand
-** side of the MATCH operator.
-**
-** If idxStr is not NULL, its first character selects the scan direction
-** ('D' for descending); otherwise the table's bDescIdx setting is used.
-** For a full-text search, a second value (apVal[1]), if present, supplies
-** the language id.
-**
-** After setting up the cursor, fts3NextMethod() is called to position it
-** on the first row, and its return code is returned.
-*/
-static int fts3FilterMethod(
- sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
- int idxNum, /* Strategy index */
- const char *idxStr, /* Unused */
- int nVal, /* Number of elements in apVal */
- sqlite3_value **apVal /* Arguments for the indexing scheme */
-){
- int rc;
- char *zSql; /* SQL statement used to access %_content */
- Fts3Table *p = (Fts3Table *)pCursor->pVtab;
- Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;
-
- UNUSED_PARAMETER(idxStr); /* NOTE(review): idxStr is in fact read further down */
- UNUSED_PARAMETER(nVal);
-
- assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
- assert( nVal==0 || nVal==1 || nVal==2 );
- assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
- assert( p->pSegments==0 );
-
- /* In case the cursor has been used before, clear it now. */
- sqlite3_finalize(pCsr->pStmt);
- sqlite3_free(pCsr->aDoclist);
- sqlite3Fts3ExprFree(pCsr->pExpr);
- memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); /* Zero everything after the base cursor */
-
- if( idxStr ){
- pCsr->bDesc = (idxStr[0]=='D');
- }else{
- pCsr->bDesc = p->bDescIdx;
- }
- pCsr->eSearch = (i16)idxNum;
-
- if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){
- int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
- const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);
-
- if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
- return SQLITE_NOMEM; /* NULL text for a non-NULL value is treated as out-of-memory */
- }
-
- pCsr->iLangid = 0;
- if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
-
- rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
- p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
- );
- if( rc!=SQLITE_OK ){
- if( rc==SQLITE_ERROR ){
- static const char *zErr = "malformed MATCH expression: [%s]";
- p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
- }
- return rc;
- }
-
- rc = sqlite3Fts3ReadLock(p);
- if( rc!=SQLITE_OK ) return rc;
-
- rc = fts3EvalStart(pCsr);
-
- sqlite3Fts3SegmentsClose(p); /* Called whether or not fts3EvalStart() succeeded */
- if( rc!=SQLITE_OK ) return rc;
- pCsr->pNextId = pCsr->aDoclist;
- pCsr->iPrevId = 0;
- }
-
- /* Compile a SELECT statement for this cursor. For a full-table-scan, the
- ** statement loops through all rows of the %_content table. For a
- ** full-text query or docid lookup, the statement retrieves a single
- ** row by docid.
- */
- if( idxNum==FTS3_FULLSCAN_SEARCH ){
- zSql = sqlite3_mprintf(
- "SELECT %s ORDER BY rowid %s",
- p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
- );
- if( zSql ){
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
- sqlite3_free(zSql);
- }else{
- rc = SQLITE_NOMEM;
- }
- }else if( idxNum==FTS3_DOCID_SEARCH ){
- rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
- if( rc==SQLITE_OK ){
- rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); /* Bind the requested docid */
- }
- }
- if( rc!=SQLITE_OK ) return rc;
-
- return fts3NextMethod(pCursor);
-}
-
-/*
-** This is the xEof method of the virtual table. SQLite calls this
-** routine to find out if it has reached the end of a result set.
-*/
-static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){
- return ((Fts3Cursor *)pCursor)->isEof;
-}
-
-/*
-** This is the xRowid method. The SQLite core calls this routine to
-** retrieve the rowid for the current row of the result set. fts3
-** exposes %_content.docid as the rowid for the virtual table. The
-** rowid should be written to *pRowid.
-*/
-static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
- Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
- *pRowid = pCsr->iPrevId;
- return SQLITE_OK;
-}
-
-/*
-** This is the xColumn method, called by SQLite to request a value from
-** the row that the supplied cursor currently points to.
-**
-** If:
-**
-** (iCol < p->nColumn) -> The value of the iCol'th user column.
-** (iCol == p->nColumn) -> Magic column with the same name as the table.
-** (iCol == p->nColumn+1) -> Docid column
-** (iCol == p->nColumn+2) -> Langid column
-*/
-static int fts3ColumnMethod(
- sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
- int iCol /* Index of column to read value from */
-){
- int rc = SQLITE_OK; /* Return Code */
- Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
- Fts3Table *p = (Fts3Table *)pCursor->pVtab;
-
- /* The column value supplied by SQLite must be in range. */
- assert( iCol>=0 && iCol<=p->nColumn+2 );
-
- if( iCol==p->nColumn+1 ){
- /* This call is a request for the "docid" column. Since "docid" is an
- ** alias for "rowid", use the xRowid() method to obtain the value.
- */
- sqlite3_result_int64(pCtx, pCsr->iPrevId);
- }else if( iCol==p->nColumn ){
- /* The extra column whose name is the same as the table.
- ** Return a blob which is a pointer to the cursor. */
- sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
- }else if( iCol==p->nColumn+2 && pCsr->pExpr ){
- sqlite3_result_int64(pCtx, pCsr->iLangid);
- }else{
- /* The requested column is either a user column (one that contains
- ** indexed data), or the language-id column. */
- rc = fts3CursorSeek(0, pCsr);
-
- if( rc==SQLITE_OK ){
- if( iCol==p->nColumn+2 ){
- int iLangid = 0;
- if( p->zLanguageid ){
- iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1);
- }
- sqlite3_result_int(pCtx, iLangid);
- }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
- sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
- }
- }
- }
-
- assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
- return rc;
-}
-
-/*
-** This function is the implementation of the xUpdate callback used by
-** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
-** inserted, updated or deleted.
-*/
-static int fts3UpdateMethod(
- sqlite3_vtab *pVtab, /* Virtual table handle */
- int nArg, /* Size of argument array */
- sqlite3_value **apVal, /* Array of arguments */
- sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
-){
- return sqlite3Fts3UpdateMethod(pVtab, nArg, apVal, pRowid);
-}
-
-/*
-** Implementation of xSync() method. Flush the contents of the pending-terms
-** hash-table to the database.
-*/
-static int fts3SyncMethod(sqlite3_vtab *pVtab){
-
- /* Following an incremental-merge operation, assuming that the input
- ** segments are not completely consumed (the usual case), they are updated
- ** in place to remove the entries that have already been merged. This
- ** involves updating the leaf block that contains the smallest unmerged
- ** entry and each block (if any) between the leaf and the root node. So
- ** if the height of the input segment b-trees is N, and input segments
- ** are merged eight at a time, updating the input segments at the end
- ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually
- ** small - often between 0 and 2. So the overhead of the incremental
- ** merge is somewhere between 8 and 24 blocks. To avoid this overhead
- ** dwarfing the actual productive work accomplished, the incremental merge
- ** is only attempted if it will write at least 64 leaf blocks. Hence
- ** nMinMerge.
- **
- ** Of course, updating the input segments also involves deleting a bunch
- ** of blocks from the segments table. But this is not considered overhead
- ** as it would also be required by a crisis-merge that used the same input
- ** segments.
- */
- const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */
-
- Fts3Table *p = (Fts3Table*)pVtab;
- int rc = sqlite3Fts3PendingTermsFlush(p);
-
- if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){
- int mxLevel = 0; /* Maximum relative level value in db */
- int A; /* Incr-merge parameter A */
-
- rc = sqlite3Fts3MaxLevel(p, &mxLevel);
- assert( rc==SQLITE_OK || mxLevel==0 );
- A = p->nLeafAdd * mxLevel;
- A += (A/2);
- if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8);
- }
- sqlite3Fts3SegmentsClose(p);
- return rc;
-}
-
-/*
-** Implementation of xBegin() method. This is a no-op.
-*/
-static int fts3BeginMethod(sqlite3_vtab *pVtab){
- Fts3Table *p = (Fts3Table*)pVtab;
- UNUSED_PARAMETER(pVtab);
- assert( p->pSegments==0 );
- assert( p->nPendingData==0 );
- assert( p->inTransaction!=1 );
- TESTONLY( p->inTransaction = 1 );
- TESTONLY( p->mxSavepoint = -1; );
- p->nLeafAdd = 0;
- return SQLITE_OK;
-}
-
-/*
-** Implementation of xCommit() method. This is a no-op. The contents of
-** the pending-terms hash-table have already been flushed into the database
-** by fts3SyncMethod().
-*/
-static int fts3CommitMethod(sqlite3_vtab *pVtab){
- TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
- UNUSED_PARAMETER(pVtab);
- assert( p->nPendingData==0 );
- assert( p->inTransaction!=0 );
- assert( p->pSegments==0 );
- TESTONLY( p->inTransaction = 0 );
- TESTONLY( p->mxSavepoint = -1; );
- return SQLITE_OK;
-}
-
-/*
-** Implementation of xRollback(). Discard the contents of the pending-terms
-** hash-table. Any changes made to the database are reverted by SQLite.
-*/
-static int fts3RollbackMethod(sqlite3_vtab *pVtab){
- Fts3Table *p = (Fts3Table*)pVtab;
- sqlite3Fts3PendingTermsClear(p);
- assert( p->inTransaction!=0 );
- TESTONLY( p->inTransaction = 0 );
- TESTONLY( p->mxSavepoint = -1; );
- return SQLITE_OK;
-}
-
-/*
-** When called, *ppPoslist must point to the byte immediately following the
-** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function
-** moves *ppPoslist so that it instead points to the first byte of the
-** same position list.
-*/
-static void fts3ReversePoslist(char *pStart, char **ppPoslist){
- char *p = &(*ppPoslist)[-2];
- char c = 0;
-
- while( p>pStart && (c=*p--)==0 );
- while( p>pStart && (*p & 0x80) | c ){
- c = *p--;
- }
- if( p>pStart ){ p = &p[2]; }
- while( *p++&0x80 );
- *ppPoslist = p;
-}
-
-/*
-** Helper function used by the implementation of the overloaded snippet(),
-** offsets() and optimize() SQL functions.
-**
-** If the value passed as the third argument is a blob of size
-** sizeof(Fts3Cursor*), then the blob contents are copied to the
-** output variable *ppCsr and SQLITE_OK is returned. Otherwise, an error
-** message is written to context pContext and SQLITE_ERROR returned. The
-** string passed via zFunc is used as part of the error message.
-*/
-static int fts3FunctionArg(
- sqlite3_context *pContext, /* SQL function call context */
- const char *zFunc, /* Function name */
- sqlite3_value *pVal, /* argv[0] passed to function */
- Fts3Cursor **ppCsr /* OUT: Store cursor handle here */
-){
- Fts3Cursor *pRet;
- if( sqlite3_value_type(pVal)!=SQLITE_BLOB
- || sqlite3_value_bytes(pVal)!=sizeof(Fts3Cursor *)
- ){
- char *zErr = sqlite3_mprintf("illegal first argument to %s", zFunc);
- sqlite3_result_error(pContext, zErr, -1);
- sqlite3_free(zErr);
- return SQLITE_ERROR;
- }
- memcpy(&pRet, sqlite3_value_blob(pVal), sizeof(Fts3Cursor *));
- *ppCsr = pRet;
- return SQLITE_OK;
-}
-
-/*
-** Implementation of the snippet() function for FTS3
-*/
-static void fts3SnippetFunc(
- sqlite3_context *pContext, /* SQLite function call context */
- int nVal, /* Size of apVal[] array */
- sqlite3_value **apVal /* Array of arguments */
-){
- Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
- const char *zStart = "<b>";
- const char *zEnd = "</b>";
- const char *zEllipsis = "<b>...</b>";
- int iCol = -1;
- int nToken = 15; /* Default number of tokens in snippet */
-
- /* There must be at least one argument passed to this function (otherwise
- ** the non-overloaded version would have been called instead of this one).
- */
- assert( nVal>=1 );
-
- if( nVal>6 ){
- sqlite3_result_error(pContext,
- "wrong number of arguments to function snippet()", -1);
- return;
- }
- if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;
-
- switch( nVal ){
- case 6: nToken = sqlite3_value_int(apVal[5]);
- case 5: iCol = sqlite3_value_int(apVal[4]);
- case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
- case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]);
- case 2: zStart = (const char*)sqlite3_value_text(apVal[1]);
- }
- if( !zEllipsis || !zEnd || !zStart ){
- sqlite3_result_error_nomem(pContext);
- }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
- sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
- }
-}
-
-/*
-** Implementation of the offsets() function for FTS3
-*/
-static void fts3OffsetsFunc(
- sqlite3_context *pContext, /* SQLite function call context */
- int nVal, /* Size of argument array */
- sqlite3_value **apVal /* Array of arguments */
-){
- Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
-
- UNUSED_PARAMETER(nVal);
-
- assert( nVal==1 );
- if( fts3FunctionArg(pContext, "offsets", apVal[0], &pCsr) ) return;
- assert( pCsr );
- if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
- sqlite3Fts3Offsets(pContext, pCsr);
- }
-}
-
-/*
-** Implementation of the special optimize() function for FTS3. This
-** function merges all segments in the database to a single segment.
-** Example usage is:
-**
-** SELECT optimize(t) FROM t LIMIT 1;
-**
-** where 't' is the name of an FTS3 table.
-*/
-static void fts3OptimizeFunc(
- sqlite3_context *pContext, /* SQLite function call context */
- int nVal, /* Size of argument array */
- sqlite3_value **apVal /* Array of arguments */
-){
- int rc; /* Return code */
- Fts3Table *p; /* Virtual table handle */
- Fts3Cursor *pCursor; /* Cursor handle passed through apVal[0] */
-
- UNUSED_PARAMETER(nVal);
-
- assert( nVal==1 );
- if( fts3FunctionArg(pContext, "optimize", apVal[0], &pCursor) ) return;
- p = (Fts3Table *)pCursor->base.pVtab;
- assert( p );
-
- rc = sqlite3Fts3Optimize(p);
-
- switch( rc ){
- case SQLITE_OK:
- sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC);
- break;
- case SQLITE_DONE:
- sqlite3_result_text(pContext, "Index already optimal", -1, SQLITE_STATIC);
- break;
- default:
- sqlite3_result_error_code(pContext, rc);
- break;
- }
-}
-
-/*
-** Implementation of the matchinfo() function for FTS3
-*/
-static void fts3MatchinfoFunc(
- sqlite3_context *pContext, /* SQLite function call context */
- int nVal, /* Size of argument array */
- sqlite3_value **apVal /* Array of arguments */
-){
- Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
- assert( nVal==1 || nVal==2 );
- if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
- const char *zArg = 0;
- if( nVal>1 ){
- zArg = (const char *)sqlite3_value_text(apVal[1]);
- }
- sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
- }
-}
-
-/*
-** This routine implements the xFindFunction method for the FTS3
-** virtual table.
-*/
-static int fts3FindFunctionMethod(
- sqlite3_vtab *pVtab, /* Virtual table handle */
- int nArg, /* Number of SQL function arguments */
- const char *zName, /* Name of SQL function */
- void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
- void **ppArg /* Unused */
-){
- struct Overloaded {
- const char *zName;
- void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
- } aOverload[] = {
- { "snippet", fts3SnippetFunc },
- { "offsets", fts3OffsetsFunc },
- { "optimize", fts3OptimizeFunc },
- { "matchinfo", fts3MatchinfoFunc },
- };
- int i; /* Iterator variable */
-
- UNUSED_PARAMETER(pVtab);
- UNUSED_PARAMETER(nArg);
- UNUSED_PARAMETER(ppArg);
-
- for(i=0; i<SizeofArray(aOverload); i++){
- if( strcmp(zName, aOverload[i].zName)==0 ){
- *pxFunc = aOverload[i].xFunc;
- return 1;
- }
- }
-
- /* No function of the specified name was found. Return 0. */
- return 0;
-}
-
-/*
-** Implementation of FTS3 xRename method. Rename an fts3 table.
-*/
-static int fts3RenameMethod(
- sqlite3_vtab *pVtab, /* Virtual table handle */
- const char *zName /* New name of table */
-){
- Fts3Table *p = (Fts3Table *)pVtab;
- sqlite3 *db = p->db; /* Database connection */
- int rc; /* Return Code */
-
- /* As it happens, the pending terms table is always empty here. This is
- ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction
- ** always opens a savepoint transaction. And the xSavepoint() method
- ** flushes the pending terms table. But leave the (no-op) call to
- ** PendingTermsFlush() in in case that changes.
- */
- assert( p->nPendingData==0 );
- rc = sqlite3Fts3PendingTermsFlush(p);
-
- if( p->zContentTbl==0 ){
- fts3DbExec(&rc, db,
- "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';",
- p->zDb, p->zName, zName
- );
- }
-
- if( p->bHasDocsize ){
- fts3DbExec(&rc, db,
- "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';",
- p->zDb, p->zName, zName
- );
- }
- if( p->bHasStat ){
- fts3DbExec(&rc, db,
- "ALTER TABLE %Q.'%q_stat' RENAME TO '%q_stat';",
- p->zDb, p->zName, zName
- );
- }
- fts3DbExec(&rc, db,
- "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';",
- p->zDb, p->zName, zName
- );
- fts3DbExec(&rc, db,
- "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';",
- p->zDb, p->zName, zName
- );
- return rc;
-}
-
-/*
-** The xSavepoint() method.
-**
-** Flush the contents of the pending-terms table to disk.
-*/
-static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
- int rc = SQLITE_OK;
- UNUSED_PARAMETER(iSavepoint);
- assert( ((Fts3Table *)pVtab)->inTransaction );
- assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
- TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
- if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){
- rc = fts3SyncMethod(pVtab);
- }
- return rc;
-}
-
-/*
-** The xRelease() method.
-**
-** This is a no-op.
-*/
-static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
- TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
- UNUSED_PARAMETER(iSavepoint);
- UNUSED_PARAMETER(pVtab);
- assert( p->inTransaction );
- assert( p->mxSavepoint >= iSavepoint );
- TESTONLY( p->mxSavepoint = iSavepoint-1 );
- return SQLITE_OK;
-}
-
-/*
-** The xRollbackTo() method.
-**
-** Discard the contents of the pending terms table.
-*/
-static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
- Fts3Table *p = (Fts3Table*)pVtab;
- UNUSED_PARAMETER(iSavepoint);
- assert( p->inTransaction );
- assert( p->mxSavepoint >= iSavepoint );
- TESTONLY( p->mxSavepoint = iSavepoint );
- sqlite3Fts3PendingTermsClear(p);
- return SQLITE_OK;
-}
-
-static const sqlite3_module fts3Module = {
- /* iVersion */ 2,
- /* xCreate */ fts3CreateMethod,
- /* xConnect */ fts3ConnectMethod,
- /* xBestIndex */ fts3BestIndexMethod,
- /* xDisconnect */ fts3DisconnectMethod,
- /* xDestroy */ fts3DestroyMethod,
- /* xOpen */ fts3OpenMethod,
- /* xClose */ fts3CloseMethod,
- /* xFilter */ fts3FilterMethod,
- /* xNext */ fts3NextMethod,
- /* xEof */ fts3EofMethod,
- /* xColumn */ fts3ColumnMethod,
- /* xRowid */ fts3RowidMethod,
- /* xUpdate */ fts3UpdateMethod,
- /* xBegin */ fts3BeginMethod,
- /* xSync */ fts3SyncMethod,
- /* xCommit */ fts3CommitMethod,
- /* xRollback */ fts3RollbackMethod,
- /* xFindFunction */ fts3FindFunctionMethod,
- /* xRename */ fts3RenameMethod,
- /* xSavepoint */ fts3SavepointMethod,
- /* xRelease */ fts3ReleaseMethod,
- /* xRollbackTo */ fts3RollbackToMethod,
-};
-
-/*
-** This function is registered as the module destructor (called when an
-** FTS3 enabled database connection is closed). It frees the memory
-** allocated for the tokenizer hash table.
-*/
-static void hashDestroy(void *p){
- Fts3Hash *pHash = (Fts3Hash *)p;
- sqlite3Fts3HashClear(pHash);
- sqlite3_free(pHash);
-}
-
-/*
-** The fts3 built-in tokenizers - "simple", "porter" and "icu"- are
-** implemented in files fts3_tokenizer1.c, fts3_porter.c and fts3_icu.c
-** respectively. The following three forward declarations are for functions
-** declared in these files used to retrieve the respective implementations.
-**
-** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
-** to by the argument to point to the "simple" tokenizer implementation.
-** And so on.
-*/
-void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
-void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
-void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
-#endif
-#ifdef SQLITE_ENABLE_ICU
-void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
-#endif
-
-/*
-** Initialise the fts3 extension. If this extension is built as part
-** of the sqlite library, then this function is called directly by
-** SQLite. If fts3 is built as a dynamically loadable extension, this
-** function is called by the sqlite3_extension_init() entry point.
-*/
-int sqlite3Fts3Init(sqlite3 *db){
- int rc = SQLITE_OK;
- Fts3Hash *pHash = 0;
- const sqlite3_tokenizer_module *pSimple = 0;
- const sqlite3_tokenizer_module *pPorter = 0;
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
- const sqlite3_tokenizer_module *pUnicode = 0;
-#endif
-
-#ifdef SQLITE_ENABLE_ICU
- const sqlite3_tokenizer_module *pIcu = 0;
- sqlite3Fts3IcuTokenizerModule(&pIcu);
-#endif
-
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
- sqlite3Fts3UnicodeTokenizer(&pUnicode);
-#endif
-
-#ifdef SQLITE_TEST
- rc = sqlite3Fts3InitTerm(db);
- if( rc!=SQLITE_OK ) return rc;
-#endif
-
- rc = sqlite3Fts3InitAux(db);
- if( rc!=SQLITE_OK ) return rc;
-
- sqlite3Fts3SimpleTokenizerModule(&pSimple);
- sqlite3Fts3PorterTokenizerModule(&pPorter);
-
- /* Allocate and initialise the hash-table used to store tokenizers. */
- pHash = sqlite3_malloc(sizeof(Fts3Hash));
- if( !pHash ){
- rc = SQLITE_NOMEM;
- }else{
- sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
- }
-
- /* Load the built-in tokenizers into the hash table */
- if( rc==SQLITE_OK ){
- if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
- || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)
-
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
- || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)
-#endif
-#ifdef SQLITE_ENABLE_ICU
- || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
-#endif
- ){
- rc = SQLITE_NOMEM;
- }
- }
-
-#ifdef SQLITE_TEST
- if( rc==SQLITE_OK ){
- rc = sqlite3Fts3ExprInitTestInterface(db);
- }
-#endif
-
- /* Create the virtual table wrapper around the hash-table and overload
- ** the two scalar functions. If this is successful, register the
- ** module with sqlite.
- */
- if( SQLITE_OK==rc
- && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
- && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
- && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
- && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 1))
- && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", 2))
- && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
- ){
- rc = sqlite3_create_module_v2(
- db, "fts3", &fts3Module, (void *)pHash, hashDestroy
- );
- if( rc==SQLITE_OK ){
- rc = sqlite3_create_module_v2(
- db, "fts4", &fts3Module, (void *)pHash, 0
- );
- }
- return rc;
- }
-
- /* An error has occurred. Delete the hash table and return the error code. */
- assert( rc!=SQLITE_OK );
- if( pHash ){
- sqlite3Fts3HashClear(pHash);
- sqlite3_free(pHash);
- }
- return rc;
-}
-
-/*
-** Allocate an Fts3MultiSegReader for each token in the expression headed
-** by pExpr.
-**
-** An Fts3SegReader object is a cursor that can seek or scan a range of
-** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
-** Fts3SegReader objects internally to provide an interface to seek or scan
-** within the union of all segments of a b-tree. Hence the name.
-**
-** If the allocated Fts3MultiSegReader just seeks to a single entry in a
-** segment b-tree (if the term is not a prefix or it is a prefix for which
-** there exists prefix b-tree of the right length) then it may be traversed
-** and merged incrementally. Otherwise, it has to be merged into an in-memory
-** doclist and then traversed.
-*/
-static void fts3EvalAllocateReaders(
- Fts3Cursor *pCsr, /* FTS cursor handle */
- Fts3Expr *pExpr, /* Allocate readers for this expression */
- int *pnToken, /* OUT: Total number of tokens in phrase. */
- int *pnOr, /* OUT: Total number of OR nodes in expr. */
- int *pRc /* IN/OUT: Error code */
-){
- if( pExpr && SQLITE_OK==*pRc ){
- if( pExpr->eType==FTSQUERY_PHRASE ){
- int i;
- int nToken = pExpr->pPhrase->nToken;
- *pnToken += nToken;
- for(i=0; i<nToken; i++){
- Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
- int rc = fts3TermSegReaderCursor(pCsr,
- pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
- );
- if( rc!=SQLITE_OK ){
- *pRc = rc;
- return;
- }
- }
- assert( pExpr->pPhrase->iDoclistToken==0 );
- pExpr->pPhrase->iDoclistToken = -1;
- }else{
- *pnOr += (pExpr->eType==FTSQUERY_OR);
- fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
- fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
- }
- }
-}
-
-/*
-** Arguments pList/nList contain the doclist for token iToken of phrase p.
-** It is merged into the main doclist stored in p->doclist.aAll/nAll.
-**
-** This function assumes that pList points to a buffer allocated using
-** sqlite3_malloc(). This function takes responsibility for eventually
-** freeing the buffer.
-*/
-static void fts3EvalPhraseMergeToken(
- Fts3Table *pTab, /* FTS Table pointer */
- Fts3Phrase *p, /* Phrase to merge pList/nList into */
- int iToken, /* Token pList/nList corresponds to */
- char *pList, /* Pointer to doclist */
- int nList /* Number of bytes in pList */
-){
- assert( iToken!=p->iDoclistToken );
-
- if( pList==0 ){
- sqlite3_free(p->doclist.aAll);
- p->doclist.aAll = 0;
- p->doclist.nAll = 0;
- }
-
- else if( p->iDoclistToken<0 ){
- p->doclist.aAll = pList;
- p->doclist.nAll = nList;
- }
-
- else if( p->doclist.aAll==0 ){
- sqlite3_free(pList);
- }
-
- else {
- char *pLeft;
- char *pRight;
- int nLeft;
- int nRight;
- int nDiff;
-
- if( p->iDoclistToken<iToken ){
- pLeft = p->doclist.aAll;
- nLeft = p->doclist.nAll;
- pRight = pList;
- nRight = nList;
- nDiff = iToken - p->iDoclistToken;
- }else{
- pRight = p->doclist.aAll;
- nRight = p->doclist.nAll;
- pLeft = pList;
- nLeft = nList;
- nDiff = p->iDoclistToken - iToken;
- }
-
- fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
- sqlite3_free(pLeft);
- p->doclist.aAll = pRight;
- p->doclist.nAll = nRight;
- }
-
- if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
-}
-
-/*
-** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
-** does not take deferred tokens into account.
-**
-** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
-*/
-static int fts3EvalPhraseLoad(
- Fts3Cursor *pCsr, /* FTS Cursor handle */
- Fts3Phrase *p /* Phrase object */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int iToken;
- int rc = SQLITE_OK;
-
- for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
- Fts3PhraseToken *pToken = &p->aToken[iToken];
- assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
-
- if( pToken->pSegcsr ){
- int nThis = 0;
- char *pThis = 0;
- rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
- if( rc==SQLITE_OK ){
- fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
- }
- }
- assert( pToken->pSegcsr==0 );
- }
-
- return rc;
-}
-
-/*
-** This function is called on each phrase after the position lists for
-** any deferred tokens have been loaded into memory. It updates the phrases
-** current position list to include only those positions that are really
-** instances of the phrase (after considering deferred tokens). If this
-** means that the phrase does not appear in the current row, doclist.pList
-** and doclist.nList are both zeroed.
-**
-** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
-*/
-static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
- int iToken; /* Used to iterate through phrase tokens */
- char *aPoslist = 0; /* Position list for deferred tokens */
- int nPoslist = 0; /* Number of bytes in aPoslist */
- int iPrev = -1; /* Token number of previous deferred token */
-
- assert( pPhrase->doclist.bFreeList==0 );
-
- for(iToken=0; iToken<pPhrase->nToken; iToken++){
- Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
- Fts3DeferredToken *pDeferred = pToken->pDeferred;
-
- if( pDeferred ){
- char *pList;
- int nList;
- int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
- if( rc!=SQLITE_OK ) return rc;
-
- if( pList==0 ){
- sqlite3_free(aPoslist);
- pPhrase->doclist.pList = 0;
- pPhrase->doclist.nList = 0;
- return SQLITE_OK;
-
- }else if( aPoslist==0 ){
- aPoslist = pList;
- nPoslist = nList;
-
- }else{
- char *aOut = pList;
- char *p1 = aPoslist;
- char *p2 = aOut;
-
- assert( iPrev>=0 );
- fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2);
- sqlite3_free(aPoslist);
- aPoslist = pList;
- nPoslist = (int)(aOut - aPoslist);
- if( nPoslist==0 ){
- sqlite3_free(aPoslist);
- pPhrase->doclist.pList = 0;
- pPhrase->doclist.nList = 0;
- return SQLITE_OK;
- }
- }
- iPrev = iToken;
- }
- }
-
- if( iPrev>=0 ){
- int nMaxUndeferred = pPhrase->iDoclistToken;
- if( nMaxUndeferred<0 ){
- pPhrase->doclist.pList = aPoslist;
- pPhrase->doclist.nList = nPoslist;
- pPhrase->doclist.iDocid = pCsr->iPrevId;
- pPhrase->doclist.bFreeList = 1;
- }else{
- int nDistance;
- char *p1;
- char *p2;
- char *aOut;
-
- if( nMaxUndeferred>iPrev ){
- p1 = aPoslist;
- p2 = pPhrase->doclist.pList;
- nDistance = nMaxUndeferred - iPrev;
- }else{
- p1 = pPhrase->doclist.pList;
- p2 = aPoslist;
- nDistance = iPrev - nMaxUndeferred;
- }
-
- aOut = (char *)sqlite3_malloc(nPoslist+8);
- if( !aOut ){
- sqlite3_free(aPoslist);
- return SQLITE_NOMEM;
- }
-
- pPhrase->doclist.pList = aOut;
- if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){
- pPhrase->doclist.bFreeList = 1;
- pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList);
- }else{
- sqlite3_free(aOut);
- pPhrase->doclist.pList = 0;
- pPhrase->doclist.nList = 0;
- }
- sqlite3_free(aPoslist);
- }
- }
-
- return SQLITE_OK;
-}
-
-/*
-** This function is called for each Fts3Phrase in a full-text query
-** expression to initialize the mechanism for returning rows. Once this
-** function has been called successfully on an Fts3Phrase, it may be
-** used with fts3EvalPhraseNext() to iterate through the matching docids.
-**
-** If parameter bOptOk is true, then the phrase may (or may not) use the
-** incremental loading strategy. Otherwise, the entire doclist is loaded into
-** memory within this call.
-**
-** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
-*/
-static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
- int rc; /* Error code */
- Fts3PhraseToken *pFirst = &p->aToken[0];
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
-
- if( pCsr->bDesc==pTab->bDescIdx
- && bOptOk==1
- && p->nToken==1
- && pFirst->pSegcsr
- && pFirst->pSegcsr->bLookup
- && pFirst->bFirst==0
- ){
- /* Use the incremental approach. */
- int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
- rc = sqlite3Fts3MsrIncrStart(
- pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
- p->bIncr = 1;
-
- }else{
- /* Load the full doclist for the phrase into memory. */
- rc = fts3EvalPhraseLoad(pCsr, p);
- p->bIncr = 0;
- }
-
- assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
- return rc;
-}
-
-/*
-** This function is used to iterate backwards (from the end to start)
-** through doclists. It is used by this module to iterate through phrase
-** doclists in reverse and by the fts3_write.c module to iterate through
-** pending-terms lists when writing to databases with "order=desc".
-**
-** The doclist may be sorted in ascending (parameter bDescIdx==0) or
-** descending (parameter bDescIdx==1) order of docid. Regardless, this
-** function iterates from the end of the doclist to the beginning.
-*/
-void sqlite3Fts3DoclistPrev(
- int bDescIdx, /* True if the doclist is desc */
- char *aDoclist, /* Pointer to entire doclist */
- int nDoclist, /* Length of aDoclist in bytes */
- char **ppIter, /* IN/OUT: Iterator pointer */
- sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
- int *pnList, /* OUT: List length pointer */
- u8 *pbEof /* OUT: End-of-file flag */
-){
- char *p = *ppIter;
-
- assert( nDoclist>0 );
- assert( *pbEof==0 );
- assert( p || *piDocid==0 );
- assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );
-
- if( p==0 ){
- sqlite3_int64 iDocid = 0;
- char *pNext = 0;
- char *pDocid = aDoclist;
- char *pEnd = &aDoclist[nDoclist];
- int iMul = 1;
-
- while( pDocid<pEnd ){
- sqlite3_int64 iDelta;
- pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
- iDocid += (iMul * iDelta);
- pNext = pDocid;
- fts3PoslistCopy(0, &pDocid);
- while( pDocid<pEnd && *pDocid==0 ) pDocid++;
- iMul = (bDescIdx ? -1 : 1);
- }
-
- *pnList = (int)(pEnd - pNext);
- *ppIter = pNext;
- *piDocid = iDocid;
- }else{
- int iMul = (bDescIdx ? -1 : 1);
- sqlite3_int64 iDelta;
- fts3GetReverseVarint(&p, aDoclist, &iDelta);
- *piDocid -= (iMul * iDelta);
-
- if( p==aDoclist ){
- *pbEof = 1;
- }else{
- char *pSave = p;
- fts3ReversePoslist(aDoclist, &p);
- *pnList = (int)(pSave - p);
- }
- *ppIter = p;
- }
-}
-
-/*
-** Iterate forwards through a doclist.
-*/
-void sqlite3Fts3DoclistNext(
- int bDescIdx, /* True if the doclist is desc */
- char *aDoclist, /* Pointer to entire doclist */
- int nDoclist, /* Length of aDoclist in bytes */
- char **ppIter, /* IN/OUT: Iterator pointer */
- sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
- u8 *pbEof /* OUT: End-of-file flag */
-){
- char *p = *ppIter;
-
- assert( nDoclist>0 );
- assert( *pbEof==0 );
- assert( p || *piDocid==0 );
- assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) );
-
- if( p==0 ){
- p = aDoclist;
- p += sqlite3Fts3GetVarint(p, piDocid);
- }else{
- fts3PoslistCopy(0, &p);
- if( p>=&aDoclist[nDoclist] ){
- *pbEof = 1;
- }else{
- sqlite3_int64 iVar;
- p += sqlite3Fts3GetVarint(p, &iVar);
- *piDocid += ((bDescIdx ? -1 : 1) * iVar);
- }
- }
-
- *ppIter = p;
-}
-
-/*
-** Attempt to move the phrase iterator to point to the next matching docid.
-** If an error occurs, return an SQLite error code. Otherwise, return
-** SQLITE_OK.
-**
-** If there is no "next" entry and no error occurs, then *pbEof is set to
-** 1 before returning. Otherwise, if no error occurs and the iterator is
-** successfully advanced, *pbEof is set to 0.
-**
-** One of three strategies is used:
-**
-** 1. p->bIncr is set: the next entry is fetched with
-** sqlite3Fts3MsrIncrNext(). This branch writes *pbEof (to 1) only
-** when no position list is returned.
-**
-** 2. The cursor direction (pCsr->bDesc) differs from the index direction
-** (pTab->bDescIdx) and doclist.nAll is non-zero: the in-memory doclist
-** is walked backwards by sqlite3Fts3DoclistPrev().
-**
-** 3. Otherwise the in-memory doclist aAll[] is walked forwards. This is
-** the only branch that explicitly stores 0 in *pbEof.
-*/
-static int fts3EvalPhraseNext(
- Fts3Cursor *pCsr, /* FTS Cursor handle */
- Fts3Phrase *p, /* Phrase object to advance to next docid */
- u8 *pbEof /* OUT: Set to 1 if EOF */
-){
- int rc = SQLITE_OK;
- Fts3Doclist *pDL = &p->doclist;
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
-
- if( p->bIncr ){
- assert( p->nToken==1 );
- assert( pDL->pNextDocid==0 );
- rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr,
- &pDL->iDocid, &pDL->pList, &pDL->nList
- );
- if( rc==SQLITE_OK && !pDL->pList ){ /* No position list returned: EOF */
- *pbEof = 1;
- }
- }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
- sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll,
- &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
- );
- pDL->pList = pDL->pNextDocid; /* Same pointer serves as iterator and poslist */
- }else{
- char *pIter; /* Used to iterate through aAll */
- char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */
- if( pDL->pNextDocid ){
- pIter = pDL->pNextDocid;
- }else{
- pIter = pDL->aAll;
- }
-
- if( pIter>=pEnd ){
- /* We have already reached the end of this doclist. EOF. */
- *pbEof = 1;
- }else{
- sqlite3_int64 iDelta;
- pIter += sqlite3Fts3GetVarint(pIter, &iDelta);
- if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ /* Ascending index, or very first entry */
- pDL->iDocid += iDelta;
- }else{
- pDL->iDocid -= iDelta;
- }
- pDL->pList = pIter;
- fts3PoslistCopy(0, &pIter);
- pDL->nList = (int)(pIter - pDL->pList);
-
- /* pIter now points just past the 0x00 that terminates the position-
- ** list for document pDL->iDocid. However, if this position-list was
- ** edited in place by fts3EvalNearTrim(), then pIter may not actually
- ** point to the start of the next docid value. The following line deals
- ** with this case by advancing pIter past the zero-padding added by
- ** fts3EvalNearTrim(). */
- while( pIter<pEnd && *pIter==0 ) pIter++;
-
- pDL->pNextDocid = pIter;
- assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
- *pbEof = 0;
- }
- }
-
- return rc;
-}
-
-/*
-** Start the doclist readers for every phrase beneath pExpr.
-**
-** This function is a no-op if pExpr is NULL or if *pRc is not SQLITE_OK
-** when it is called. Otherwise fts3EvalPhraseStart() is invoked for each
-** FTSQUERY_PHRASE node in the expression, and Fts3Expr.bDeferred is set
-** to true for every node all of whose descendent tokens are deferred.
-**
-** If parameter bOptOk is zero, then it is guaranteed that the
-** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
-** each phrase in the expression (subject to deferred token processing).
-** Or, if bOptOk is non-zero, then one or more tokens within the expression
-** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
-**
-** If an error occurs within this function, *pRc is set to an SQLite error
-** code before returning.
-*/
-static void fts3EvalStartReaders(
-  Fts3Cursor *pCsr,               /* FTS Cursor handle */
-  Fts3Expr *pExpr,                /* Expression to initialize phrases in */
-  int bOptOk,                     /* True to enable incremental loading */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  if( pExpr==0 || *pRc!=SQLITE_OK ) return;
-
-  if( pExpr->eType!=FTSQUERY_PHRASE ){
-    /* Interior node: start both sub-trees, then combine their flags. */
-    fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
-    fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
-    pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
-  }else{
-    /* Leaf node: the phrase is deferred only if no token has a NULL
-    ** pDeferred pointer. */
-    int nTok = pExpr->pPhrase->nToken;
-    int iTok = 0;
-    while( iTok<nTok && pExpr->pPhrase->aToken[iTok].pDeferred!=0 ){
-      iTok++;
-    }
-    pExpr->bDeferred = (iTok==nTok);
-    *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
-  }
-}
-
-/*
-** An array of the following structures is assembled as part of the process
-** of selecting tokens to defer before the query starts executing (as part
-** of the xFilter() method). There is one element in the array for each
-** token in the FTS expression.
-**
-** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
-** to phrases that are connected only by AND and NEAR operators (not OR or
-** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
-** separately. The root of a token's AND/NEAR cluster is stored in
-** Fts3TokenAndCost.pRoot.
-**
-** Entries are filled in by fts3EvalTokenCosts(). fts3EvalSelectDeferred()
-** sets pToken to NULL once it has processed an entry.
-*/
-typedef struct Fts3TokenAndCost Fts3TokenAndCost;
-struct Fts3TokenAndCost {
- Fts3Phrase *pPhrase; /* The phrase the token belongs to */
- int iToken; /* Position of token in phrase */
- Fts3PhraseToken *pToken; /* The token itself */
- Fts3Expr *pRoot; /* Root of NEAR/AND cluster */
- int nOvfl; /* Number of overflow pages to load doclist */
- int iCol; /* The column the token must match */
-};
-
-/*
-** Populate an already-allocated Fts3TokenAndCost array by walking the
-** expression tree rooted at pExpr.
-**
-** One entry is appended at *(*ppTC)++ for each token of each phrase that
-** is visited; its nOvfl field is obtained from sqlite3Fts3MsrOvfl().
-** Sub-trees below an FTSQUERY_NOT node are not visited. Each child of an
-** FTSQUERY_OR node becomes the root of a new cluster and is also appended
-** at *(*ppOr)++.
-**
-** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
-** Otherwise, if an error occurs during execution, *pRc is set to an
-** SQLite error code.
-*/
-static void fts3EvalTokenCosts(
-  Fts3Cursor *pCsr,               /* FTS Cursor handle */
-  Fts3Expr *pRoot,                /* Root of current AND/NEAR cluster */
-  Fts3Expr *pExpr,                /* Expression to consider */
-  Fts3TokenAndCost **ppTC,        /* Write new entries to *(*ppTC)++ */
-  Fts3Expr ***ppOr,               /* Write new OR root to *(*ppOr)++ */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  if( *pRc!=SQLITE_OK ) return;
-
-  if( pExpr->eType==FTSQUERY_PHRASE ){
-    Fts3Phrase *pPhrase = pExpr->pPhrase;
-    int iTok = 0;
-    while( *pRc==SQLITE_OK && iTok<pPhrase->nToken ){
-      Fts3TokenAndCost *pEntry = *ppTC;
-      *ppTC = pEntry + 1;
-      pEntry->pPhrase = pPhrase;
-      pEntry->iToken = iTok;
-      pEntry->pRoot = pRoot;
-      pEntry->pToken = &pPhrase->aToken[iTok];
-      pEntry->iCol = pPhrase->iColumn;
-      *pRc = sqlite3Fts3MsrOvfl(pCsr, pEntry->pToken->pSegcsr, &pEntry->nOvfl);
-      iTok++;
-    }
-    return;
-  }
-
-  /* Nothing beneath a NOT node is a candidate. */
-  if( pExpr->eType==FTSQUERY_NOT ) return;
-
-  assert( pExpr->eType==FTSQUERY_OR
-       || pExpr->eType==FTSQUERY_AND
-       || pExpr->eType==FTSQUERY_NEAR
-  );
-  assert( pExpr->pLeft && pExpr->pRight );
-
-  /* Left sub-tree. Under an OR it starts a cluster of its own. */
-  if( FTSQUERY_OR==pExpr->eType ){
-    pRoot = pExpr->pLeft;
-    *(*ppOr)++ = pRoot;
-  }
-  fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);
-
-  /* Right sub-tree, handled the same way. */
-  if( FTSQUERY_OR==pExpr->eType ){
-    pRoot = pExpr->pRight;
-    *(*ppOr)++ = pRoot;
-  }
-  fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
-}
-
-/*
-** Determine the average document (row) size in pages. If successful,
-** write this value to *pnPage and return SQLITE_OK. Otherwise, return
-** an SQLite error code.
-**
-** The average document size in pages is calculated by first determining
-** the average size in bytes, B. If B is less than the amount of data that
-** will fit on a single leaf page of an intkey table in this database, then
-** the average docsize is 1. Otherwise, it is 1 plus the number of overflow
-** pages consumed by a record B bytes in size.
-**
-** The result is cached in pCsr->nRowAvg (and the document count in
-** pCsr->nDoc), so the %_stat table is read at most once per cursor.
-** FTS_CORRUPT_VTAB is returned if the stat record is missing, empty, or
-** yields a zero document count or zero byte count.
-*/
-static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
-  if( pCsr->nRowAvg==0 ){
-    /* The average document size, which is required to calculate the cost
-    ** of each doclist, has not yet been determined. Read the required
-    ** data from the %_stat table to calculate it.
-    **
-    ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
-    ** varints, where nCol is the number of columns in the FTS3 table.
-    ** The first varint is the number of documents currently stored in
-    ** the table. The following nCol varints contain the total amount of
-    ** data stored in all rows of each column of the table, from left
-    ** to right.
-    */
-    int rc;
-    Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
-    sqlite3_stmt *pStmt;
-    sqlite3_int64 nDoc = 0;
-    sqlite3_int64 nByte = 0;
-    const char *a;
-
-    rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
-    if( rc!=SQLITE_OK ) return rc;
-    a = sqlite3_column_blob(pStmt, 0);
-
-    /* sqlite3_column_blob() returns NULL for a zero-length blob (and for
-    ** an SQL NULL). In that case there is nothing to decode: nDoc and
-    ** nByte stay 0 and the record is reported as corrupt below, instead
-    ** of dereferencing the NULL pointer. */
-    if( a ){
-      const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
-      a += sqlite3Fts3GetVarint(a, &nDoc);
-      while( a<pEnd ){
-        /* Only the last value decoded is kept in nByte. */
-        a += sqlite3Fts3GetVarint(a, &nByte);
-      }
-    }
-    if( nDoc==0 || nByte==0 ){
-      sqlite3_reset(pStmt);
-      return FTS_CORRUPT_VTAB;
-    }
-
-    pCsr->nDoc = nDoc;
-    pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz);
-    assert( pCsr->nRowAvg>0 );
-    rc = sqlite3_reset(pStmt);
-    if( rc!=SQLITE_OK ) return rc;
-  }
-
-  *pnPage = pCsr->nRowAvg;
-  return SQLITE_OK;
-}
-
-/*
-** This function is called to select the tokens (if any) that will be
-** deferred. The array aTC[] has already been populated when this is
-** called.
-**
-** This function is called once for each AND/NEAR cluster in the
-** expression. Each invocation determines which tokens to defer within
-** the cluster with root node pRoot. See comments above the definition
-** of struct Fts3TokenAndCost for more details.
-**
-** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
-** called on each token to defer. Otherwise, an SQLite error code is
-** returned.
-**
-** As each entry of aTC[] belonging to this cluster is processed, its
-** pToken field is set to NULL so that it is not selected again.
-*/
-static int fts3EvalSelectDeferred(
- Fts3Cursor *pCsr, /* FTS Cursor handle */
- Fts3Expr *pRoot, /* Consider tokens with this root node */
- Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */
- int nTC /* Number of entries in aTC[] */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int nDocSize = 0; /* Number of pages per doc loaded */
- int rc = SQLITE_OK; /* Return code */
- int ii; /* Iterator variable for various purposes */
- int nOvfl = 0; /* Total overflow pages used by doclists */
- int nToken = 0; /* Total number of tokens in cluster */
-
- int nMinEst = 0; /* The minimum count for any phrase so far. */
- int nLoad4 = 1; /* (Phrases that will be loaded)^4. */
-
- /* Tokens are never deferred for FTS tables created using the content=xxx
- ** option. The reason being that it is not guaranteed that the content
- ** table actually contains the same data as the index. To prevent this from
- ** causing any problems, the deferred token optimization is completely
- ** disabled for content=xxx tables. */
- if( pTab->zContentTbl ){
- return SQLITE_OK;
- }
-
- /* Count the tokens in this AND/NEAR cluster. If none of the doclists
- ** associated with the tokens spill onto overflow pages, or if there is
- ** only 1 token, exit early. No tokens to defer in this case. */
- for(ii=0; ii<nTC; ii++){
- if( aTC[ii].pRoot==pRoot ){
- nOvfl += aTC[ii].nOvfl;
- nToken++;
- }
- }
- if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
-
- /* Obtain the average docsize (in pages). If this fails, rc is non-zero
- ** and the loop below is skipped by its rc==SQLITE_OK condition. */
- rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
- assert( rc!=SQLITE_OK || nDocSize>0 );
-
-
- /* Iterate through all tokens in this AND/NEAR cluster, in ascending order
- ** of the number of overflow pages that will be loaded by the pager layer
- ** to retrieve the entire doclist for the token from the full-text index.
- ** Load the doclists for tokens that are either:
- **
- ** a. The cheapest token in the entire query (i.e. the one visited by the
- ** first iteration of this loop), or
- **
- ** b. Part of a multi-token phrase.
- **
- ** After each token doclist is loaded, merge it with the others from the
- ** same phrase and count the number of documents that the merged doclist
- ** contains. Set variable "nMinEst" to the smallest number of documents in
- ** any phrase doclist for which 1 or more token doclists have been loaded.
- ** Let nOther be the number of other phrases for which it is certain that
- ** one or more tokens will not be deferred.
- **
- ** Then, for each token, defer it if loading the doclist would result in
- ** loading N or more overflow pages into memory, where N is computed as:
- **
- ** (nMinEst + 4^nOther - 1) / (4^nOther)
- */
- for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
- int iTC; /* Used to iterate through aTC[] array. */
- Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */
-
- /* Set pTC to point to the cheapest remaining token. Entries already
- ** processed have pToken==0 and are ignored. */
- for(iTC=0; iTC<nTC; iTC++){
- if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot
- && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl)
- ){
- pTC = &aTC[iTC];
- }
- }
- assert( pTC );
-
- /* When ii>0 the first iteration has already set nLoad4 to at least 4,
- ** so the divisor (nLoad4/4) below is never zero. */
- if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
- /* The number of overflow pages to load for this (and therefore all
- ** subsequent) tokens is greater than the estimated number of pages
- ** that will be loaded if all subsequent tokens are deferred.
- */
- Fts3PhraseToken *pToken = pTC->pToken;
- rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
- fts3SegReaderCursorFree(pToken->pSegcsr);
- pToken->pSegcsr = 0;
- }else{
- /* Set nLoad4 to the value of (4^nOther) for the next iteration of the
- ** for-loop. Except, limit the value to 2^24 to prevent it from
- ** overflowing the 32-bit integer it is stored in. */
- if( ii<12 ) nLoad4 = nLoad4*4;
-
- if( ii==0 || pTC->pPhrase->nToken>1 ){
- /* Either this is the cheapest token in the entire query, or it is
- ** part of a multi-token phrase. Either way, the entire doclist will
- ** (eventually) be loaded into memory. It may as well be now. */
- Fts3PhraseToken *pToken = pTC->pToken;
- int nList = 0;
- char *pList = 0;
- rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
- assert( rc==SQLITE_OK || pList==0 );
- if( rc==SQLITE_OK ){
- int nCount;
- fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
- nCount = fts3DoclistCountDocids(
- pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
- );
- if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
- }
- }
- }
- pTC->pToken = 0; /* Mark this entry as processed */
- }
-
- return rc;
-}
-
-/*
-** This function is called from within the xFilter method. It initializes
-** the full-text query currently stored in pCsr->pExpr. To iterate through
-** the results of a query, the caller does:
-**
-**    fts3EvalStart(pCsr);
-**    while( 1 ){
-**      fts3EvalNext(pCsr);
-**      if( pCsr->isEof ) break;
-**      ... return row pCsr->iPrevId to the caller ...
-**    }
-**
-** Three steps are performed: readers are allocated for every token
-** (fts3EvalAllocateReaders), tokens to defer are selected for each
-** AND/NEAR cluster (unless SQLITE_DISABLE_FTS4_DEFERRED is defined, and
-** only when there is more than one token and pTab->bFts4 is set), and
-** finally the phrase readers are started with incremental loading
-** permitted. Returns SQLITE_OK or an SQLite error code.
-*/
-static int fts3EvalStart(Fts3Cursor *pCsr){
-  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
-  int rc = SQLITE_OK;
-  int nToken = 0;
-  int nOr = 0;
-
-  /* Allocate a MultiSegReader for each token in the expression. */
-  fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
-
-  /* Determine which, if any, tokens in the expression should be deferred. */
-#ifndef SQLITE_DISABLE_FTS4_DEFERRED
-  if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){
-    /* A single allocation holds nToken Fts3TokenAndCost entries followed
-    ** by space for (nOr*2) Fts3Expr pointers (the OR cluster roots). */
-    Fts3TokenAndCost *aTC;
-    aTC = (Fts3TokenAndCost *)sqlite3_malloc(
-        sizeof(Fts3TokenAndCost) * nToken
-      + sizeof(Fts3Expr *) * nOr * 2
-    );
-
-    if( !aTC ){
-      rc = SQLITE_NOMEM;
-    }else{
-      /* The OR-root array is derived from aTC only once the allocation is
-      ** known to have succeeded; doing pointer arithmetic on a NULL aTC
-      ** would be undefined behaviour. */
-      Fts3Expr **apOr = (Fts3Expr **)&aTC[nToken];
-      int ii;
-      Fts3TokenAndCost *pTC = aTC;
-      Fts3Expr **ppOr = apOr;
-
-      fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
-      /* Recompute the counts from what was actually written. */
-      nToken = (int)(pTC-aTC);
-      nOr = (int)(ppOr-apOr);
-
-      if( rc==SQLITE_OK ){
-        /* Root 0 is the top-level cluster; then one pass per OR child. */
-        rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
-        for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
-          rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
-        }
-      }
-
-      sqlite3_free(aTC);
-    }
-  }
-#endif
-
-  fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
-  return rc;
-}
-
-/*
-** Invalidate the current position list for phrase pPhrase.
-**
-** If doclist.bFreeList is set, the buffer at doclist.pList is released
-** with sqlite3_free(). In either case pList, nList and bFreeList are all
-** reset to zero.
-*/
-static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
-  Fts3Doclist *pDL = &pPhrase->doclist;
-
-  if( pDL->bFreeList ) sqlite3_free(pDL->pList);
-  pDL->bFreeList = 0;
-  pDL->nList = 0;
-  pDL->pList = 0;
-}
-
-/*
-** Edit, in place, the position list of the phrase passed as the fifth
-** argument so that it satisfies a NEAR condition. For example:
-**
-**     abc NEAR/5 "def ghi"
-**
-** Parameter nNear is the NEAR distance of the expression (5 in the example
-** above). On entry, *paPoslist points to the position list of the phrase
-** on the other side of the NEAR operator from pPhrase, and *pnToken is the
-** number of tokens in that phrase. For example, if pPhrase refers to the
-** "def ghi" phrase, then *paPoslist points to the position list associated
-** with phrase "abc".
-**
-** All positions in the pPhrase position list that are not sufficiently
-** close to a position in the *paPoslist position list are removed. If this
-** leaves 0 positions, zero is returned. Otherwise, non-zero.
-**
-** When non-zero is returned, the bytes freed by the trim are overwritten
-** with zeros, doclist.nList is reduced accordingly, *paPoslist is set to
-** point to the position list associated with pPhrase and *pnToken is set
-** to the number of tokens in pPhrase. When zero is returned, *pnToken and
-** doclist.nList are not written by this function.
-*/
-static int fts3EvalNearTrim(
-  int nNear,                      /* NEAR distance. As in "NEAR/nNear". */
-  char *aTmp,                     /* Temporary space to use */
-  char **paPoslist,               /* IN/OUT: Position list */
-  int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
-  Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
-){
-  Fts3Doclist *pDL = &pPhrase->doclist;
-  int nDist1 = nNear + pPhrase->nToken;
-  int nDist2 = nNear + *pnToken;
-  int nTrimmed;                   /* Size of the trimmed list in bytes */
-  int bMatch;                     /* Result of the near-merge */
-  char *pRead;
-  char *pWrite;
-
-  assert( pDL->pList );
-
-  /* Input and output cursors both start at the head of the list. */
-  pWrite = pDL->pList;
-  pRead = pWrite;
-  bMatch = fts3PoslistNearMerge(
-    &pWrite, aTmp, nDist1, nDist2, paPoslist, &pRead
-  );
-  if( bMatch==0 ) return bMatch;
-
-  nTrimmed = (int)(pWrite - pDL->pList) - 1;
-  assert( pDL->pList[nTrimmed]=='\0' );
-  assert( nTrimmed<=pDL->nList && nTrimmed>0 );
-
-  /* Zero-fill everything from the new terminator to the old end. */
-  memset(&pDL->pList[nTrimmed], 0, pDL->nList - nTrimmed);
-  pDL->nList = nTrimmed;
-  *paPoslist = pDL->pList;
-  *pnToken = pPhrase->nToken;
-
-  return bMatch;
-}
-
-/*
-** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
-** Otherwise, it advances the expression passed as the second argument to
-** point to the next matching row in the database. Expressions iterate through
-** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
-** or descending if it is non-zero.
-**
-** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
-** successful, the following variables in pExpr are set:
-**
-** Fts3Expr.bEof (non-zero if EOF - there is no next row)
-** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row)
-**
-** Fts3Expr.bStart is also set to 1 on every call that is not a no-op.
-**
-** If the expression is of type FTSQUERY_PHRASE, and the expression is not
-** at EOF, then the following variables are populated with the position list
-** for the phrase for the visited row:
-**
-** Fts3Expr.pPhrase->doclist.nList (length of pList in bytes)
-** Fts3Expr.pPhrase->doclist.pList (pointer to position list)
-**
-** It says above that this function advances the expression to the next
-** matching row. This is usually true, but there are the following exceptions:
-**
-** 1. Deferred tokens are not taken into account. If a phrase consists
-** entirely of deferred tokens, it is assumed to match every row in
-** the db. In this case the position-list is not populated at all.
-**
-** Or, if a phrase contains one or more deferred tokens and one or
-** more non-deferred tokens, then the expression is advanced to the
-** next possible match, considering only non-deferred tokens. In other
-** words, if the phrase is "A B C", and "B" is deferred, the expression
-** is advanced to the next row that contains an instance of "A * C",
-** where "*" may match any single token. The position list in this case
-** is populated as for "A * C" before returning.
-**
-** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is
-** advanced to point to the next row that matches "x AND y".
-**
-** See fts3EvalTestDeferredAndNear() for details on testing if a row is
-** really a match, taking into account deferred tokens and NEAR operators.
-*/
-static void fts3EvalNextRow(
- Fts3Cursor *pCsr, /* FTS Cursor handle */
- Fts3Expr *pExpr, /* Expr. to advance to next matching row */
- int *pRc /* IN/OUT: Error code */
-){
- if( *pRc==SQLITE_OK ){
- int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */
- assert( pExpr->bEof==0 );
- pExpr->bStart = 1;
-
- switch( pExpr->eType ){
- case FTSQUERY_NEAR:
- case FTSQUERY_AND: {
- Fts3Expr *pLeft = pExpr->pLeft;
- Fts3Expr *pRight = pExpr->pRight;
- assert( !pLeft->bDeferred || !pRight->bDeferred );
-
- if( pLeft->bDeferred ){
- /* LHS is entirely deferred. So we assume it matches every row.
- ** Advance the RHS iterator to find the next row visited. */
- fts3EvalNextRow(pCsr, pRight, pRc);
- pExpr->iDocid = pRight->iDocid;
- pExpr->bEof = pRight->bEof;
- }else if( pRight->bDeferred ){
- /* RHS is entirely deferred. So we assume it matches every row.
- ** Advance the LHS iterator to find the next row visited. */
- fts3EvalNextRow(pCsr, pLeft, pRc);
- pExpr->iDocid = pLeft->iDocid;
- pExpr->bEof = pLeft->bEof;
- }else{
- /* Neither the RHS or LHS are deferred. Step both sides once, then
- ** keep advancing whichever side is behind until the docids agree
- ** or one side reaches EOF. */
- fts3EvalNextRow(pCsr, pLeft, pRc);
- fts3EvalNextRow(pCsr, pRight, pRc);
- while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
- sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
- if( iDiff==0 ) break;
- if( iDiff<0 ){
- fts3EvalNextRow(pCsr, pLeft, pRc);
- }else{
- fts3EvalNextRow(pCsr, pRight, pRc);
- }
- }
- pExpr->iDocid = pLeft->iDocid;
- pExpr->bEof = (pLeft->bEof || pRight->bEof);
- }
- break;
- }
-
- case FTSQUERY_OR: {
- Fts3Expr *pLeft = pExpr->pLeft;
- Fts3Expr *pRight = pExpr->pRight;
- sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
-
- assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
- assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
-
- /* Advance the side that is behind; if both sit on the same docid
- ** (or neither has started), advance both. */
- if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
- fts3EvalNextRow(pCsr, pLeft, pRc);
- }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
- fts3EvalNextRow(pCsr, pRight, pRc);
- }else{
- fts3EvalNextRow(pCsr, pLeft, pRc);
- fts3EvalNextRow(pCsr, pRight, pRc);
- }
-
- /* The OR is at EOF only when both sides are; its docid is that of
- ** whichever live side now comes first in iteration order. */
- pExpr->bEof = (pLeft->bEof && pRight->bEof);
- iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
- if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
- pExpr->iDocid = pLeft->iDocid;
- }else{
- pExpr->iDocid = pRight->iDocid;
- }
-
- break;
- }
-
- case FTSQUERY_NOT: {
- Fts3Expr *pLeft = pExpr->pLeft;
- Fts3Expr *pRight = pExpr->pRight;
-
- if( pRight->bStart==0 ){
- fts3EvalNextRow(pCsr, pRight, pRc);
- assert( *pRc!=SQLITE_OK || pRight->bStart );
- }
-
- /* Step the LHS, then bring the RHS up to (but not past) it. Whether
- ** the RHS actually lands on the same docid is not checked here. */
- fts3EvalNextRow(pCsr, pLeft, pRc);
- if( pLeft->bEof==0 ){
- while( !*pRc
- && !pRight->bEof
- && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0
- ){
- fts3EvalNextRow(pCsr, pRight, pRc);
- }
- }
- pExpr->iDocid = pLeft->iDocid;
- pExpr->bEof = pLeft->bEof;
- break;
- }
-
- default: { /* Every remaining node type is handled as a phrase */
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- fts3EvalInvalidatePoslist(pPhrase);
- *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
- pExpr->iDocid = pPhrase->doclist.iDocid;
- break;
- }
- }
- }
-}
-
-/*
-** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
-** cluster, then this function returns 1 immediately. The same applies if
-** pExpr->bEof is set.
-**
-** Otherwise, it checks if the current row really does match the NEAR
-** expression, using the data currently stored in the position lists
-** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression.
-**
-** If the current row is a match, the position list associated with each
-** phrase in the NEAR expression is edited in place to contain only those
-** phrase instances sufficiently close to their peers to satisfy all NEAR
-** constraints. In this case it returns 1. If the NEAR expression does not
-** match the current row, 0 is returned. The position lists may or may not
-** be edited if 0 is returned.
-**
-** 0 is also returned if the position lists are all empty, or if the
-** temporary buffer cannot be allocated (in which case *pRc is set to
-** SQLITE_NOMEM).
-*/
-static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
- int res = 1;
-
- /* The following block runs if pExpr is the root of a NEAR query.
- ** For example, the query:
- **
- ** "w" NEAR "x" NEAR "y" NEAR "z"
- **
- ** which is represented in tree form as:
- **
- ** |
- ** +--NEAR--+ <-- root of NEAR query
- ** | |
- ** +--NEAR--+ "z"
- ** | |
- ** +--NEAR--+ "y"
- ** | |
- ** "w" "x"
- **
- ** The right-hand child of a NEAR node is always a phrase. The
- ** left-hand child may be either a phrase or a NEAR node. There are
- ** no exceptions to this - it's the way the parser in fts3_expr.c works.
- */
- if( *pRc==SQLITE_OK
- && pExpr->eType==FTSQUERY_NEAR
- && pExpr->bEof==0
- && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
- ){
- Fts3Expr *p;
- int nTmp = 0; /* Bytes of temp space */
- char *aTmp; /* Temp space for PoslistNearMerge() */
-
- /* Allocate temporary working space. nTmp is the combined size of all
- ** position lists in the cluster; the loop leaves p on the left-most
- ** phrase. */
- for(p=pExpr; p->pLeft; p=p->pLeft){
- nTmp += p->pRight->pPhrase->doclist.nList;
- }
- nTmp += p->pPhrase->doclist.nList;
- if( nTmp==0 ){
- res = 0;
- }else{
- aTmp = sqlite3_malloc(nTmp*2);
- if( !aTmp ){
- *pRc = SQLITE_NOMEM;
- res = 0;
- }else{
- char *aPoslist = p->pPhrase->doclist.pList;
- int nToken = p->pPhrase->nToken;
-
- /* First pass: walk up from the left-most phrase, trimming each
- ** right-hand phrase against the phrase before it. */
- for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
- Fts3Phrase *pPhrase = p->pRight->pPhrase;
- int nNear = p->nNear;
- res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
- }
-
- /* Second pass: walk back down from the right-most phrase, trimming
- ** each phrase against the one that follows it. */
- aPoslist = pExpr->pRight->pPhrase->doclist.pList;
- nToken = pExpr->pRight->pPhrase->nToken;
- for(p=pExpr->pLeft; p && res; p=p->pLeft){
- int nNear;
- Fts3Phrase *pPhrase;
- assert( p->pParent && p->pParent->pLeft==p );
- nNear = p->pParent->nNear;
- pPhrase = (
- p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
- );
- res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
- }
- }
-
- sqlite3_free(aTmp); /* aTmp is NULL here if the allocation failed */
- }
- }
-
- return res;
-}
-
-/*
-** This function is a helper function for fts3EvalTestDeferredAndNear().
-** Assuming no error occurs or has occurred, it returns non-zero if the
-** expression passed as the second argument matches the row that pCsr
-** currently points to, or zero if it does not.
-**
-** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
-** If an error occurs during execution of this function, *pRc is set to
-** the appropriate SQLite error code. In this case the returned value is
-** undefined.
-*/
-static int fts3EvalTestExpr(
- Fts3Cursor *pCsr, /* FTS cursor handle */
- Fts3Expr *pExpr, /* Expr to test. May or may not be root. */
- int *pRc /* IN/OUT: Error code */
-){
- int bHit = 1; /* Return value */
- if( *pRc==SQLITE_OK ){
- switch( pExpr->eType ){
- case FTSQUERY_NEAR:
- case FTSQUERY_AND:
- /* Short-circuit evaluation: the RHS and the NEAR test are only
- ** evaluated if everything to their left matched. */
- bHit = (
- fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
- && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
- && fts3EvalNearTest(pExpr, pRc)
- );
-
- /* If the NEAR expression does not match any rows, zero the doclist for
- ** all phrases involved in the NEAR. This is because the snippet(),
- ** offsets() and matchinfo() functions are not supposed to recognize
- ** any instances of phrases that are part of unmatched NEAR queries.
- ** For example if this expression:
- **
- ** ... MATCH 'a OR (b NEAR c)'
- **
- ** is matched against a row containing:
- **
- ** 'a b d e'
- **
- ** then any snippet() should only highlight the "a" term, not the "b"
- ** (as "b" is part of a non-matching NEAR clause).
- */
- if( bHit==0
- && pExpr->eType==FTSQUERY_NEAR
- && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
- ){
- Fts3Expr *p;
- for(p=pExpr; p->pPhrase==0; p=p->pLeft){
- if( p->pRight->iDocid==pCsr->iPrevId ){
- fts3EvalInvalidatePoslist(p->pRight->pPhrase);
- }
- }
- if( p->iDocid==pCsr->iPrevId ){
- fts3EvalInvalidatePoslist(p->pPhrase);
- }
- }
-
- break;
-
- case FTSQUERY_OR: {
- /* Both sides are always evaluated (no short-circuit). */
- int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
- int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
- bHit = bHit1 || bHit2;
- break;
- }
-
- case FTSQUERY_NOT:
- bHit = (
- fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
- && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
- );
- break;
-
- default: {
-#ifndef SQLITE_DISABLE_FTS4_DEFERRED
- if( pCsr->pDeferred
- && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
- ){
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
- if( pExpr->bDeferred ){
- fts3EvalInvalidatePoslist(pPhrase);
- }
- *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
- bHit = (pPhrase->doclist.pList!=0); /* Hit iff a position list is present afterwards */
- pExpr->iDocid = pCsr->iPrevId;
- }else
-#endif
- {
- bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
- }
- break;
- }
- }
- }
- return bHit;
-}
-
-/*
-** This function is called as the second part of each xNext operation when
-** iterating through the results of a full-text query. At this point the
-** cursor points to a row that matches the query expression, with the
-** following caveats:
-**
-**   * Up until this point, "NEAR" operators in the expression have been
-**     treated as "AND".
-**
-**   * Deferred tokens have not yet been considered.
-**
-** If *pRc is not SQLITE_OK when this function is called, it immediately
-** returns 0. Otherwise, it re-tests the current row taking NEAR operators
-** and deferred tokens into account, and stores the resulting error code
-** in *pRc.
-**
-** The return value is 1 (a "miss") only if no error occurred and the row
-** turned out NOT to match the query. It is 0 if the row does match, or if
-** an error occurred.
-*/
-static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
-  int rc = *pRc;
-  int bMatch;
-
-  if( rc!=SQLITE_OK ) return 0;
-
-  /* If there are one or more deferred tokens, load the current row into
-  ** memory and scan it to determine the position list for each deferred
-  ** token. Neither deferred tokens nor NEAR operators were taken into
-  ** account earlier, by fts3EvalNextRow(). */
-  if( pCsr->pDeferred ){
-    rc = fts3CursorSeek(0, pCsr);
-    if( rc==SQLITE_OK ) rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
-  }
-
-  /* Now decide whether the row is really a match. */
-  bMatch = fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc);
-
-  /* Free the position-lists accumulated for each deferred token above. */
-  sqlite3Fts3FreeDeferredDoclists(pCsr);
-  *pRc = rc;
-
-  if( rc!=SQLITE_OK ) return 0;
-  return bMatch==0;
-}
-
-/*
-** Advance to the next document that matches the FTS expression in
-** Fts3Cursor.pExpr.
-**
-** If the cursor has no expression, it is simply marked as being at EOF.
-** Otherwise candidate rows are visited with fts3EvalNextRow() until
-** either EOF is reached or fts3EvalTestDeferredAndNear() reports that the
-** candidate is not a miss. On return pCsr->isEof and pCsr->iPrevId
-** describe the row found. Returns SQLITE_OK or an SQLite error code.
-*/
-static int fts3EvalNext(Fts3Cursor *pCsr){
-  Fts3Expr *pRoot = pCsr->pExpr;
-  int rc = SQLITE_OK;             /* Return Code */
-
-  assert( pCsr->isEof==0 );
-  if( pRoot==0 ){
-    pCsr->isEof = 1;
-    return rc;
-  }
-
-  for(;;){
-    if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
-    assert( sqlite3_data_count(pCsr->pStmt)==0 );
-
-    fts3EvalNextRow(pCsr, pRoot, &rc);
-    pCsr->isEof = pRoot->bEof;
-    pCsr->isRequireSeek = 1;
-    pCsr->isMatchinfoNeeded = 1;
-    pCsr->iPrevId = pRoot->iDocid;
-
-    /* Stop at EOF, or as soon as a candidate survives the second test. */
-    if( pCsr->isEof ) break;
-    if( !fts3EvalTestDeferredAndNear(pCsr, &rc) ) break;
-  }
-  return rc;
-}
-
-/*
-** Restart iteration for expression pExpr so that the next call to
-** fts3EvalNext() visits the first row. Do not allow incremental
-** loading or merging of phrase doclists for this iteration.
-**
-** The whole sub-tree rooted at pExpr is reset: for every node iDocid,
-** bEof and bStart are cleared, and for phrase nodes the current position
-** list is invalidated and the doclist iterator rewound.
-**
-** If pExpr is NULL, or if *pRc is other than SQLITE_OK when this function
-** is called, it is a no-op. If an error occurs within this function, *pRc
-** is set to an SQLite error code before returning.
-*/
-static void fts3EvalRestart(
-  Fts3Cursor *pCsr,
-  Fts3Expr *pExpr,
-  int *pRc
-){
-  Fts3Phrase *pPhrase;
-
-  if( pExpr==0 || *pRc!=SQLITE_OK ) return;
-
-  pPhrase = pExpr->pPhrase;
-  if( pPhrase!=0 ){
-    fts3EvalInvalidatePoslist(pPhrase);
-    if( pPhrase->bIncr ){
-      /* Incrementally loaded phrase: rewind its segment cursor, then
-      ** start the phrase again with bOptOk==0. */
-      assert( pPhrase->nToken==1 );
-      assert( pPhrase->aToken[0].pSegcsr );
-      sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
-      *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
-    }
-
-    pPhrase->doclist.iDocid = 0;
-    pPhrase->doclist.pNextDocid = 0;
-  }
-
-  pExpr->bStart = 0;
-  pExpr->bEof = 0;
-  pExpr->iDocid = 0;
-
-  /* Each recursive call is itself a no-op once *pRc holds an error. */
-  fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
-  fts3EvalRestart(pCsr, pExpr->pRight, pRc);
-}
-
-/*
-** After allocating the Fts3Expr.aMI[] array for each phrase in the
-** expression rooted at pExpr, the cursor iterates through all rows matched
-** by pExpr, calling this function for each row. This function increments
-** the values in Fts3Expr.aMI[] according to the position-list currently
-** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase
-** expression nodes.
-**
-** Nodes without a phrase, and phrases whose pList is NULL, contribute
-** nothing; the function then simply recurses into pLeft and pRight.
-*/
-static void fts3EvalUpdateCounts(Fts3Expr *pExpr){
- if( pExpr ){
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- if( pPhrase && pPhrase->doclist.pList ){
- int iCol = 0;
- char *p = pPhrase->doclist.pList;
-
- assert( *p );
- while( 1 ){
- u8 c = 0; /* 0x80 if the previous byte had its high bit set */
- int iCnt = 0; /* Varints seen in the current column's list */
- while( 0xFE & (*p | c) ){ /* Stop at a 0x00 or 0x01 byte that follows a byte without the high bit */
- if( (c&0x80)==0 ) iCnt++; /* Previous byte ended a varint, so this byte begins one */
- c = *p++ & 0x80;
- }
-
- /* aMI[iCol*3 + 1] = Number of occurrences
- ** aMI[iCol*3 + 2] = Number of rows containing at least one instance
- */
- pExpr->aMI[iCol*3 + 1] += iCnt;
- pExpr->aMI[iCol*3 + 2] += (iCnt>0);
- if( *p==0x00 ) break; /* 0x00 terminates the whole position list */
- p++; /* Otherwise step over the 0x01 byte; the column number follows */
- p += sqlite3Fts3GetVarint32(p, &iCol);
- }
- }
-
- fts3EvalUpdateCounts(pExpr->pLeft);
- fts3EvalUpdateCounts(pExpr->pRight);
- }
-}
-
-/*
-** Expression pExpr must be of type FTSQUERY_PHRASE.
-**
-** If it is not already allocated and populated, this function allocates and
-** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part
-** of a NEAR expression, then it also allocates and populates the same array
-** for all other phrases that are part of the NEAR expression.
-**
-** SQLITE_OK is returned if the aMI[] array is successfully allocated and
-** populated. Otherwise, if an error occurs, an SQLite error code is returned.
-*/
-static int fts3EvalGatherStats(
- Fts3Cursor *pCsr, /* Cursor object */
- Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */
-){
- int rc = SQLITE_OK; /* Return code */
-
- assert( pExpr->eType==FTSQUERY_PHRASE );
- if( pExpr->aMI==0 ){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- Fts3Expr *pRoot; /* Root of NEAR expression */
- Fts3Expr *p; /* Iterator used for several purposes */
-
- sqlite3_int64 iPrevId = pCsr->iPrevId;
- sqlite3_int64 iDocid;
- u8 bEof;
-
- /* Find the root of the NEAR expression */
- pRoot = pExpr;
- while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
- pRoot = pRoot->pParent;
- }
- iDocid = pRoot->iDocid;
- bEof = pRoot->bEof;
- assert( pRoot->bStart );
-
- /* Allocate space for the aMI[] array of each FTSQUERY_PHRASE node */
- for(p=pRoot; p; p=p->pLeft){
- Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
- assert( pE->aMI==0 );
- pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32));
- if( !pE->aMI ) return SQLITE_NOMEM;
- memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
- }
-
- fts3EvalRestart(pCsr, pRoot, &rc);
-
- while( pCsr->isEof==0 && rc==SQLITE_OK ){
-
- do {
- /* Ensure the %_content statement is reset. */
- if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
- assert( sqlite3_data_count(pCsr->pStmt)==0 );
-
- /* Advance to the next document */
- fts3EvalNextRow(pCsr, pRoot, &rc);
- pCsr->isEof = pRoot->bEof;
- pCsr->isRequireSeek = 1;
- pCsr->isMatchinfoNeeded = 1;
- pCsr->iPrevId = pRoot->iDocid;
- }while( pCsr->isEof==0
- && pRoot->eType==FTSQUERY_NEAR
- && fts3EvalTestDeferredAndNear(pCsr, &rc)
- );
-
- if( rc==SQLITE_OK && pCsr->isEof==0 ){
- fts3EvalUpdateCounts(pRoot);
- }
- }
-
- pCsr->isEof = 0;
- pCsr->iPrevId = iPrevId;
-
- if( bEof ){
- pRoot->bEof = bEof;
- }else{
- /* Caution: pRoot may iterate through docids in ascending or descending
- ** order. For this reason, even though it seems more defensive, the
- ** do loop can not be written:
- **
- ** do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
- */
- fts3EvalRestart(pCsr, pRoot, &rc);
- do {
- fts3EvalNextRow(pCsr, pRoot, &rc);
- assert( pRoot->bEof==0 );
- }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
- fts3EvalTestDeferredAndNear(pCsr, &rc);
- }
- }
- return rc;
-}
-
-/*
-** This function is used by the matchinfo() module to query a phrase
-** expression node for the following information:
-**
-** 1. The total number of occurrences of the phrase in each column of
-** the FTS table (considering all rows), and
-**
-** 2. For each column, the number of rows in the table for which the
-** column contains at least one instance of the phrase.
-**
-** If no error occurs, SQLITE_OK is returned and the values for each column
-** written into the array aiOut as follows:
-**
-** aiOut[iCol*3 + 1] = Number of occurrences
-** aiOut[iCol*3 + 2] = Number of rows containing at least one instance
-**
-** Caveats:
-**
-** * If a phrase consists entirely of deferred tokens, then all output
-** values are set to the number of documents in the table. In other
-** words we assume that very common tokens occur exactly once in each
-** column of each row of the table.
-**
-** * If a phrase contains some deferred tokens (and some non-deferred
-** tokens), count the potential occurrence identified by considering
-** the non-deferred tokens instead of actual phrase occurrences.
-**
-** * If the phrase is part of a NEAR expression, then only phrase instances
-** that meet the NEAR constraint are included in the counts.
-*/
-int sqlite3Fts3EvalPhraseStats(
- Fts3Cursor *pCsr, /* FTS cursor handle */
- Fts3Expr *pExpr, /* Phrase expression */
- u32 *aiOut /* Array to write results into (see above) */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc = SQLITE_OK;
- int iCol;
-
- if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){
- assert( pCsr->nDoc>0 );
- for(iCol=0; iCol<pTab->nColumn; iCol++){
- aiOut[iCol*3 + 1] = (u32)pCsr->nDoc;
- aiOut[iCol*3 + 2] = (u32)pCsr->nDoc;
- }
- }else{
- rc = fts3EvalGatherStats(pCsr, pExpr);
- if( rc==SQLITE_OK ){
- assert( pExpr->aMI );
- for(iCol=0; iCol<pTab->nColumn; iCol++){
- aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1];
- aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2];
- }
- }
- }
-
- return rc;
-}
-
-/*
-** The expression pExpr passed as the second argument to this function
-** must be of type FTSQUERY_PHRASE.
-**
-** The returned value is either NULL or a pointer to a buffer containing
-** a position-list indicating the occurrences of the phrase in column iCol
-** of the current row.
-**
-** More specifically, the returned buffer contains 1 varint for each
-** occurrence of the phrase in the column, stored using the normal (delta+2)
-** compression and is terminated by either an 0x01 or 0x00 byte. For example,
-** if the requested column contains "a b X c d X X" and the position-list
-** for 'X' is requested, the buffer returned may contain:
-**
-** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00
-**
-** This function works regardless of whether or not the phrase is deferred,
-** incremental, or neither.
-*/
-int sqlite3Fts3EvalPhrasePoslist(
- Fts3Cursor *pCsr, /* FTS3 cursor object */
- Fts3Expr *pExpr, /* Phrase to return doclist for */
- int iCol, /* Column to return position list for */
- char **ppOut /* OUT: Pointer to position list */
-){
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- char *pIter;
- int iThis;
- sqlite3_int64 iDocid;
-
- /* If this phrase applies specifically to some column other than
- ** column iCol, return a NULL pointer. */
- *ppOut = 0;
- assert( iCol>=0 && iCol<pTab->nColumn );
- if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
- return SQLITE_OK;
- }
-
- iDocid = pExpr->iDocid;
- pIter = pPhrase->doclist.pList;
- if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
- int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */
- int bOr = 0;
- u8 bEof = 0;
- Fts3Expr *p;
-
- /* Check if this phrase descends from an OR expression node. If not,
- ** return NULL. Otherwise, the entry that corresponds to docid
- ** pCsr->iPrevId may lie earlier in the doclist buffer. */
- for(p=pExpr->pParent; p; p=p->pParent){
- if( p->eType==FTSQUERY_OR ) bOr = 1;
- }
- if( bOr==0 ) return SQLITE_OK;
-
- /* This is the descendent of an OR node. In this case we cannot use
- ** an incremental phrase. Load the entire doclist for the phrase
- ** into memory in this case. */
- if( pPhrase->bIncr ){
- int rc = SQLITE_OK;
- int bEofSave = pExpr->bEof;
- fts3EvalRestart(pCsr, pExpr, &rc);
- while( rc==SQLITE_OK && !pExpr->bEof ){
- fts3EvalNextRow(pCsr, pExpr, &rc);
- if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
- }
- pIter = pPhrase->doclist.pList;
- assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
- if( rc!=SQLITE_OK ) return rc;
- }
-
- if( pExpr->bEof ){
- pIter = 0;
- iDocid = 0;
- }
- bEof = (pPhrase->doclist.nAll==0);
- assert( bDescDoclist==0 || bDescDoclist==1 );
- assert( pCsr->bDesc==0 || pCsr->bDesc==1 );
-
- if( pCsr->bDesc==bDescDoclist ){
- int dummy;
- while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
- sqlite3Fts3DoclistPrev(
- bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
- &pIter, &iDocid, &dummy, &bEof
- );
- }
- }else{
- while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
- sqlite3Fts3DoclistNext(
- bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
- &pIter, &iDocid, &bEof
- );
- }
- }
-
- if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
- }
- if( pIter==0 ) return SQLITE_OK;
-
- if( *pIter==0x01 ){
- pIter++;
- pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
- }else{
- iThis = 0;
- }
- while( iThis<iCol ){
- fts3ColumnlistCopy(0, &pIter);
- if( *pIter==0x00 ) return 0;
- pIter++;
- pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
- }
-
- *ppOut = ((iCol==iThis)?pIter:0);
- return SQLITE_OK;
-}
-
-/*
-** Free all components of the Fts3Phrase structure that were allocated by
-** the eval module. Specifically, this means to free:
-**
-** * the contents of pPhrase->doclist, and
-** * any Fts3MultiSegReader objects held by phrase tokens.
-*/
-void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
- if( pPhrase ){
- int i;
- sqlite3_free(pPhrase->doclist.aAll);
- fts3EvalInvalidatePoslist(pPhrase);
- memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
- for(i=0; i<pPhrase->nToken; i++){
- fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
- pPhrase->aToken[i].pSegcsr = 0;
- }
- }
-}
-
-
-/*
-** Return SQLITE_CORRUPT_VTAB.
-*/
-#ifdef SQLITE_DEBUG
-int sqlite3Fts3Corrupt(){
- return SQLITE_CORRUPT_VTAB;
-}
-#endif
-
-#if !SQLITE_CORE
-/*
-** Initialize API pointer table, if required.
-*/
-int fts4_extension_init(
- sqlite3 *db,
- char **pzErrMsg,
- void *pApi
-){
- SQLITE_EXTENSION_INIT2(pApi)
- return sqlite3Fts3Init(db);
-}
-#endif
-
-#endif
diff --git a/src/libtracker-fts/fts3.h b/src/libtracker-fts/fts3.h
deleted file mode 100644
index b0346826d..000000000
--- a/src/libtracker-fts/fts3.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-** 2006 Oct 10
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This header file is used by programs that want to link against the
-** FTS3 library. All it does is declare the sqlite3Fts3Init() interface.
-*/
-#include "sqlite3.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-int sqlite3Fts3Init(sqlite3 *db);
-
-int fts4_extension_init(
- sqlite3 *db,
- char **pzErrMsg,
- void *pApi
-);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif /* __cplusplus */
diff --git a/src/libtracker-fts/fts3Int.h b/src/libtracker-fts/fts3Int.h
deleted file mode 100644
index 77ca4704e..000000000
--- a/src/libtracker-fts/fts3Int.h
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
-** 2009 Nov 12
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-*/
-#ifndef _FTSINT_H
-#define _FTSINT_H
-
-#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
-# define NDEBUG 1
-#endif
-
-/*
-** FTS4 is really an extension for FTS3. It is enabled using the
-** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also allow
-** the SQLITE_ENABLE_FTS4 macro to serve as an alias for SQLITE_ENABLE_FTS3.
-*/
-#if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3)
-# define SQLITE_ENABLE_FTS3
-#endif
-
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-/* If not building as part of the core, include sqlite3ext.h. */
-#ifndef SQLITE_CORE
-# include "sqlite3ext.h"
-extern const sqlite3_api_routines *sqlite3_api;
-#endif
-
-#include "sqlite3.h"
-#include "fts3_tokenizer.h"
-#include "fts3_hash.h"
-
-/*
-** This constant controls how often segments are merged. Once there are
-** FTS3_MERGE_COUNT segments of level N, they are merged into a single
-** segment of level N+1.
-*/
-#define FTS3_MERGE_COUNT 16
-
-/*
-** This is the maximum amount of data (in bytes) to store in the
-** Fts3Table.pendingTerms hash table. Normally, the hash table is
-** populated as documents are inserted/updated/deleted in a transaction
-** and used to create a new segment when the transaction is committed.
-** However if this limit is reached midway through a transaction, a new
-** segment is created and the hash table cleared immediately.
-*/
-#define FTS3_MAX_PENDING_DATA (1*1024*1024)
-
-/*
-** Macro to return the number of elements in an array. SQLite has a
-** similar macro called ArraySize(). Use a different name to avoid
-** a collision when building an amalgamation with built-in FTS3.
-*/
-#define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
-
-
-#ifndef MIN
-# define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-#ifndef MAX
-# define MAX(x,y) ((x)>(y)?(x):(y))
-#endif
-
-/*
-** Maximum length of a varint encoded integer. The varint format is different
-** from that used by SQLite, so the maximum length is 10, not 9.
-*/
-#define FTS3_VARINT_MAX 10
-
-/*
-** FTS4 virtual tables may maintain multiple indexes - one index of all terms
-** in the document set and zero or more prefix indexes. All indexes are stored
-** as one or more b+-trees in the %_segments and %_segdir tables.
-**
-** It is possible to determine which index a b+-tree belongs to based on the
-** value stored in the "%_segdir.level" column. Given this value L, the index
-** that the b+-tree belongs to is (L>>10). In other words, all b+-trees with
-** level values between 0 and 1023 (inclusive) belong to index 0, all levels
-** between 1024 and 2047 to index 1, and so on.
-**
-** It is considered impossible for an index to use more than 1024 levels. In
-** theory though this may happen, but only after at least
-** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables.
-*/
-#define FTS3_SEGDIR_MAXLEVEL 1024
-#define FTS3_SEGDIR_MAXLEVEL_STR "1024"
-
-/*
-** The testcase() macro is only used by the amalgamation. If undefined,
-** make it a no-op.
-*/
-#ifndef testcase
-# define testcase(X)
-#endif
-
-/*
-** Terminator values for position-lists and column-lists.
-*/
-#define POS_COLUMN (1) /* Column-list terminator */
-#define POS_END (0) /* Position-list terminator */
-
-/*
-** This section provides definitions to allow the
-** FTS3 extension to be compiled outside of the
-** amalgamation.
-*/
-#ifndef SQLITE_AMALGAMATION
-/*
-** Macros indicating that conditional expressions are always true or
-** false.
-*/
-#ifdef SQLITE_COVERAGE_TEST
-# define ALWAYS(x) (1)
-# define NEVER(X) (0)
-#else
-# define ALWAYS(x) (x)
-# define NEVER(x) (x)
-#endif
-
-/*
-** Internal types used by SQLite.
-*/
-typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */
-typedef short int i16; /* 2-byte (or larger) signed integer */
-typedef unsigned int u32; /* 4-byte unsigned integer */
-typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
-typedef sqlite3_int64 i64; /* 8-byte signed integer */
-
-/*
-** Macro used to suppress compiler warnings for unused parameters.
-*/
-#define UNUSED_PARAMETER(x) (void)(x)
-
-/*
-** Activate assert() only if SQLITE_DEBUG is enabled.
-*/
-#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
-# define NDEBUG 1
-#endif
-
-/*
-** The TESTONLY macro is used to enclose variable declarations or
-** other bits of code that are needed to support the arguments
-** within testcase() and assert() macros.
-*/
-#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
-# define TESTONLY(X) X
-#else
-# define TESTONLY(X)
-#endif
-
-#endif /* SQLITE_AMALGAMATION */
-
-#ifdef SQLITE_DEBUG
-int sqlite3Fts3Corrupt(void);
-# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
-#else
-# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
-#endif
-
-typedef struct Fts3Table Fts3Table;
-typedef struct Fts3Cursor Fts3Cursor;
-typedef struct Fts3Expr Fts3Expr;
-typedef struct Fts3Phrase Fts3Phrase;
-typedef struct Fts3PhraseToken Fts3PhraseToken;
-
-typedef struct Fts3Doclist Fts3Doclist;
-typedef struct Fts3SegFilter Fts3SegFilter;
-typedef struct Fts3DeferredToken Fts3DeferredToken;
-typedef struct Fts3SegReader Fts3SegReader;
-typedef struct Fts3MultiSegReader Fts3MultiSegReader;
-
-/*
-** A connection to a fulltext index is an instance of the following
-** structure. The xCreate and xConnect methods create an instance
-** of this structure and xDestroy and xDisconnect free that instance.
-** All other methods receive a pointer to the structure as one of their
-** arguments.
-*/
-struct Fts3Table {
- sqlite3_vtab base; /* Base class used by SQLite core */
- sqlite3 *db; /* The database connection */
- const char *zDb; /* logical database name */
- const char *zName; /* virtual table name */
- int nColumn; /* number of named columns in virtual table */
- char **azColumn; /* column names. malloced */
- sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
- char *zContentTbl; /* content=xxx option, or NULL */
- char *zLanguageid; /* languageid=xxx option, or NULL */
- u8 bAutoincrmerge; /* True if automerge=1 */
- u32 nLeafAdd; /* Number of leaf blocks added this trans */
-
- /* Precompiled statements used by the implementation. Each of these
- ** statements is run and reset within a single virtual table API call.
- */
- sqlite3_stmt *aStmt[37];
-
- char *zReadExprlist;
- char *zWriteExprlist;
-
- int nNodeSize; /* Soft limit for node size */
- u8 bFts4; /* True for FTS4, false for FTS3 */
- u8 bHasStat; /* True if %_stat table exists */
- u8 bHasDocsize; /* True if %_docsize table exists */
- u8 bDescIdx; /* True if doclists are in reverse order */
- u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
- int nPgsz; /* Page size for host database */
- char *zSegmentsTbl; /* Name of %_segments table */
- sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
-
- /*
- ** The following array of hash tables is used to buffer pending index
- ** updates during transactions. All pending updates buffered at any one
- ** time must share a common language-id (see the FTS4 langid= feature).
- ** The current language id is stored in variable iPrevLangid.
- **
- ** A single FTS4 table may have multiple full-text indexes. For each index
- ** there is an entry in the aIndex[] array. Index 0 is an index of all the
- ** terms that appear in the document set. Each subsequent index in aIndex[]
- ** is an index of prefixes of a specific length.
- **
- ** Variable nPendingData contains an estimate of the memory consumed by the
- ** pending data structures, including hash table overhead, but not including
- ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
- ** tables are flushed to disk. Variable iPrevDocid is the docid of the most
- ** recently inserted record.
- */
- int nIndex; /* Size of aIndex[] */
- struct Fts3Index {
- int nPrefix; /* Prefix length (0 for main terms index) */
- Fts3Hash hPending; /* Pending terms table for this index */
- } *aIndex;
- int nMaxPendingData; /* Max pending data before flush to disk */
- int nPendingData; /* Current bytes of pending data */
- sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
- int iPrevLangid; /* Langid of recently inserted document */
-
-#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
- /* State variables used for validating that the transaction control
- ** methods of the virtual table are called at appropriate times. These
- ** values do not contribute to FTS functionality; they are used for
- ** verifying the operation of the SQLite core.
- */
- int inTransaction; /* True after xBegin but before xCommit/xRollback */
- int mxSavepoint; /* Largest valid xSavepoint integer */
-#endif
-};
-
-/*
-** When the core wants to read from the virtual table, it creates a
-** virtual table cursor (an instance of the following structure) using
-** the xOpen method. Cursors are destroyed using the xClose method.
-*/
-struct Fts3Cursor {
- sqlite3_vtab_cursor base; /* Base class used by SQLite core */
- i16 eSearch; /* Search strategy (see below) */
- u8 isEof; /* True if at End Of Results */
- u8 isRequireSeek; /* True if must seek pStmt to %_content row */
- sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
- Fts3Expr *pExpr; /* Parsed MATCH query string */
- int iLangid; /* Language being queried for */
- int nPhrase; /* Number of matchable phrases in query */
- Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
- sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
- char *pNextId; /* Pointer into the body of aDoclist */
- char *aDoclist; /* List of docids for full-text queries */
- int nDoclist; /* Size of buffer at aDoclist */
- u8 bDesc; /* True to sort in descending order */
- int eEvalmode; /* An FTS3_EVAL_XX constant */
- int nRowAvg; /* Average size of database rows, in pages */
- sqlite3_int64 nDoc; /* Documents in table */
-
- int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
- u32 *aMatchinfo; /* Information about most recent match */
- int nMatchinfo; /* Number of elements in aMatchinfo[] */
- char *zMatchinfo; /* Matchinfo specification */
-};
-
-#define FTS3_EVAL_FILTER 0
-#define FTS3_EVAL_NEXT 1
-#define FTS3_EVAL_MATCHINFO 2
-
-/*
-** The Fts3Cursor.eSearch member is always set to one of the following.
-** Actually, Fts3Cursor.eSearch can be greater than or equal to
-** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index
-** of the column to be searched. For example, in
-**
-** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
-** SELECT docid FROM ex1 WHERE b MATCH 'one two three';
-**
-** Because the LHS of the MATCH operator is 2nd column "b",
-** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a,
-** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1"
-** indicating that all columns should be searched,
-** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
-*/
-#define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */
-#define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */
-#define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */
-
-
-struct Fts3Doclist {
- char *aAll; /* Array containing doclist (or NULL) */
- int nAll; /* Size of a[] in bytes */
- char *pNextDocid; /* Pointer to next docid */
-
- sqlite3_int64 iDocid; /* Current docid (if pList!=0) */
- int bFreeList; /* True if pList should be sqlite3_free()d */
- char *pList; /* Pointer to position list following iDocid */
- int nList; /* Length of position list */
-};
-
-/*
-** A "phrase" is a sequence of one or more tokens that must match in
-** sequence. A single token is the base case and the most common case.
-** For a sequence of tokens contained in double-quotes (i.e. "one two three")
-** nToken will be the number of tokens in the string.
-*/
-struct Fts3PhraseToken {
- char *z; /* Text of the token */
- int n; /* Number of bytes in buffer z */
- int isPrefix; /* True if token ends with a "*" character */
- int bFirst; /* True if token must appear at position 0 */
-
- /* Variables above this point are populated when the expression is
- ** parsed (by code in fts3_expr.c). Below this point the variables are
- ** used when evaluating the expression. */
- Fts3DeferredToken *pDeferred; /* Deferred token object for this token */
- Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */
-};
-
-struct Fts3Phrase {
- /* Cache of doclist for this phrase. */
- Fts3Doclist doclist;
- int bIncr; /* True if doclist is loaded incrementally */
- int iDoclistToken;
-
- /* Variables below this point are populated by fts3_expr.c when parsing
- ** a MATCH expression. Everything above is part of the evaluation phase.
- */
- int nToken; /* Number of tokens in the phrase */
- int iColumn; /* Index of column this phrase must match */
- Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
-};
-
-/*
-** A tree of these objects forms the RHS of a MATCH operator.
-**
-** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist
-** points to a malloced buffer, size nDoclist bytes, containing the results
-** of this phrase query in FTS3 doclist format. As usual, the initial
-** "Length" field found in doclists stored on disk is omitted from this
-** buffer.
-**
-** Variable aMI is used by FTSQUERY_PHRASE nodes to store the global
-** matchinfo data. If it is not NULL, it points to an array of size nCol*3,
-** where nCol is the number of columns in the queried FTS table. The array
-** is populated as follows:
-**
-** aMI[iCol*3 + 0] = Undefined
-** aMI[iCol*3 + 1] = Number of occurrences
-** aMI[iCol*3 + 2] = Number of rows containing at least one instance
-**
-** The aMI array is allocated using sqlite3_malloc(). It should be freed
-** when the expression node is.
-*/
-struct Fts3Expr {
- int eType; /* One of the FTSQUERY_XXX values defined below */
- int nNear; /* Valid if eType==FTSQUERY_NEAR */
- Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */
- Fts3Expr *pLeft; /* Left operand */
- Fts3Expr *pRight; /* Right operand */
- Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */
-
- /* The following are used by the fts3_eval.c module. */
- sqlite3_int64 iDocid; /* Current docid */
- u8 bEof; /* True this expression is at EOF already */
- u8 bStart; /* True if iDocid is valid */
- u8 bDeferred; /* True if this expression is entirely deferred */
-
- u32 *aMI;
-};
-
-/*
-** Candidate values for Fts3Expr.eType. Note that the order of the first
-** four values is in order of precedence when parsing expressions. For
-** example, the following:
-**
-** "a OR b AND c NOT d NEAR e"
-**
-** is equivalent to:
-**
-** "a OR (b AND (c NOT (d NEAR e)))"
-*/
-#define FTSQUERY_NEAR 1
-#define FTSQUERY_NOT 2
-#define FTSQUERY_AND 3
-#define FTSQUERY_OR 4
-#define FTSQUERY_PHRASE 5
-
-
-/* fts3_write.c */
-int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
-int sqlite3Fts3PendingTermsFlush(Fts3Table *);
-void sqlite3Fts3PendingTermsClear(Fts3Table *);
-int sqlite3Fts3Optimize(Fts3Table *);
-int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
- sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
-int sqlite3Fts3SegReaderPending(
- Fts3Table*,int,const char*,int,int,Fts3SegReader**);
-void sqlite3Fts3SegReaderFree(Fts3SegReader *);
-int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
-int sqlite3Fts3ReadLock(Fts3Table *);
-int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
-
-int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
-int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
-
-#ifndef SQLITE_DISABLE_FTS4_DEFERRED
-void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
-int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
-int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
-void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
-int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
-#else
-# define sqlite3Fts3FreeDeferredTokens(x)
-# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
-# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
-# define sqlite3Fts3FreeDeferredDoclists(x)
-# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
-#endif
-
-void sqlite3Fts3SegmentsClose(Fts3Table *);
-int sqlite3Fts3MaxLevel(Fts3Table *, int *);
-
-/* Special values interpreted by sqlite3Fts3SegReaderCursor() */
-#define FTS3_SEGCURSOR_PENDING -1
-#define FTS3_SEGCURSOR_ALL -2
-
-int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
-int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
-void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
-
-int sqlite3Fts3SegReaderCursor(Fts3Table *,
- int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
-
-/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
-#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
-#define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002
-#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
-#define FTS3_SEGMENT_PREFIX 0x00000008
-#define FTS3_SEGMENT_SCAN 0x00000010
-#define FTS3_SEGMENT_FIRST 0x00000020
-
-/* Type passed as 4th argument to SegmentReaderIterate() */
-struct Fts3SegFilter {
- const char *zTerm;
- int nTerm;
- int iCol;
- int flags;
-};
-
-struct Fts3MultiSegReader {
- /* Used internally by sqlite3Fts3SegReaderXXX() calls */
- Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */
- int nSegment; /* Size of apSegment array */
- int nAdvance; /* How many seg-readers to advance */
- Fts3SegFilter *pFilter; /* Pointer to filter object */
- char *aBuffer; /* Buffer to merge doclists in */
- int nBuffer; /* Allocated size of aBuffer[] in bytes */
-
- int iColFilter; /* If >=0, filter for this column */
- int bRestart;
-
- /* Used by fts3.c only. */
- int nCost; /* Cost of running iterator */
- int bLookup; /* True if a lookup of a single entry. */
-
- /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
- char *zTerm; /* Pointer to term buffer */
- int nTerm; /* Size of zTerm in bytes */
- char *aDoclist; /* Pointer to doclist buffer */
- int nDoclist; /* Size of aDoclist[] in bytes */
-};
-
-int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
-
-/* fts3.c */
-int sqlite3Fts3PutVarint(char *, sqlite3_int64);
-int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
-int sqlite3Fts3GetVarint32(const char *, int *);
-int sqlite3Fts3VarintLen(sqlite3_uint64);
-void sqlite3Fts3Dequote(char *);
-void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
-int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
-int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
-void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
-
-/* fts3_tokenizer.c */
-const char *sqlite3Fts3NextToken(const char *, int *);
-int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
-int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
- sqlite3_tokenizer **, char **
-);
-int sqlite3Fts3IsIdChar(char);
-
-/* fts3_snippet.c */
-void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
-void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
- const char *, const char *, int, int
-);
-void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
-
-/* fts3_expr.c */
-int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
- char **, int, int, int, const char *, int, Fts3Expr **
-);
-void sqlite3Fts3ExprFree(Fts3Expr *);
-#ifdef SQLITE_TEST
-int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
-int sqlite3Fts3InitTerm(sqlite3 *db);
-#endif
-
-int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
- sqlite3_tokenizer_cursor **
-);
-
-/* fts3_aux.c */
-int sqlite3Fts3InitAux(sqlite3 *db);
-
-void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
-
-int sqlite3Fts3MsrIncrStart(
- Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
-int sqlite3Fts3MsrIncrNext(
- Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
-int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);
-int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
-int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
-
-/* fts3_unicode2.c (functions generated by parsing unicode text files) */
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
-int sqlite3FtsUnicodeFold(int, int);
-int sqlite3FtsUnicodeIsalnum(int);
-int sqlite3FtsUnicodeIsdiacritic(int);
-#endif
-
-#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
-#endif /* _FTSINT_H */
diff --git a/src/libtracker-fts/fts3_aux.c b/src/libtracker-fts/fts3_aux.c
deleted file mode 100644
index a2bff2e1d..000000000
--- a/src/libtracker-fts/fts3_aux.c
+++ /dev/null
@@ -1,474 +0,0 @@
-/*
-** 2011 Jan 27
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-*/
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <string.h>
-#include <assert.h>
-
-typedef struct Fts3auxTable Fts3auxTable;
-typedef struct Fts3auxCursor Fts3auxCursor;
-
-struct Fts3auxTable {
- sqlite3_vtab base; /* Base class used by SQLite core */
- Fts3Table *pFts3Tab;
-};
-
-struct Fts3auxCursor {
- sqlite3_vtab_cursor base; /* Base class used by SQLite core */
- Fts3MultiSegReader csr; /* Must be right after "base" */
- Fts3SegFilter filter;
- char *zStop;
- int nStop; /* Byte-length of string zStop */
- int isEof; /* True if cursor is at EOF */
- sqlite3_int64 iRowid; /* Current rowid */
-
- int iCol; /* Current value of 'col' column */
- int nStat; /* Size of aStat[] array */
- struct Fts3auxColstats {
- sqlite3_int64 nDoc; /* 'documents' values for current csr row */
- sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
- } *aStat;
-};
-
-/*
-** Schema of the terms table.
-*/
-#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, col, documents, occurrences)"
-
-/*
-** This function does all the work for both the xConnect and xCreate methods.
-** These tables have no persistent representation of their own, so xConnect
-** and xCreate are identical operations.
-*/
-static int fts3auxConnectMethod(
- sqlite3 *db, /* Database connection */
- void *pUnused, /* Unused */
- int argc, /* Number of elements in argv array */
- const char * const *argv, /* xCreate/xConnect argument array */
- sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
- char **pzErr /* OUT: sqlite3_malloc'd error message */
-){
- char const *zDb; /* Name of database (e.g. "main") */
- char const *zFts3; /* Name of fts3 table */
- int nDb; /* Result of strlen(zDb) */
- int nFts3; /* Result of strlen(zFts3) */
- int nByte; /* Bytes of space to allocate here */
- int rc; /* value returned by declare_vtab() */
- Fts3auxTable *p; /* Virtual table object to return */
-
- UNUSED_PARAMETER(pUnused);
-
- /* The user should specify a single argument - the name of an fts3 table. */
- if( argc!=4 ){
- *pzErr = sqlite3_mprintf(
- "wrong number of arguments to fts4aux constructor"
- );
- return SQLITE_ERROR;
- }
-
- zDb = argv[1];
- nDb = (int)strlen(zDb);
- zFts3 = argv[3];
- nFts3 = (int)strlen(zFts3);
-
- rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
- if( rc!=SQLITE_OK ) return rc;
-
- nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
- p = (Fts3auxTable *)sqlite3_malloc(nByte);
- if( !p ) return SQLITE_NOMEM;
- memset(p, 0, nByte);
-
- p->pFts3Tab = (Fts3Table *)&p[1];
- p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
- p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
- p->pFts3Tab->db = db;
- p->pFts3Tab->nIndex = 1;
-
- memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
- memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
- sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
-
- *ppVtab = (sqlite3_vtab *)p;
- return SQLITE_OK;
-}
-
-/*
-** This function does the work for both the xDisconnect and xDestroy methods.
-** These tables have no persistent representation of their own, so xDisconnect
-** and xDestroy are identical operations.
-*/
-static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
- Fts3auxTable *p = (Fts3auxTable *)pVtab;
- Fts3Table *pFts3 = p->pFts3Tab;
- int i;
-
- /* Free any prepared statements held */
- for(i=0; i<SizeofArray(pFts3->aStmt); i++){
- sqlite3_finalize(pFts3->aStmt[i]);
- }
- sqlite3_free(pFts3->zSegmentsTbl);
- sqlite3_free(p);
- return SQLITE_OK;
-}
-
-#define FTS4AUX_EQ_CONSTRAINT 1
-#define FTS4AUX_GE_CONSTRAINT 2
-#define FTS4AUX_LE_CONSTRAINT 4
-
-/*
-** xBestIndex - Analyze a WHERE and ORDER BY clause.
-*/
-static int fts3auxBestIndexMethod(
- sqlite3_vtab *pVTab,
- sqlite3_index_info *pInfo
-){
- int i;
- int iEq = -1;
- int iGe = -1;
- int iLe = -1;
-
- UNUSED_PARAMETER(pVTab);
-
- /* This vtab delivers always results in "ORDER BY term ASC" order. */
- if( pInfo->nOrderBy==1
- && pInfo->aOrderBy[0].iColumn==0
- && pInfo->aOrderBy[0].desc==0
- ){
- pInfo->orderByConsumed = 1;
- }
-
- /* Search for equality and range constraints on the "term" column. */
- for(i=0; i<pInfo->nConstraint; i++){
- if( pInfo->aConstraint[i].usable && pInfo->aConstraint[i].iColumn==0 ){
- int op = pInfo->aConstraint[i].op;
- if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
- if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
- if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
- if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
- if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
- }
- }
-
- if( iEq>=0 ){
- pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
- pInfo->aConstraintUsage[iEq].argvIndex = 1;
- pInfo->estimatedCost = 5;
- }else{
- pInfo->idxNum = 0;
- pInfo->estimatedCost = 20000;
- if( iGe>=0 ){
- pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
- pInfo->aConstraintUsage[iGe].argvIndex = 1;
- pInfo->estimatedCost /= 2;
- }
- if( iLe>=0 ){
- pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
- pInfo->aConstraintUsage[iLe].argvIndex = 1 + (iGe>=0);
- pInfo->estimatedCost /= 2;
- }
- }
-
- return SQLITE_OK;
-}
-
-/*
-** xOpen - Open a cursor.
-*/
-static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
- Fts3auxCursor *pCsr; /* Pointer to cursor object to return */
-
- UNUSED_PARAMETER(pVTab);
-
- pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor));
- if( !pCsr ) return SQLITE_NOMEM;
- memset(pCsr, 0, sizeof(Fts3auxCursor));
-
- *ppCsr = (sqlite3_vtab_cursor *)pCsr;
- return SQLITE_OK;
-}
-
-/*
-** xClose - Close a cursor.
-*/
-static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
- Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
- Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
-
- sqlite3Fts3SegmentsClose(pFts3);
- sqlite3Fts3SegReaderFinish(&pCsr->csr);
- sqlite3_free((void *)pCsr->filter.zTerm);
- sqlite3_free(pCsr->zStop);
- sqlite3_free(pCsr->aStat);
- sqlite3_free(pCsr);
- return SQLITE_OK;
-}
-
-static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
- if( nSize>pCsr->nStat ){
- struct Fts3auxColstats *aNew;
- aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat,
- sizeof(struct Fts3auxColstats) * nSize
- );
- if( aNew==0 ) return SQLITE_NOMEM;
- memset(&aNew[pCsr->nStat], 0,
- sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
- );
- pCsr->aStat = aNew;
- pCsr->nStat = nSize;
- }
- return SQLITE_OK;
-}
-
-/*
-** xNext - Advance the cursor to the next row, if any.
-*/
-static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
- Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
- Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
- int rc;
-
- /* Increment our pretend rowid value. */
- pCsr->iRowid++;
-
- for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
- if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
- }
-
- rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
- if( rc==SQLITE_ROW ){
- int i = 0;
- int nDoclist = pCsr->csr.nDoclist;
- char *aDoclist = pCsr->csr.aDoclist;
- int iCol;
-
- int eState = 0;
-
- if( pCsr->zStop ){
- int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
- int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n);
- if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){
- pCsr->isEof = 1;
- return SQLITE_OK;
- }
- }
-
- if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
- memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
- iCol = 0;
-
- while( i<nDoclist ){
- sqlite3_int64 v = 0;
-
- i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
- switch( eState ){
- /* State 0. In this state the integer just read was a docid. */
- case 0:
- pCsr->aStat[0].nDoc++;
- eState = 1;
- iCol = 0;
- break;
-
- /* State 1. In this state we are expecting either a 1, indicating
- ** that the following integer will be a column number, or the
- ** start of a position list for column 0.
- **
- ** The only difference between state 1 and state 2 is that if the
- ** integer encountered in state 1 is not 0 or 1, then we need to
- ** increment the column 0 "nDoc" count for this term.
- */
- case 1:
- assert( iCol==0 );
- if( v>1 ){
- pCsr->aStat[1].nDoc++;
- }
- eState = 2;
- /* fall through */
-
- case 2:
- if( v==0 ){ /* 0x00. Next integer will be a docid. */
- eState = 0;
- }else if( v==1 ){ /* 0x01. Next integer will be a column number. */
- eState = 3;
- }else{ /* 2 or greater. A position. */
- pCsr->aStat[iCol+1].nOcc++;
- pCsr->aStat[0].nOcc++;
- }
- break;
-
- /* State 3. The integer just read is a column number. */
- default: assert( eState==3 );
- iCol = (int)v;
- if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
- pCsr->aStat[iCol+1].nDoc++;
- eState = 2;
- break;
- }
- }
-
- pCsr->iCol = 0;
- rc = SQLITE_OK;
- }else{
- pCsr->isEof = 1;
- }
- return rc;
-}
-
-/*
-** xFilter - Initialize a cursor to point at the start of its data.
-*/
-static int fts3auxFilterMethod(
- sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
- int idxNum, /* Strategy index */
- const char *idxStr, /* Unused */
- int nVal, /* Number of elements in apVal */
- sqlite3_value **apVal /* Arguments for the indexing scheme */
-){
- Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
- Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
- int rc;
- int isScan;
-
- UNUSED_PARAMETER(nVal);
- UNUSED_PARAMETER(idxStr);
-
- assert( idxStr==0 );
- assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
- || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
- || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
- );
- isScan = (idxNum!=FTS4AUX_EQ_CONSTRAINT);
-
- /* In case this cursor is being reused, close and zero it. */
- testcase(pCsr->filter.zTerm);
- sqlite3Fts3SegReaderFinish(&pCsr->csr);
- sqlite3_free((void *)pCsr->filter.zTerm);
- sqlite3_free(pCsr->aStat);
- memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
-
- pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
- if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
-
- if( idxNum&(FTS4AUX_EQ_CONSTRAINT|FTS4AUX_GE_CONSTRAINT) ){
- const unsigned char *zStr = sqlite3_value_text(apVal[0]);
- if( zStr ){
- pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
- pCsr->filter.nTerm = sqlite3_value_bytes(apVal[0]);
- if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
- }
- }
- if( idxNum&FTS4AUX_LE_CONSTRAINT ){
- int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0;
- pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx]));
- pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]);
- if( pCsr->zStop==0 ) return SQLITE_NOMEM;
- }
-
- rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL,
- pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
- );
- if( rc==SQLITE_OK ){
- rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
- }
-
- if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
- return rc;
-}
-
-/*
-** xEof - Return true if the cursor is at EOF, or false otherwise.
-*/
-static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
- Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
- return pCsr->isEof;
-}
-
-/*
-** xColumn - Return a column value.
-*/
-static int fts3auxColumnMethod(
- sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */
- int iCol /* Index of column to read value from */
-){
- Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
-
- assert( p->isEof==0 );
- if( iCol==0 ){ /* Column "term" */
- sqlite3_result_text(pContext, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
- }else if( iCol==1 ){ /* Column "col" */
- if( p->iCol ){
- sqlite3_result_int(pContext, p->iCol-1);
- }else{
- sqlite3_result_text(pContext, "*", -1, SQLITE_STATIC);
- }
- }else if( iCol==2 ){ /* Column "documents" */
- sqlite3_result_int64(pContext, p->aStat[p->iCol].nDoc);
- }else{ /* Column "occurrences" */
- sqlite3_result_int64(pContext, p->aStat[p->iCol].nOcc);
- }
-
- return SQLITE_OK;
-}
-
-/*
-** xRowid - Return the current rowid for the cursor.
-*/
-static int fts3auxRowidMethod(
- sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite_int64 *pRowid /* OUT: Rowid value */
-){
- Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
- *pRowid = pCsr->iRowid;
- return SQLITE_OK;
-}
-
-/*
-** Register the fts3aux module with database connection db. Return SQLITE_OK
-** if successful or an error code if sqlite3_create_module() fails.
-*/
-int sqlite3Fts3InitAux(sqlite3 *db){
- static const sqlite3_module fts3aux_module = {
- 0, /* iVersion */
- fts3auxConnectMethod, /* xCreate */
- fts3auxConnectMethod, /* xConnect */
- fts3auxBestIndexMethod, /* xBestIndex */
- fts3auxDisconnectMethod, /* xDisconnect */
- fts3auxDisconnectMethod, /* xDestroy */
- fts3auxOpenMethod, /* xOpen */
- fts3auxCloseMethod, /* xClose */
- fts3auxFilterMethod, /* xFilter */
- fts3auxNextMethod, /* xNext */
- fts3auxEofMethod, /* xEof */
- fts3auxColumnMethod, /* xColumn */
- fts3auxRowidMethod, /* xRowid */
- 0, /* xUpdate */
- 0, /* xBegin */
- 0, /* xSync */
- 0, /* xCommit */
- 0, /* xRollback */
- 0, /* xFindFunction */
- 0, /* xRename */
- 0, /* xSavepoint */
- 0, /* xRelease */
- 0 /* xRollbackTo */
- };
- int rc; /* Return code */
-
- rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0);
- return rc;
-}
-
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_expr.c b/src/libtracker-fts/fts3_expr.c
deleted file mode 100644
index 7612789de..000000000
--- a/src/libtracker-fts/fts3_expr.c
+++ /dev/null
@@ -1,1013 +0,0 @@
-/*
-** 2008 Nov 28
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This module contains code that implements a parser for fts3 query strings
-** (the right-hand argument to the MATCH operator). Because the supported
-** syntax is relatively simple, the whole tokenizer/parser system is
-** hand-coded.
-*/
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-/*
-** By default, this module parses the legacy syntax that has been
-** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
-** is defined, then it uses the new syntax. The differences between
-** the new and the old syntaxes are:
-**
-** a) The new syntax supports parenthesis. The old does not.
-**
-** b) The new syntax supports the AND and NOT operators. The old does not.
-**
-** c) The old syntax supports the "-" token qualifier. This is not
-** supported by the new syntax (it is replaced by the NOT operator).
-**
-** d) When using the old syntax, the OR operator has a greater precedence
-** than an implicit AND. When using the new, both implicity and explicit
-** AND operators have a higher precedence than OR.
-**
-** If compiled with SQLITE_TEST defined, then this module exports the
-** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
-** to zero causes the module to use the old syntax. If it is set to
-** non-zero the new syntax is activated. This is so both syntaxes can
-** be tested using a single build of testfixture.
-**
-** The following describes the syntax supported by the fts3 MATCH
-** operator in a similar format to that used by the lemon parser
-** generator. This module does not use actually lemon, it uses a
-** custom parser.
-**
-** query ::= andexpr (OR andexpr)*.
-**
-** andexpr ::= notexpr (AND? notexpr)*.
-**
-** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
-** notexpr ::= LP query RP.
-**
-** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
-**
-** distance_opt ::= .
-** distance_opt ::= / INTEGER.
-**
-** phrase ::= TOKEN.
-** phrase ::= COLUMN:TOKEN.
-** phrase ::= "TOKEN TOKEN TOKEN...".
-*/
-
-#ifdef SQLITE_TEST
-int sqlite3_fts3_enable_parentheses = 0;
-#else
-# ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
-# define sqlite3_fts3_enable_parentheses 1
-# else
-# define sqlite3_fts3_enable_parentheses 0
-# endif
-#endif
-
-/*
-** Default span for NEAR operators.
-*/
-#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
-
-#include <string.h>
-#include <assert.h>
-
-/*
-** isNot:
-** This variable is used by function getNextNode(). When getNextNode() is
-** called, it sets ParseContext.isNot to true if the 'next node' is a
-** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
-** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
-** zero.
-*/
-typedef struct ParseContext ParseContext;
-struct ParseContext {
- sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
- int iLangid; /* Language id used with tokenizer */
- const char **azCol; /* Array of column names for fts3 table */
- int bFts4; /* True to allow FTS4-only syntax */
- int nCol; /* Number of entries in azCol[] */
- int iDefaultCol; /* Default column to query */
- int isNot; /* True if getNextNode() sees a unary - */
- sqlite3_context *pCtx; /* Write error message here */
- int nNest; /* Number of nested brackets */
-};
-
-/*
-** This function is equivalent to the standard isspace() function.
-**
-** The standard isspace() can be awkward to use safely, because although it
-** is defined to accept an argument of type int, its behaviour when passed
-** an integer that falls outside of the range of the unsigned char type
-** is undefined (and sometimes, "undefined" means segfault). This wrapper
-** is defined to accept an argument of type char, and always returns 0 for
-** any values that fall outside of the range of the unsigned char type (i.e.
-** negative values).
-*/
-static int fts3isspace(char c){
- return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
-}
-
-/*
-** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
-** zero the memory before returning a pointer to it. If unsuccessful,
-** return NULL.
-*/
-static void *fts3MallocZero(int nByte){
- void *pRet = sqlite3_malloc(nByte);
- if( pRet ) memset(pRet, 0, nByte);
- return pRet;
-}
-
-int sqlite3Fts3OpenTokenizer(
- sqlite3_tokenizer *pTokenizer,
- int iLangid,
- const char *z,
- int n,
- sqlite3_tokenizer_cursor **ppCsr
-){
- sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
- sqlite3_tokenizer_cursor *pCsr = 0;
- int rc;
-
- rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
- assert( rc==SQLITE_OK || pCsr==0 );
- if( rc==SQLITE_OK ){
- pCsr->pTokenizer = pTokenizer;
- if( pModule->iVersion>=1 ){
- rc = pModule->xLanguageid(pCsr, iLangid);
- if( rc!=SQLITE_OK ){
- pModule->xClose(pCsr);
- pCsr = 0;
- }
- }
- }
- *ppCsr = pCsr;
- return rc;
-}
-
-
-/*
-** Extract the next token from buffer z (length n) using the tokenizer
-** and other information (column names etc.) in pParse. Create an Fts3Expr
-** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
-** single token and set *ppExpr to point to it. If the end of the buffer is
-** reached before a token is found, set *ppExpr to zero. It is the
-** responsibility of the caller to eventually deallocate the allocated
-** Fts3Expr structure (if any) by passing it to sqlite3_free().
-**
-** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
-** fails.
-*/
-static int getNextToken(
- ParseContext *pParse, /* fts3 query parse context */
- int iCol, /* Value for Fts3Phrase.iColumn */
- const char *z, int n, /* Input string */
- Fts3Expr **ppExpr, /* OUT: expression */
- int *pnConsumed /* OUT: Number of bytes consumed */
-){
- sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
- sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
- int rc;
- sqlite3_tokenizer_cursor *pCursor;
- Fts3Expr *pRet = 0;
- int nConsumed = 0;
-
- rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
- if( rc==SQLITE_OK ){
- const char *zToken;
- int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
- int nByte; /* total space to allocate */
-
- rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
- if( rc==SQLITE_OK ){
- nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
- pRet = (Fts3Expr *)fts3MallocZero(nByte);
- if( !pRet ){
- rc = SQLITE_NOMEM;
- }else{
- pRet->eType = FTSQUERY_PHRASE;
- pRet->pPhrase = (Fts3Phrase *)&pRet[1];
- pRet->pPhrase->nToken = 1;
- pRet->pPhrase->iColumn = iCol;
- pRet->pPhrase->aToken[0].n = nToken;
- pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
- memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
-
- if( iEnd<n && z[iEnd]=='*' ){
- pRet->pPhrase->aToken[0].isPrefix = 1;
- iEnd++;
- }
-
- while( 1 ){
- if( !sqlite3_fts3_enable_parentheses
- && iStart>0 && z[iStart-1]=='-'
- ){
- pParse->isNot = 1;
- iStart--;
- }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
- pRet->pPhrase->aToken[0].bFirst = 1;
- iStart--;
- }else{
- break;
- }
- }
-
- }
- nConsumed = iEnd;
- }
-
- pModule->xClose(pCursor);
- }
-
- *pnConsumed = nConsumed;
- *ppExpr = pRet;
- return rc;
-}
-
-
-/*
-** Enlarge a memory allocation. If an out-of-memory allocation occurs,
-** then free the old allocation.
-*/
-static void *fts3ReallocOrFree(void *pOrig, int nNew){
- void *pRet = sqlite3_realloc(pOrig, nNew);
- if( !pRet ){
- sqlite3_free(pOrig);
- }
- return pRet;
-}
-
-/*
-** Buffer zInput, length nInput, contains the contents of a quoted string
-** that appeared as part of an fts3 query expression. Neither quote character
-** is included in the buffer. This function attempts to tokenize the entire
-** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
-** containing the results.
-**
-** If successful, SQLITE_OK is returned and *ppExpr set to point at the
-** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
-** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
-** to 0.
-*/
-static int getNextString(
- ParseContext *pParse, /* fts3 query parse context */
- const char *zInput, int nInput, /* Input string */
- Fts3Expr **ppExpr /* OUT: expression */
-){
- sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
- sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
- int rc;
- Fts3Expr *p = 0;
- sqlite3_tokenizer_cursor *pCursor = 0;
- char *zTemp = 0;
- int nTemp = 0;
-
- const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
- int nToken = 0;
-
- /* The final Fts3Expr data structure, including the Fts3Phrase,
- ** Fts3PhraseToken structures token buffers are all stored as a single
- ** allocation so that the expression can be freed with a single call to
- ** sqlite3_free(). Setting this up requires a two pass approach.
- **
- ** The first pass, in the block below, uses a tokenizer cursor to iterate
- ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
- ** to assemble data in two dynamic buffers:
- **
- ** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
- ** structure, followed by the array of Fts3PhraseToken
- ** structures. This pass only populates the Fts3PhraseToken array.
- **
- ** Buffer zTemp: Contains copies of all tokens.
- **
- ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
- ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
- ** structures.
- */
- rc = sqlite3Fts3OpenTokenizer(
- pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
- if( rc==SQLITE_OK ){
- int ii;
- for(ii=0; rc==SQLITE_OK; ii++){
- const char *zByte;
- int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
- rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
- if( rc==SQLITE_OK ){
- Fts3PhraseToken *pToken;
-
- p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
- if( !p ) goto no_mem;
-
- zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
- if( !zTemp ) goto no_mem;
-
- assert( nToken==ii );
- pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
- memset(pToken, 0, sizeof(Fts3PhraseToken));
-
- memcpy(&zTemp[nTemp], zByte, nByte);
- nTemp += nByte;
-
- pToken->n = nByte;
- pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
- pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
- nToken = ii+1;
- }
- }
-
- pModule->xClose(pCursor);
- pCursor = 0;
- }
-
- if( rc==SQLITE_DONE ){
- int jj;
- char *zBuf = 0;
-
- p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
- if( !p ) goto no_mem;
- memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
- p->eType = FTSQUERY_PHRASE;
- p->pPhrase = (Fts3Phrase *)&p[1];
- p->pPhrase->iColumn = pParse->iDefaultCol;
- p->pPhrase->nToken = nToken;
-
- zBuf = (char *)&p->pPhrase->aToken[nToken];
- if( zTemp ){
- memcpy(zBuf, zTemp, nTemp);
- sqlite3_free(zTemp);
- }else{
- assert( nTemp==0 );
- }
-
- for(jj=0; jj<p->pPhrase->nToken; jj++){
- p->pPhrase->aToken[jj].z = zBuf;
- zBuf += p->pPhrase->aToken[jj].n;
- }
- rc = SQLITE_OK;
- }
-
- *ppExpr = p;
- return rc;
-no_mem:
-
- if( pCursor ){
- pModule->xClose(pCursor);
- }
- sqlite3_free(zTemp);
- sqlite3_free(p);
- *ppExpr = 0;
- return SQLITE_NOMEM;
-}
-
-/*
-** Function getNextNode(), which is called by fts3ExprParse(), may itself
-** call fts3ExprParse(). So this forward declaration is required.
-*/
-static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
-
-/*
-** The output variable *ppExpr is populated with an allocated Fts3Expr
-** structure, or set to 0 if the end of the input buffer is reached.
-**
-** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
-** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
-** If SQLITE_ERROR is returned, pContext is populated with an error message.
-*/
-static int getNextNode(
- ParseContext *pParse, /* fts3 query parse context */
- const char *z, int n, /* Input string */
- Fts3Expr **ppExpr, /* OUT: expression */
- int *pnConsumed /* OUT: Number of bytes consumed */
-){
- static const struct Fts3Keyword {
- char *z; /* Keyword text */
- unsigned char n; /* Length of the keyword */
- unsigned char parenOnly; /* Only valid in paren mode */
- unsigned char eType; /* Keyword code */
- } aKeyword[] = {
- { "OR" , 2, 0, FTSQUERY_OR },
- { "AND", 3, 1, FTSQUERY_AND },
- { "NOT", 3, 1, FTSQUERY_NOT },
- { "NEAR", 4, 0, FTSQUERY_NEAR }
- };
- int ii;
- int iCol;
- int iColLen;
- int rc;
- Fts3Expr *pRet = 0;
-
- const char *zInput = z;
- int nInput = n;
-
- pParse->isNot = 0;
-
- /* Skip over any whitespace before checking for a keyword, an open or
- ** close bracket, or a quoted string.
- */
- while( nInput>0 && fts3isspace(*zInput) ){
- nInput--;
- zInput++;
- }
- if( nInput==0 ){
- return SQLITE_DONE;
- }
-
- /* See if we are dealing with a keyword. */
- for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
- const struct Fts3Keyword *pKey = &aKeyword[ii];
-
- if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
- continue;
- }
-
- if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
- int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
- int nKey = pKey->n;
- char cNext;
-
- /* If this is a "NEAR" keyword, check for an explicit nearness. */
- if( pKey->eType==FTSQUERY_NEAR ){
- assert( nKey==4 );
- if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
- nNear = 0;
- for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){
- nNear = nNear * 10 + (zInput[nKey] - '0');
- }
- }
- }
-
- /* At this point this is probably a keyword. But for that to be true,
- ** the next byte must contain either whitespace, an open or close
- ** parenthesis, a quote character, or EOF.
- */
- cNext = zInput[nKey];
- if( fts3isspace(cNext)
- || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
- ){
- pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
- if( !pRet ){
- return SQLITE_NOMEM;
- }
- pRet->eType = pKey->eType;
- pRet->nNear = nNear;
- *ppExpr = pRet;
- *pnConsumed = (int)((zInput - z) + nKey);
- return SQLITE_OK;
- }
-
- /* Turns out that wasn't a keyword after all. This happens if the
- ** user has supplied a token such as "ORacle". Continue.
- */
- }
- }
-
- /* Check for an open bracket. */
- if( sqlite3_fts3_enable_parentheses ){
- if( *zInput=='(' ){
- int nConsumed;
- pParse->nNest++;
- rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed);
- if( rc==SQLITE_OK && !*ppExpr ){
- rc = SQLITE_DONE;
- }
- *pnConsumed = (int)((zInput - z) + 1 + nConsumed);
- return rc;
- }
-
- /* Check for a close bracket. */
- if( *zInput==')' ){
- pParse->nNest--;
- *pnConsumed = (int)((zInput - z) + 1);
- return SQLITE_DONE;
- }
- }
-
- /* See if we are dealing with a quoted phrase. If this is the case, then
- ** search for the closing quote and pass the whole string to getNextString()
- ** for processing. This is easy to do, as fts3 has no syntax for escaping
- ** a quote character embedded in a string.
- */
- if( *zInput=='"' ){
- for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
- *pnConsumed = (int)((zInput - z) + ii + 1);
- if( ii==nInput ){
- return SQLITE_ERROR;
- }
- return getNextString(pParse, &zInput[1], ii-1, ppExpr);
- }
-
-
- /* If control flows to this point, this must be a regular token, or
- ** the end of the input. Read a regular token using the sqlite3_tokenizer
- ** interface. Before doing so, figure out if there is an explicit
- ** column specifier for the token.
- **
- ** TODO: Strangely, it is not possible to associate a column specifier
- ** with a quoted phrase, only with a single token. Not sure if this was
- ** an implementation artifact or an intentional decision when fts3 was
- ** first implemented. Whichever it was, this module duplicates the
- ** limitation.
- */
- iCol = pParse->iDefaultCol;
- iColLen = 0;
- for(ii=0; ii<pParse->nCol; ii++){
- const char *zStr = pParse->azCol[ii];
- int nStr = (int)strlen(zStr);
- if( nInput>nStr && zInput[nStr]==':'
- && sqlite3_strnicmp(zStr, zInput, nStr)==0
- ){
- iCol = ii;
- iColLen = (int)((zInput - z) + nStr + 1);
- break;
- }
- }
- rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
- *pnConsumed += iColLen;
- return rc;
-}
-
-/*
-** The argument is an Fts3Expr structure for a binary operator (any type
-** except an FTSQUERY_PHRASE). Return an integer value representing the
-** precedence of the operator. Lower values have a higher precedence (i.e.
-** group more tightly). For example, in the C language, the == operator
-** groups more tightly than ||, and would therefore have a higher precedence.
-**
-** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
-** is defined), the order of the operators in precedence from highest to
-** lowest is:
-**
-** NEAR
-** NOT
-** AND (including implicit ANDs)
-** OR
-**
-** Note that when using the old query syntax, the OR operator has a higher
-** precedence than the AND operator.
-*/
-static int opPrecedence(Fts3Expr *p){
- assert( p->eType!=FTSQUERY_PHRASE );
- if( sqlite3_fts3_enable_parentheses ){
- return p->eType;
- }else if( p->eType==FTSQUERY_NEAR ){
- return 1;
- }else if( p->eType==FTSQUERY_OR ){
- return 2;
- }
- assert( p->eType==FTSQUERY_AND );
- return 3;
-}
-
-/*
-** Argument ppHead contains a pointer to the current head of a query
-** expression tree being parsed. pPrev is the expression node most recently
-** inserted into the tree. This function adds pNew, which is always a binary
-** operator node, into the expression tree based on the relative precedence
-** of pNew and the existing nodes of the tree. This may result in the head
-** of the tree changing, in which case *ppHead is set to the new root node.
-*/
-static void insertBinaryOperator(
- Fts3Expr **ppHead, /* Pointer to the root node of a tree */
- Fts3Expr *pPrev, /* Node most recently inserted into the tree */
- Fts3Expr *pNew /* New binary node to insert into expression tree */
-){
- Fts3Expr *pSplit = pPrev; /* Subtree that will become pNew's left child */
- while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
- pSplit = pSplit->pParent; /* climb past parents whose opPrecedence() value does not exceed pNew's */
- }
-
- if( pSplit->pParent ){
- assert( pSplit->pParent->pRight==pSplit );
- pSplit->pParent->pRight = pNew; /* pNew replaces pSplit as the parent's right child */
- pNew->pParent = pSplit->pParent;
- }else{
- *ppHead = pNew; /* pSplit was the root, so pNew becomes the new root */
- }
- pNew->pLeft = pSplit;
- pSplit->pParent = pNew;
-}
-
-/*
-** Parse the fts3 query expression found in buffer z, length n. This function
-** returns either when the end of the buffer is reached or an unmatched
-** closing bracket - ')' - is encountered.
-**
-** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
-** parsed form of the expression and *pnConsumed is set to the number of
-** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
-** (out of memory error) or SQLITE_ERROR (parse error) is returned.
-*/
-static int fts3ExprParse(
- ParseContext *pParse, /* fts3 query parse context */
- const char *z, int n, /* Text of MATCH query */
- Fts3Expr **ppExpr, /* OUT: Parsed query structure */
- int *pnConsumed /* OUT: Number of bytes consumed */
-){
- Fts3Expr *pRet = 0; /* Root of the tree built so far */
- Fts3Expr *pPrev = 0; /* Node most recently added to the tree */
- Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */
- int nIn = n; /* Bytes of input not yet consumed */
- const char *zIn = z; /* Next unread byte of input */
- int rc = SQLITE_OK;
- int isRequirePhrase = 1; /* True while the next node must be a phrase or sub-expression */
-
- while( rc==SQLITE_OK ){
- Fts3Expr *p = 0;
- int nByte = 0;
- rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
- if( rc==SQLITE_OK ){
- int isPhrase;
-
- if( !sqlite3_fts3_enable_parentheses
- && p->eType==FTSQUERY_PHRASE && pParse->isNot
- ){
- /* Create an implicit NOT operator. */
- Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
- if( !pNot ){
- sqlite3Fts3ExprFree(p);
- rc = SQLITE_NOMEM;
- goto exprparse_out;
- }
- pNot->eType = FTSQUERY_NOT;
- pNot->pRight = p;
- if( pNotBranch ){
- pNot->pLeft = pNotBranch; /* chain NOT nodes together through pLeft */
- }
- pNotBranch = pNot;
- p = pPrev; /* so that "pPrev = p" at the bottom of the loop leaves pPrev unchanged */
- }else{
- int eType = p->eType;
- isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); /* a node that already has a left child is treated like a phrase */
-
- /* The isRequirePhrase variable is set to true if a phrase or
- ** an expression contained in parenthesis is required. If a
- ** binary operator (AND, OR, NOT or NEAR) is encountered when
- ** isRequirePhrase is set, this is a syntax error.
- */
- if( !isPhrase && isRequirePhrase ){
- sqlite3Fts3ExprFree(p);
- rc = SQLITE_ERROR;
- goto exprparse_out;
- }
-
- if( isPhrase && !isRequirePhrase ){
- /* Insert an implicit AND operator. */
- Fts3Expr *pAnd;
- assert( pRet && pPrev );
- pAnd = fts3MallocZero(sizeof(Fts3Expr));
- if( !pAnd ){
- sqlite3Fts3ExprFree(p);
- rc = SQLITE_NOMEM;
- goto exprparse_out;
- }
- pAnd->eType = FTSQUERY_AND;
- insertBinaryOperator(&pRet, pPrev, pAnd);
- pPrev = pAnd;
- }
-
- /* This test catches attempts to make either operand of a NEAR
- ** operator something other than a phrase. For example, either of
- ** the following:
- **
- ** (bracketed expression) NEAR phrase
- ** phrase NEAR (bracketed expression)
- **
- ** Return an error in either case.
- */
- if( pPrev && (
- (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
- || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
- )){
- sqlite3Fts3ExprFree(p);
- rc = SQLITE_ERROR;
- goto exprparse_out;
- }
-
- if( isPhrase ){
- if( pRet ){
- assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
- pPrev->pRight = p; /* operand fills the open right slot of the previous operator */
- p->pParent = pPrev;
- }else{
- pRet = p; /* first node parsed becomes the root */
- }
- }else{
- insertBinaryOperator(&pRet, pPrev, p);
- }
- isRequirePhrase = !isPhrase; /* an operator must be followed by an operand */
- }
- assert( nByte>0 );
- }
- assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
- nIn -= nByte;
- zIn += nByte;
- pPrev = p;
- }
-
- if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
- rc = SQLITE_ERROR; /* input ended while an operand was still required */
- }
-
- if( rc==SQLITE_DONE ){
- rc = SQLITE_OK;
- if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
- if( !pRet ){
- rc = SQLITE_ERROR; /* only implicit NOT terms were parsed; nothing to attach them to */
- }else{
- Fts3Expr *pIter = pNotBranch;
- while( pIter->pLeft ){
- pIter = pIter->pLeft; /* walk to the first-created NOT node */
- }
- pIter->pLeft = pRet; /* the main tree becomes its left operand */
- pRet = pNotBranch;
- }
- }
- }
- *pnConsumed = n - nIn; /* skipped by the "goto exprparse_out" error paths above */
-
-exprparse_out:
- if( rc!=SQLITE_OK ){
- sqlite3Fts3ExprFree(pRet);
- sqlite3Fts3ExprFree(pNotBranch);
- pRet = 0;
- }
- *ppExpr = pRet;
- return rc;
-}
-
-/*
-** Parameters z and n contain a pointer to and length of a buffer containing
-** an fts3 query expression, respectively. This function attempts to parse the
-** query expression and create a tree of Fts3Expr structures representing the
-** parsed expression. If successful, *ppExpr is set to point to the head
-** of the parsed expression tree and SQLITE_OK is returned. If an error
-** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
-** error) is returned and *ppExpr is set to 0.
-**
-** If parameter n is a negative number, then z is assumed to point to a
-** nul-terminated string and the length is determined using strlen().
-**
-** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
-** use to normalize query tokens while parsing the expression. The azCol[]
-** array, which is assumed to contain nCol entries, should contain the names
-** of each column in the target fts3 table, in order from left to right.
-** Column names must be nul-terminated strings.
-**
-** The iDefaultCol parameter should be passed the index of the table column
-** that appears on the left-hand-side of the MATCH operator (the default
-** column to match against for tokens for which a column name is not explicitly
-** specified as part of the query string), or -1 if tokens may by default
-** match any table column.
-*/
-int sqlite3Fts3ExprParse(
- sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
- int iLangid, /* Language id for tokenizer */
- char **azCol, /* Array of column names for fts3 table */
- int bFts4, /* True to allow FTS4-only syntax */
- int nCol, /* Number of entries in azCol[] */
- int iDefaultCol, /* Default column to query */
- const char *z, int n, /* Text of MATCH query */
- Fts3Expr **ppExpr /* OUT: Parsed query structure */
-){
- int nParsed; /* Bytes consumed by fts3ExprParse(); not examined afterwards */
- int rc;
- ParseContext sParse;
-
- memset(&sParse, 0, sizeof(ParseContext)); /* fields not assigned below start out as zero */
- sParse.pTokenizer = pTokenizer;
- sParse.iLangid = iLangid;
- sParse.azCol = (const char **)azCol;
- sParse.nCol = nCol;
- sParse.iDefaultCol = iDefaultCol;
- sParse.bFts4 = bFts4;
- if( z==0 ){
- *ppExpr = 0; /* NULL query text: succeed with an empty expression */
- return SQLITE_OK;
- }
- if( n<0 ){
- n = (int)strlen(z);
- }
- rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
-
- /* Check for mismatched parenthesis */
- if( rc==SQLITE_OK && sParse.nNest ){
- rc = SQLITE_ERROR;
- sqlite3Fts3ExprFree(*ppExpr);
- *ppExpr = 0;
- }
-
- return rc;
-}
-
-/*
-** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
-*/
-void sqlite3Fts3ExprFree(Fts3Expr *p){
- if( p ){ /* a NULL argument is a no-op */
- assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
- sqlite3Fts3ExprFree(p->pLeft); /* recursively release both subtrees first */
- sqlite3Fts3ExprFree(p->pRight);
- sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
- sqlite3_free(p->aMI);
- sqlite3_free(p);
- }
-}
-
-/****************************************************************************
-*****************************************************************************
-** Everything after this point is just test code.
-*/
-
-#ifdef SQLITE_TEST
-
-#include <stdio.h>
-
-/*
-** Function to query the hash-table of tokenizers (see README.tokenizers).
-*/
-static int queryTestTokenizer(
- sqlite3 *db, /* Database connection to run the lookup on */
- const char *zName, /* Name of the tokenizer to look up */
- const sqlite3_tokenizer_module **pp /* OUT: module pointer, or 0 if no BLOB row was returned */
-){
- int rc;
- sqlite3_stmt *pStmt;
- const char zSql[] = "SELECT fts3_tokenizer(?)";
-
- *pp = 0;
- rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
- if( SQLITE_ROW==sqlite3_step(pStmt) ){
- if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
- memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); /* the BLOB bytes are copied verbatim into the pointer *pp */
- }
- }
-
- return sqlite3_finalize(pStmt); /* the finalize result is the function's return code */
-}
-
-/*
-** Return a pointer to a buffer containing a text representation of the
-** expression passed as the first argument. The buffer is obtained from
-** sqlite3_malloc(). It is the responsibility of the caller to use
-** sqlite3_free() to release the memory. If an OOM condition is encountered,
-** NULL is returned.
-**
-** If the second argument is not NULL, then its contents are prepended to
-** the returned expression text and then freed using sqlite3_free().
-*/
-static char *exprToString(Fts3Expr *pExpr, char *zBuf){
- switch( pExpr->eType ){
- case FTSQUERY_PHRASE: {
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- int i;
- zBuf = sqlite3_mprintf(
- "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
- for(i=0; zBuf && i<pPhrase->nToken; i++){
- zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
- pPhrase->aToken[i].n, pPhrase->aToken[i].z,
- (pPhrase->aToken[i].isPrefix?"+":"")
- );
- }
- return zBuf; /* phrase nodes return here; the operand printing below is for operators only */
- }
-
- case FTSQUERY_NEAR:
- zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
- break;
- case FTSQUERY_NOT:
- zBuf = sqlite3_mprintf("%zNOT ", zBuf);
- break;
- case FTSQUERY_AND:
- zBuf = sqlite3_mprintf("%zAND ", zBuf);
- break;
- case FTSQUERY_OR:
- zBuf = sqlite3_mprintf("%zOR ", zBuf);
- break;
- }
-
- if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); /* each step is skipped once zBuf has become NULL */
- if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
- if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
-
- if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
- if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
-
- return zBuf;
-}
-
-/*
-** This is the implementation of a scalar SQL function used to test the
-** expression parser. It should be called as follows:
-**
-** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
-**
-** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
-** to parse the query expression (see README.tokenizers). The second argument
-** is the query expression to parse. Each subsequent argument is the name
-** of a column of the fts3 table that the query expression may refer to.
-** For example:
-**
-** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
-*/
-static void fts3ExprTest(
- sqlite3_context *context,
- int argc,
- sqlite3_value **argv
-){
- sqlite3_tokenizer_module const *pModule = 0;
- sqlite3_tokenizer *pTokenizer = 0;
- int rc;
- char **azCol = 0; /* Column names taken from argv[2..] */
- const char *zExpr;
- int nExpr;
- int nCol;
- int ii;
- Fts3Expr *pExpr;
- char *zBuf = 0;
- sqlite3 *db = sqlite3_context_db_handle(context);
-
- if( argc<3 ){ /* need a tokenizer, an expression and at least one column */
- sqlite3_result_error(context,
- "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
- );
- return;
- }
-
- rc = queryTestTokenizer(db,
- (const char *)sqlite3_value_text(argv[0]), &pModule);
- if( rc==SQLITE_NOMEM ){
- sqlite3_result_error_nomem(context);
- goto exprtest_out;
- }else if( !pModule ){
- sqlite3_result_error(context, "No such tokenizer module", -1);
- goto exprtest_out;
- }
-
- rc = pModule->xCreate(0, 0, &pTokenizer); /* tokenizer is created with no arguments */
- assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
- if( rc==SQLITE_NOMEM ){
- sqlite3_result_error_nomem(context);
- goto exprtest_out;
- }
- pTokenizer->pModule = pModule;
-
- zExpr = (const char *)sqlite3_value_text(argv[1]);
- nExpr = sqlite3_value_bytes(argv[1]);
- nCol = argc-2; /* every argument after <tokenizer> and <expr> is a column name */
- azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
- if( !azCol ){
- sqlite3_result_error_nomem(context);
- goto exprtest_out;
- }
- for(ii=0; ii<nCol; ii++){
- azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
- }
-
- rc = sqlite3Fts3ExprParse(
- pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr /* iLangid=0, bFts4=0, iDefaultCol=nCol */
- );
- if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
- sqlite3_result_error(context, "Error parsing expression", -1);
- }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
- sqlite3_result_error_nomem(context);
- }else{
- sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
- sqlite3_free(zBuf);
- }
-
- sqlite3Fts3ExprFree(pExpr);
-
-exprtest_out:
- if( pModule && pTokenizer ){
- rc = pModule->xDestroy(pTokenizer); /* the return code is not examined */
- }
- sqlite3_free(azCol);
-}
-
-/*
-** Register the query expression parser test function fts3_exprtest()
-** with database connection db.
-*/
-int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
- return sqlite3_create_function( /* its return code is passed straight back to the caller */
- db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 /* nArg is -1; fts3ExprTest checks argc itself */
- );
-}
-
-#endif
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_hash.c b/src/libtracker-fts/fts3_hash.c
deleted file mode 100644
index 57c59b587..000000000
--- a/src/libtracker-fts/fts3_hash.c
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
-** 2001 September 22
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-** This is the implementation of generic hash-tables used in SQLite.
-** We've modified it slightly to serve as a standalone hash table
-** implementation for the full-text indexing module.
-*/
-
-/*
-** The code in this file is only compiled if:
-**
-** * The FTS3 module is being built as an extension
-** (in which case SQLITE_CORE is not defined), or
-**
-** * The FTS3 module is being built into the core of
-** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
-*/
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "fts3_hash.h"
-
-/*
-** Malloc and Free functions
-*/
-static void *fts3HashMalloc(int n){ /* Allocate n zero-filled bytes; returns NULL if sqlite3_malloc() fails */
- void *p = sqlite3_malloc(n);
- if( p ){
- memset(p, 0, n);
- }
- return p;
-}
-static void fts3HashFree(void *p){ /* Release memory obtained from fts3HashMalloc() */
- sqlite3_free(p);
-}
-
-/* Turn bulk memory into a hash table object by initializing the
-** fields of the Hash structure.
-**
-** "pNew" is a pointer to the hash table that is to be initialized.
-** keyClass is one of the constants
-** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass
-** determines what kind of key the hash table will use. "copyKey" is
-** true if the hash table should make its own private copy of keys and
-** false if it should just use the supplied pointer.
-*/
-void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){
- assert( pNew!=0 );
- assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY );
- pNew->keyClass = keyClass;
- pNew->copyKey = copyKey;
- pNew->first = 0; /* element list starts empty */
- pNew->count = 0;
- pNew->htsize = 0; /* no bucket array is allocated here */
- pNew->ht = 0;
-}
-
-/* Remove all entries from a hash table. Reclaim all memory.
-** Call this routine to delete a hash table or to reset a hash table
-** to the empty state.
-*/
-void sqlite3Fts3HashClear(Fts3Hash *pH){
- Fts3HashElem *elem; /* For looping over all elements of the table */
-
- assert( pH!=0 );
- elem = pH->first;
- pH->first = 0;
- fts3HashFree(pH->ht); /* release the bucket array */
- pH->ht = 0;
- pH->htsize = 0;
- while( elem ){
- Fts3HashElem *next_elem = elem->next; /* read before elem is freed */
- if( pH->copyKey && elem->pKey ){
- fts3HashFree(elem->pKey); /* keys are freed only when the table made its own copies */
- }
- fts3HashFree(elem);
- elem = next_elem;
- }
- pH->count = 0;
-}
-
-/*
-** Hash and comparison functions when the mode is FTS3_HASH_STRING
-*/
-static int fts3StrHash(const void *pKey, int nKey){
- const char *z = (const char *)pKey;
- int h = 0;
- if( nKey<=0 ) nKey = (int) strlen(z); /* non-positive length: measure the string instead */
- while( nKey > 0 ){
- h = (h<<3) ^ h ^ *z++; /* NOTE(review): h is a signed int, so this left shift can overflow on long keys - confirm acceptable */
- nKey--;
- }
- return h & 0x7fffffff; /* mask off the top bit so the result is never negative */
-}
-static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
- if( n1!=n2 ) return 1; /* keys of different length never match */
- return strncmp((const char*)pKey1,(const char*)pKey2,n1); /* 0 means the keys are equal */
-}
-
-/*
-** Hash and comparison functions when the mode is FTS3_HASH_BINARY
-*/
-static int fts3BinHash(const void *pKey, int nKey){
- int h = 0;
- const char *z = (const char *)pKey;
- while( nKey-- > 0 ){
- h = (h<<3) ^ h ^ *(z++); /* NOTE(review): h is a signed int, so this left shift can overflow on long keys - confirm acceptable */
- }
- return h & 0x7fffffff; /* mask off the top bit so the result is never negative */
-}
-static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){
- if( n1!=n2 ) return 1; /* keys of different length never match */
- return memcmp(pKey1,pKey2,n1); /* 0 means the keys are equal */
-}
-
-/*
-** Return a pointer to the appropriate hash function given the key class.
-**
-** The C syntax in this function definition may be unfamiliar to some
-** programmers, so we provide the following additional explanation:
-**
-** The name of the function is "ftsHashFunction". The function takes a
-** single parameter "keyClass". The return value of ftsHashFunction()
-** is a pointer to another function. Specifically, the return value
-** of ftsHashFunction() is a pointer to a function that takes two parameters
-** with types "const void*" and "int" and returns an "int".
-*/
-static int (*ftsHashFunction(int keyClass))(const void*,int){
- if( keyClass==FTS3_HASH_STRING ){
- return &fts3StrHash;
- }else{
- assert( keyClass==FTS3_HASH_BINARY );
- return &fts3BinHash; /* any class other than FTS3_HASH_STRING gets the binary hash */
- }
-}
-
-/*
-** Return a pointer to the appropriate comparison function given the key class.
-**
-** For help in interpreting the obscure C code in the function definition,
-** see the header comment on the previous function.
-*/
-static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){
- if( keyClass==FTS3_HASH_STRING ){
- return &fts3StrCompare;
- }else{
- assert( keyClass==FTS3_HASH_BINARY );
- return &fts3BinCompare; /* any class other than FTS3_HASH_STRING gets the binary comparison */
- }
-}
-
-/* Link an element into the hash table
-*/
-static void fts3HashInsertElement(
- Fts3Hash *pH, /* The complete hash table */
- struct _fts3ht *pEntry, /* The entry into which pNew is inserted */
- Fts3HashElem *pNew /* The element to be inserted */
-){
- Fts3HashElem *pHead; /* First element already in pEntry */
- pHead = pEntry->chain;
- if( pHead ){
- pNew->next = pHead; /* bucket in use: splice pNew in immediately before its current head */
- pNew->prev = pHead->prev;
- if( pHead->prev ){ pHead->prev->next = pNew; }
- else { pH->first = pNew; }
- pHead->prev = pNew;
- }else{
- pNew->next = pH->first; /* empty bucket: push pNew onto the front of the table-wide list */
- if( pH->first ){ pH->first->prev = pNew; }
- pNew->prev = 0;
- pH->first = pNew;
- }
- pEntry->count++;
- pEntry->chain = pNew; /* pNew is now the first element of this bucket */
-}
-
-
-/* Resize the hash table so that it contains "new_size" buckets.
-** "new_size" must be a power of 2. The hash table might fail
-** to resize if the allocation in fts3HashMalloc() fails.
-**
-** Return non-zero if a memory allocation error occurs.
-*/
-static int fts3Rehash(Fts3Hash *pH, int new_size){
- struct _fts3ht *new_ht; /* The new hash table */
- Fts3HashElem *elem, *next_elem; /* For looping over existing elements */
- int (*xHash)(const void*,int); /* The hash function */
-
- assert( (new_size & (new_size-1))==0 );
- new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) );
- if( new_ht==0 ) return 1; /* on failure the existing table is left untouched */
- fts3HashFree(pH->ht);
- pH->ht = new_ht;
- pH->htsize = new_size;
- xHash = ftsHashFunction(pH->keyClass);
- for(elem=pH->first, pH->first=0; elem; elem = next_elem){
- int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); /* bucket index in the new array */
- next_elem = elem->next; /* saved because fts3HashInsertElement() overwrites elem->next */
- fts3HashInsertElement(pH, &new_ht[h], elem);
- }
- return 0;
-}
-
-/* This function (for internal use only) locates an element in an
-** hash table that matches the given key. The hash for this key has
-** already been computed and is passed as the 4th parameter.
-*/
-static Fts3HashElem *fts3FindElementByHash(
- const Fts3Hash *pH, /* The pH to be searched */
- const void *pKey, /* The key we are searching for */
- int nKey, /* Size of the key, as passed to the comparison function */
- int h /* The hash for this key. */
-){
- Fts3HashElem *elem; /* Used to loop thru the element list */
- int count; /* Number of elements left to test */
- int (*xCompare)(const void*,int,const void*,int); /* comparison function */
-
- if( pH->ht ){ /* with no bucket array there is nothing to find */
- struct _fts3ht *pEntry = &pH->ht[h];
- elem = pEntry->chain;
- count = pEntry->count;
- xCompare = ftsCompareFunction(pH->keyClass);
- while( count-- && elem ){ /* examine at most pEntry->count elements starting at the bucket head */
- if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
- return elem;
- }
- elem = elem->next;
- }
- }
- return 0;
-}
-
-/* Remove a single entry from the hash table given a pointer to that
-** element and a hash on the element's key.
-*/
-static void fts3RemoveElementByHash(
- Fts3Hash *pH, /* The pH containing "elem" */
- Fts3HashElem* elem, /* The element to be removed from the pH */
- int h /* Hash value for the element */
-){
- struct _fts3ht *pEntry;
- if( elem->prev ){
- elem->prev->next = elem->next; /* unlink from the table-wide doubly-linked list */
- }else{
- pH->first = elem->next;
- }
- if( elem->next ){
- elem->next->prev = elem->prev;
- }
- pEntry = &pH->ht[h];
- if( pEntry->chain==elem ){
- pEntry->chain = elem->next; /* elem was the bucket head; advance the head */
- }
- pEntry->count--;
- if( pEntry->count<=0 ){
- pEntry->chain = 0; /* bucket is now empty */
- }
- if( pH->copyKey && elem->pKey ){
- fts3HashFree(elem->pKey); /* the key is freed only if the table owns a copy */
- }
- fts3HashFree( elem );
- pH->count--;
- if( pH->count<=0 ){
- assert( pH->first==0 );
- assert( pH->count==0 );
- fts3HashClear(pH); /* last element removed: release the bucket array as well */
- }
-}
-
-Fts3HashElem *sqlite3Fts3HashFindElem( /* Return the element whose key matches pKey,nKey, or 0 if there is none */
- const Fts3Hash *pH,
- const void *pKey,
- int nKey
-){
- int h; /* A hash on key */
- int (*xHash)(const void*,int); /* The hash function */
-
- if( pH==0 || pH->ht==0 ) return 0; /* no table, or no bucket array: nothing to find */
- xHash = ftsHashFunction(pH->keyClass);
- assert( xHash!=0 );
- h = (*xHash)(pKey,nKey);
- assert( (pH->htsize & (pH->htsize-1))==0 );
- return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); /* mask the hash down to a bucket index */
-}
-
-/*
-** Attempt to locate an element of the hash table pH with a key
-** that matches pKey,nKey. Return the data for this element if it is
-** found, or NULL if there is no match.
-*/
-void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){
- Fts3HashElem *pElem; /* The element that matches key (if any) */
-
- pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey);
- return pElem ? pElem->data : 0; /* the element's data pointer, or 0 when no element matched */
-}
-
-/* Insert an element into the hash table pH. The key is pKey,nKey
-** and the data is "data".
-**
-** If no element exists with a matching key, then a new
-** element is created. A copy of the key is made if the copyKey
-** flag is set. NULL is returned.
-**
-** If another element already exists with the same key, then the
-** new data replaces the old data and the old data is returned.
-** The key is not copied in this instance. If a malloc fails, then
-** the new data is returned and the hash table is unchanged.
-**
-** If the "data" parameter to this function is NULL, then the
-** element corresponding to "key" is removed from the hash table.
-*/
-void *sqlite3Fts3HashInsert(
- Fts3Hash *pH, /* The hash table to insert into */
- const void *pKey, /* The key */
- int nKey, /* Number of bytes in the key */
- void *data /* The data */
-){
- int hraw; /* Raw hash value of the key */
- int h; /* the hash of the key modulo hash table size */
- Fts3HashElem *elem; /* Used to loop thru the element list */
- Fts3HashElem *new_elem; /* New element added to the pH */
- int (*xHash)(const void*,int); /* The hash function */
-
- assert( pH!=0 );
- xHash = ftsHashFunction(pH->keyClass);
- assert( xHash!=0 );
- hraw = (*xHash)(pKey, nKey);
- assert( (pH->htsize & (pH->htsize-1))==0 );
- h = hraw & (pH->htsize-1);
- elem = fts3FindElementByHash(pH,pKey,nKey,h); /* returns 0 without indexing when pH->ht is NULL */
- if( elem ){
- void *old_data = elem->data;
- if( data==0 ){
- fts3RemoveElementByHash(pH,elem,h); /* NULL data on an existing key means delete */
- }else{
- elem->data = data;
- }
- return old_data;
- }
- if( data==0 ) return 0; /* deleting a key that is not present: nothing to do */
- if( (pH->htsize==0 && fts3Rehash(pH,8))
- || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2))
- ){
- pH->count = 0; /* NOTE(review): a failed fts3Rehash() leaves existing elements linked, yet count is zeroed - confirm intended */
- return data;
- }
- assert( pH->htsize>0 );
- new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) );
- if( new_elem==0 ) return data;
- if( pH->copyKey && pKey!=0 ){
- new_elem->pKey = fts3HashMalloc( nKey );
- if( new_elem->pKey==0 ){
- fts3HashFree(new_elem);
- return data;
- }
- memcpy((void*)new_elem->pKey, pKey, nKey);
- }else{
- new_elem->pKey = (void*)pKey; /* table stores the caller's pointer rather than a copy */
- }
- new_elem->nKey = nKey;
- pH->count++;
- assert( pH->htsize>0 );
- assert( (pH->htsize & (pH->htsize-1))==0 );
- h = hraw & (pH->htsize-1); /* recomputed because fts3Rehash() may have changed htsize */
- fts3HashInsertElement(pH, &pH->ht[h], new_elem);
- new_elem->data = data;
- return 0;
-}
-
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_hash.h b/src/libtracker-fts/fts3_hash.h
deleted file mode 100644
index 399f51544..000000000
--- a/src/libtracker-fts/fts3_hash.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
-** 2001 September 22
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-** This is the header file for the generic hash-table implementation
-** used in SQLite. We've modified it slightly to serve as a standalone
-** hash table implementation for the full-text indexing module.
-**
-*/
-#ifndef _FTS3_HASH_H_
-#define _FTS3_HASH_H_
-
-/* Forward declarations of structures. */
-typedef struct Fts3Hash Fts3Hash;
-typedef struct Fts3HashElem Fts3HashElem;
-
-/* A complete hash table is an instance of the following structure.
-** The internals of this structure are intended to be opaque -- client
-** code should not attempt to access or modify the fields of this structure
-** directly. Change this structure only by using the routines below.
-** However, many of the "procedures" and "functions" for modifying and
-** accessing this structure are really macros, so we can't really make
-** this structure opaque.
-*/
-struct Fts3Hash {
- char keyClass; /* FTS3_HASH_STRING or FTS3_HASH_BINARY */
- char copyKey; /* True if copy of key made on insert */
- int count; /* Number of entries in this table */
- Fts3HashElem *first; /* Head of the doubly-linked list of all elements */
- int htsize; /* Number of buckets in the hash table */
- struct _fts3ht { /* the hash table */
- int count; /* Number of entries with this hash */
- Fts3HashElem *chain; /* Pointer to first entry with this hash */
- } *ht;
-};
-
-/* Each element in the hash table is an instance of the following
-** structure. All elements are stored on a single doubly-linked list.
-**
-** Again, this structure is intended to be opaque, but it can't really
-** be opaque because it is used by macros.
-*/
-struct Fts3HashElem {
- Fts3HashElem *next, *prev; /* Next and previous elements in the table */
- void *data; /* Data associated with this element */
- void *pKey; int nKey; /* Key associated with this element, and its size in bytes */
-};
-
-/*
-** There are 2 different modes of operation for a hash table:
-**
-** FTS3_HASH_STRING pKey points to a string that is nKey bytes long
-** (including the null-terminator, if any). Case
-** is respected in comparisons.
-**
-** FTS3_HASH_BINARY pKey points to binary data nKey bytes long.
-** memcmp() is used to compare keys.
-**
-** A copy of the key is made if the copyKey parameter to fts3HashInit is 1.
-*/
-#define FTS3_HASH_STRING 1 /* string keys */
-#define FTS3_HASH_BINARY 2 /* binary keys, compared with memcmp() */
-
-/*
-** Access routines. To delete, insert a NULL pointer.
-*/
-void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey);
-void *sqlite3Fts3HashInsert(Fts3Hash*, const void *pKey, int nKey, void *pData);
-void *sqlite3Fts3HashFind(const Fts3Hash*, const void *pKey, int nKey);
-void sqlite3Fts3HashClear(Fts3Hash*);
-Fts3HashElem *sqlite3Fts3HashFindElem(const Fts3Hash *, const void *, int);
-
-/*
-** Shorthand for the functions above
-*/
-#define fts3HashInit sqlite3Fts3HashInit /* each short name is a plain alias for the sqlite3Fts3-prefixed routine */
-#define fts3HashInsert sqlite3Fts3HashInsert
-#define fts3HashFind sqlite3Fts3HashFind
-#define fts3HashClear sqlite3Fts3HashClear
-#define fts3HashFindElem sqlite3Fts3HashFindElem
-
-/*
-** Macros for looping over all elements of a hash table. The idiom is
-** like this:
-**
-** Fts3Hash h;
-** Fts3HashElem *p;
-** ...
-** for(p=fts3HashFirst(&h); p; p=fts3HashNext(p)){
-** SomeStructure *pData = fts3HashData(p);
-** // do something with pData
-** }
-*/
-#define fts3HashFirst(H) ((H)->first) /* H: pointer to an Fts3Hash */
-#define fts3HashNext(E) ((E)->next) /* E: pointer to an Fts3HashElem */
-#define fts3HashData(E) ((E)->data)
-#define fts3HashKey(E) ((E)->pKey)
-#define fts3HashKeysize(E) ((E)->nKey)
-
-/*
-** Number of entries in a hash table
-*/
-#define fts3HashCount(H) ((H)->count)
-
-#endif /* _FTS3_HASH_H_ */
diff --git a/src/libtracker-fts/fts3_icu.c b/src/libtracker-fts/fts3_icu.c
deleted file mode 100644
index 52df8c7d8..000000000
--- a/src/libtracker-fts/fts3_icu.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
-** 2007 June 22
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-** This file implements a tokenizer for fts3 based on the ICU library.
-*/
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-#ifdef SQLITE_ENABLE_ICU
-
-#include <assert.h>
-#include <string.h>
-#include "fts3_tokenizer.h"
-
-#include <unicode/ubrk.h>
-#include <unicode/ucol.h>
-#include <unicode/ustring.h>
-#include <unicode/utf16.h>
-
-typedef struct IcuTokenizer IcuTokenizer;
-typedef struct IcuCursor IcuCursor;
-
-struct IcuTokenizer {
- sqlite3_tokenizer base; /* Base class; must be the first member */
- char *zLocale; /* Copy of the first creation argument, or NULL if none was given */
-};
-
-struct IcuCursor {
- sqlite3_tokenizer_cursor base; /* Base class; must be the first member */
-
- UBreakIterator *pIter; /* ICU break-iterator object */
- int nChar; /* Number of UChar elements in aChar */
- UChar *aChar; /* Copy of input using utf-16 encoding */
- int *aOffset; /* Offsets of each character in utf-8 input */
-
- int nBuffer; /* Allocated size of zBuffer in bytes */
- char *zBuffer; /* Buffer holding the utf-8 text of the current token */
-
- int iToken; /* Position value to report for the next token */
-};
-
-/*
-** Create a new tokenizer instance.
-*/
-static int icuCreate(
- int argc, /* Number of entries in argv[] */
- const char * const *argv, /* Tokenizer creation arguments */
- sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
-){
- IcuTokenizer *p;
- int n = 0; /* Bytes needed for a copy of argv[0], including its terminator */
-
- if( argc>0 ){
- n = strlen(argv[0])+1;
- }
- p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); /* struct and locale string share one allocation */
- if( !p ){
- return SQLITE_NOMEM;
- }
- memset(p, 0, sizeof(IcuTokenizer));
-
- if( n ){
- p->zLocale = (char *)&p[1]; /* locale string is stored immediately after the struct */
- memcpy(p->zLocale, argv[0], n);
- }
-
- *ppTokenizer = (sqlite3_tokenizer *)p;
-
- return SQLITE_OK;
-}
-
-/*
-** Destroy a tokenizer
-*/
-static int icuDestroy(sqlite3_tokenizer *pTokenizer){
- IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
- sqlite3_free(p); /* zLocale lives inside the same allocation, so one free suffices */
- return SQLITE_OK;
-}
-
-/*
-** Prepare to begin tokenizing a particular string. The input
-** string to be tokenized is zInput[0..nInput-1]. A cursor
-** used to incrementally tokenize this string is returned in
-** *ppCursor.
-*/
-static int icuOpen(
- sqlite3_tokenizer *pTokenizer, /* The tokenizer */
- const char *zInput, /* Input string */
- int nInput, /* Length of zInput in bytes */
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
-){
- IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
- IcuCursor *pCsr;
-
- const int32_t opt = U_FOLD_CASE_DEFAULT;
- UErrorCode status = U_ZERO_ERROR;
- int nChar; /* Capacity of aChar[] in UChar units */
-
- UChar32 c;
- int iInput = 0; /* Read position in zInput, in bytes */
- int iOut = 0; /* Write position in aChar[], in UChar units */
-
- *ppCursor = 0;
-
- if( zInput==0 ){
- nInput = 0; /* a NULL input is handled as an empty string */
- zInput = "";
- }else if( nInput<0 ){
- nInput = strlen(zInput); /* negative length: measure the nul-terminated input */
- }
- nChar = nInput+1;
- pCsr = (IcuCursor *)sqlite3_malloc(
- sizeof(IcuCursor) + /* IcuCursor */
- ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
- (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
- );
- if( !pCsr ){
- return SQLITE_NOMEM;
- }
- memset(pCsr, 0, sizeof(IcuCursor));
- pCsr->aChar = (UChar *)&pCsr[1]; /* aChar[] starts right after the struct */
- pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; /* aOffset[] follows aChar[], whose length is rounded up to a multiple of 4 */
-
- pCsr->aOffset[iOut] = iInput;
- U8_NEXT(zInput, iInput, nInput, c);
- while( c>0 ){ /* loop ends once c is zero or negative */
- int isError = 0;
- c = u_foldCase(c, opt);
- U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
- if( isError ){
- sqlite3_free(pCsr);
- return SQLITE_ERROR;
- }
- pCsr->aOffset[iOut] = iInput; /* input byte offset that corresponds to utf-16 index iOut */
-
- if( iInput<nInput ){
- U8_NEXT(zInput, iInput, nInput, c);
- }else{
- c = 0; /* input exhausted */
- }
- }
-
- pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
- if( !U_SUCCESS(status) ){
- sqlite3_free(pCsr);
- return SQLITE_ERROR;
- }
- pCsr->nChar = iOut;
-
- ubrk_first(pCsr->pIter);
- *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
- return SQLITE_OK;
-}
-
-/*
-** Close a tokenization cursor previously opened by a call to icuOpen().
-*/
-static int icuClose(sqlite3_tokenizer_cursor *pCursor){
- IcuCursor *pCsr = (IcuCursor *)pCursor;
- ubrk_close(pCsr->pIter);
- sqlite3_free(pCsr->zBuffer); /* token buffer is a separate allocation made in icuNext() */
- sqlite3_free(pCsr); /* also releases aChar[] and aOffset[], which share this allocation */
- return SQLITE_OK;
-}
-
-/*
-** Extract the next token from a tokenization cursor.
-*/
-static int icuNext(
- sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by icuOpen */
- const char **ppToken, /* OUT: *ppToken is the token text */
- int *pnBytes, /* OUT: Number of bytes in token */
- int *piStartOffset, /* OUT: Starting offset of token */
- int *piEndOffset, /* OUT: Ending offset of token */
- int *piPosition /* OUT: Position integer of token */
-){
- IcuCursor *pCsr = (IcuCursor *)pCursor;
-
- int iStart = 0; /* utf-16 index where the token starts */
- int iEnd = 0; /* utf-16 index just past the token */
- int nByte = 0; /* utf-8 size reported by u_strToUTF8() */
-
- while( iStart==iEnd ){ /* repeat until a non-empty span is left after skipping leading whitespace */
- UChar32 c;
-
- iStart = ubrk_current(pCsr->pIter);
- iEnd = ubrk_next(pCsr->pIter);
- if( iEnd==UBRK_DONE ){
- return SQLITE_DONE;
- }
-
- while( iStart<iEnd ){
- int iWhite = iStart;
- U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
- if( u_isspace(c) ){
- iStart = iWhite; /* step over the whitespace character */
- }else{
- break;
- }
- }
- assert(iStart<=iEnd);
- }
-
- do {
- UErrorCode status = U_ZERO_ERROR; /* NOTE(review): status is never examined after the conversion */
- if( nByte ){ /* non-zero only after a previous pass reported the required size */
- char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
- if( !zNew ){
- return SQLITE_NOMEM;
- }
- pCsr->zBuffer = zNew;
- pCsr->nBuffer = nByte;
- }
-
- u_strToUTF8(
- pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
- &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
- &status /* Output success/failure */
- );
- } while( nByte>pCsr->nBuffer ); /* retry with a larger buffer if the text did not fit */
-
- *ppToken = pCsr->zBuffer;
- *pnBytes = nByte;
- *piStartOffset = pCsr->aOffset[iStart]; /* offsets are translated back to the utf-8 input */
- *piEndOffset = pCsr->aOffset[iEnd];
- *piPosition = pCsr->iToken++;
-
- return SQLITE_OK;
-}
-
-/*
-** The set of routines that implement the ICU tokenizer
-*/
-static const sqlite3_tokenizer_module icuTokenizerModule = {
- 0, /* iVersion */
- icuCreate, /* xCreate */
- icuDestroy, /* xDestroy */
- icuOpen, /* xOpen */
- icuClose, /* xClose */
- icuNext, /* xNext */
-};
-
-/*
-** Set *ppModule to point at the implementation of the ICU tokenizer.
-*/
-void sqlite3Fts3IcuTokenizerModule(
- sqlite3_tokenizer_module const**ppModule /* OUT: address of the static ICU module table */
-){
- *ppModule = &icuTokenizerModule;
-}
-
-#endif /* defined(SQLITE_ENABLE_ICU) */
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_porter.c b/src/libtracker-fts/fts3_porter.c
deleted file mode 100644
index 579745b85..000000000
--- a/src/libtracker-fts/fts3_porter.c
+++ /dev/null
@@ -1,646 +0,0 @@
-/*
-** 2006 September 30
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-** Implementation of the full-text-search tokenizer that implements
-** a Porter stemmer.
-*/
-
-/*
-** The code in this file is only compiled if:
-**
-** * The FTS3 module is being built as an extension
-** (in which case SQLITE_CORE is not defined), or
-**
-** * The FTS3 module is being built into the core of
-** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
-*/
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "fts3_tokenizer.h"
-
-/*
-** Class derived from sqlite3_tokenizer
-*/
-typedef struct porter_tokenizer {
- sqlite3_tokenizer base; /* Base class */
-} porter_tokenizer;
-
-/*
-** Class derived from sqlite3_tokenizer_cursor
-*/
-typedef struct porter_tokenizer_cursor {
- sqlite3_tokenizer_cursor base;
- const char *zInput; /* input we are tokenizing */
- int nInput; /* size of the input */
- int iOffset; /* current position in zInput */
- int iToken; /* index of next token to be returned */
- char *zToken; /* storage for current token */
- int nAllocated; /* space allocated to zToken buffer */
-} porter_tokenizer_cursor;
-
-
-/*
-** Create a new tokenizer instance.
-*/
-static int porterCreate(
- int argc, const char * const *argv,
- sqlite3_tokenizer **ppTokenizer
-){
- porter_tokenizer *t;
-
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
-
- t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
- if( t==NULL ) return SQLITE_NOMEM;
- memset(t, 0, sizeof(*t));
- *ppTokenizer = &t->base;
- return SQLITE_OK;
-}
-
-/*
-** Destroy a tokenizer
-*/
-static int porterDestroy(sqlite3_tokenizer *pTokenizer){
- sqlite3_free(pTokenizer);
- return SQLITE_OK;
-}
-
-/*
-** Prepare to begin tokenizing a particular string. The input
-** string to be tokenized is zInput[0..nInput-1]. A cursor
-** used to incrementally tokenize this string is returned in
-** *ppCursor.
-*/
-static int porterOpen(
- sqlite3_tokenizer *pTokenizer, /* The tokenizer */
- const char *zInput, int nInput, /* String to be tokenized */
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
-){
- porter_tokenizer_cursor *c;
-
- UNUSED_PARAMETER(pTokenizer);
-
- c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
- if( c==NULL ) return SQLITE_NOMEM;
-
- c->zInput = zInput;
- if( zInput==0 ){
- c->nInput = 0;
- }else if( nInput<0 ){
- c->nInput = (int)strlen(zInput);
- }else{
- c->nInput = nInput;
- }
- c->iOffset = 0; /* start tokenizing at the beginning */
- c->iToken = 0;
- c->zToken = NULL; /* no space allocated, yet. */
- c->nAllocated = 0;
-
- *ppCursor = &c->base;
- return SQLITE_OK;
-}
-
-/*
-** Close a tokenization cursor previously opened by a call to
-** porterOpen() above.
-*/
-static int porterClose(sqlite3_tokenizer_cursor *pCursor){
- porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
- sqlite3_free(c->zToken);
- sqlite3_free(c);
- return SQLITE_OK;
-}
-/*
-** Vowel or consonant
-*/
-static const char cType[] = {
- 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
- 1, 1, 1, 2, 1
-};
-
-/*
-** isConsonant() and isVowel() determine if their first character in
-** the string they point to is a consonant or a vowel, according
-** to Porter ruls.
-**
-** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
-** 'Y' is a consonant unless it follows another consonant,
-** in which case it is a vowel.
-**
-** In these routine, the letters are in reverse order. So the 'y' rule
-** is that 'y' is a consonant unless it is followed by another
-** consonent.
-*/
-static int isVowel(const char*);
-static int isConsonant(const char *z){
- int j;
- char x = *z;
- if( x==0 ) return 0;
- assert( x>='a' && x<='z' );
- j = cType[x-'a'];
- if( j<2 ) return j;
- return z[1]==0 || isVowel(z + 1);
-}
-static int isVowel(const char *z){
- int j;
- char x = *z;
- if( x==0 ) return 0;
- assert( x>='a' && x<='z' );
- j = cType[x-'a'];
- if( j<2 ) return 1-j;
- return isConsonant(z + 1);
-}
-
-/*
-** Let any sequence of one or more vowels be represented by V and let
-** C be sequence of one or more consonants. Then every word can be
-** represented as:
-**
-** [C] (VC){m} [V]
-**
-** In prose: A word is an optional consonant followed by zero or
-** vowel-consonant pairs followed by an optional vowel. "m" is the
-** number of vowel consonant pairs. This routine computes the value
-** of m for the first i bytes of a word.
-**
-** Return true if the m-value for z is 1 or more. In other words,
-** return true if z contains at least one vowel that is followed
-** by a consonant.
-**
-** In this routine z[] is in reverse order. So we are really looking
-** for an instance of of a consonant followed by a vowel.
-*/
-static int m_gt_0(const char *z){
- while( isVowel(z) ){ z++; }
- if( *z==0 ) return 0;
- while( isConsonant(z) ){ z++; }
- return *z!=0;
-}
-
-/* Like mgt0 above except we are looking for a value of m which is
-** exactly 1
-*/
-static int m_eq_1(const char *z){
- while( isVowel(z) ){ z++; }
- if( *z==0 ) return 0;
- while( isConsonant(z) ){ z++; }
- if( *z==0 ) return 0;
- while( isVowel(z) ){ z++; }
- if( *z==0 ) return 1;
- while( isConsonant(z) ){ z++; }
- return *z==0;
-}
-
-/* Like mgt0 above except we are looking for a value of m>1 instead
-** or m>0
-*/
-static int m_gt_1(const char *z){
- while( isVowel(z) ){ z++; }
- if( *z==0 ) return 0;
- while( isConsonant(z) ){ z++; }
- if( *z==0 ) return 0;
- while( isVowel(z) ){ z++; }
- if( *z==0 ) return 0;
- while( isConsonant(z) ){ z++; }
- return *z!=0;
-}
-
-/*
-** Return TRUE if there is a vowel anywhere within z[0..n-1]
-*/
-static int hasVowel(const char *z){
- while( isConsonant(z) ){ z++; }
- return *z!=0;
-}
-
-/*
-** Return TRUE if the word ends in a double consonant.
-**
-** The text is reversed here. So we are really looking at
-** the first two characters of z[].
-*/
-static int doubleConsonant(const char *z){
- return isConsonant(z) && z[0]==z[1];
-}
-
-/*
-** Return TRUE if the word ends with three letters which
-** are consonant-vowel-consonent and where the final consonant
-** is not 'w', 'x', or 'y'.
-**
-** The word is reversed here. So we are really checking the
-** first three letters and the first one cannot be in [wxy].
-*/
-static int star_oh(const char *z){
- return
- isConsonant(z) &&
- z[0]!='w' && z[0]!='x' && z[0]!='y' &&
- isVowel(z+1) &&
- isConsonant(z+2);
-}
-
-/*
-** If the word ends with zFrom and xCond() is true for the stem
-** of the word that preceeds the zFrom ending, then change the
-** ending to zTo.
-**
-** The input word *pz and zFrom are both in reverse order. zTo
-** is in normal order.
-**
-** Return TRUE if zFrom matches. Return FALSE if zFrom does not
-** match. Not that TRUE is returned even if xCond() fails and
-** no substitution occurs.
-*/
-static int stem(
- char **pz, /* The word being stemmed (Reversed) */
- const char *zFrom, /* If the ending matches this... (Reversed) */
- const char *zTo, /* ... change the ending to this (not reversed) */
- int (*xCond)(const char*) /* Condition that must be true */
-){
- char *z = *pz;
- while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
- if( *zFrom!=0 ) return 0;
- if( xCond && !xCond(z) ) return 1;
- while( *zTo ){
- *(--z) = *(zTo++);
- }
- *pz = z;
- return 1;
-}
-
-/*
-** This is the fallback stemmer used when the porter stemmer is
-** inappropriate. The input word is copied into the output with
-** US-ASCII case folding. If the input word is too long (more
-** than 20 bytes if it contains no digits or more than 6 bytes if
-** it contains digits) then word is truncated to 20 or 6 bytes
-** by taking 10 or 3 bytes from the beginning and end.
-*/
-static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
- int i, mx, j;
- int hasDigit = 0;
- for(i=0; i<nIn; i++){
- char c = zIn[i];
- if( c>='A' && c<='Z' ){
- zOut[i] = c - 'A' + 'a';
- }else{
- if( c>='0' && c<='9' ) hasDigit = 1;
- zOut[i] = c;
- }
- }
- mx = hasDigit ? 3 : 10;
- if( nIn>mx*2 ){
- for(j=mx, i=nIn-mx; i<nIn; i++, j++){
- zOut[j] = zOut[i];
- }
- i = j;
- }
- zOut[i] = 0;
- *pnOut = i;
-}
-
-
-/*
-** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
-** zOut is at least big enough to hold nIn bytes. Write the actual
-** size of the output word (exclusive of the '\0' terminator) into *pnOut.
-**
-** Any upper-case characters in the US-ASCII character set ([A-Z])
-** are converted to lower case. Upper-case UTF characters are
-** unchanged.
-**
-** Words that are longer than about 20 bytes are stemmed by retaining
-** a few bytes from the beginning and the end of the word. If the
-** word contains digits, 3 bytes are taken from the beginning and
-** 3 bytes from the end. For long words without digits, 10 bytes
-** are taken from each end. US-ASCII case folding still applies.
-**
-** If the input word contains not digits but does characters not
-** in [a-zA-Z] then no stemming is attempted and this routine just
-** copies the input into the input into the output with US-ASCII
-** case folding.
-**
-** Stemming never increases the length of the word. So there is
-** no chance of overflowing the zOut buffer.
-*/
-static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
- int i, j;
- char zReverse[28];
- char *z, *z2;
- if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){
- /* The word is too big or too small for the porter stemmer.
- ** Fallback to the copy stemmer */
- copy_stemmer(zIn, nIn, zOut, pnOut);
- return;
- }
- for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
- char c = zIn[i];
- if( c>='A' && c<='Z' ){
- zReverse[j] = c + 'a' - 'A';
- }else if( c>='a' && c<='z' ){
- zReverse[j] = c;
- }else{
- /* The use of a character not in [a-zA-Z] means that we fallback
- ** to the copy stemmer */
- copy_stemmer(zIn, nIn, zOut, pnOut);
- return;
- }
- }
- memset(&zReverse[sizeof(zReverse)-5], 0, 5);
- z = &zReverse[j+1];
-
-
- /* Step 1a */
- if( z[0]=='s' ){
- if(
- !stem(&z, "sess", "ss", 0) &&
- !stem(&z, "sei", "i", 0) &&
- !stem(&z, "ss", "ss", 0)
- ){
- z++;
- }
- }
-
- /* Step 1b */
- z2 = z;
- if( stem(&z, "dee", "ee", m_gt_0) ){
- /* Do nothing. The work was all in the test */
- }else if(
- (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
- && z!=z2
- ){
- if( stem(&z, "ta", "ate", 0) ||
- stem(&z, "lb", "ble", 0) ||
- stem(&z, "zi", "ize", 0) ){
- /* Do nothing. The work was all in the test */
- }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
- z++;
- }else if( m_eq_1(z) && star_oh(z) ){
- *(--z) = 'e';
- }
- }
-
- /* Step 1c */
- if( z[0]=='y' && hasVowel(z+1) ){
- z[0] = 'i';
- }
-
- /* Step 2 */
- switch( z[1] ){
- case 'a':
- stem(&z, "lanoita", "ate", m_gt_0) ||
- stem(&z, "lanoit", "tion", m_gt_0);
- break;
- case 'c':
- stem(&z, "icne", "ence", m_gt_0) ||
- stem(&z, "icna", "ance", m_gt_0);
- break;
- case 'e':
- stem(&z, "rezi", "ize", m_gt_0);
- break;
- case 'g':
- stem(&z, "igol", "log", m_gt_0);
- break;
- case 'l':
- stem(&z, "ilb", "ble", m_gt_0) ||
- stem(&z, "illa", "al", m_gt_0) ||
- stem(&z, "iltne", "ent", m_gt_0) ||
- stem(&z, "ile", "e", m_gt_0) ||
- stem(&z, "ilsuo", "ous", m_gt_0);
- break;
- case 'o':
- stem(&z, "noitazi", "ize", m_gt_0) ||
- stem(&z, "noita", "ate", m_gt_0) ||
- stem(&z, "rota", "ate", m_gt_0);
- break;
- case 's':
- stem(&z, "msila", "al", m_gt_0) ||
- stem(&z, "ssenevi", "ive", m_gt_0) ||
- stem(&z, "ssenluf", "ful", m_gt_0) ||
- stem(&z, "ssensuo", "ous", m_gt_0);
- break;
- case 't':
- stem(&z, "itila", "al", m_gt_0) ||
- stem(&z, "itivi", "ive", m_gt_0) ||
- stem(&z, "itilib", "ble", m_gt_0);
- break;
- }
-
- /* Step 3 */
- switch( z[0] ){
- case 'e':
- stem(&z, "etaci", "ic", m_gt_0) ||
- stem(&z, "evita", "", m_gt_0) ||
- stem(&z, "ezila", "al", m_gt_0);
- break;
- case 'i':
- stem(&z, "itici", "ic", m_gt_0);
- break;
- case 'l':
- stem(&z, "laci", "ic", m_gt_0) ||
- stem(&z, "luf", "", m_gt_0);
- break;
- case 's':
- stem(&z, "ssen", "", m_gt_0);
- break;
- }
-
- /* Step 4 */
- switch( z[1] ){
- case 'a':
- if( z[0]=='l' && m_gt_1(z+2) ){
- z += 2;
- }
- break;
- case 'c':
- if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
- z += 4;
- }
- break;
- case 'e':
- if( z[0]=='r' && m_gt_1(z+2) ){
- z += 2;
- }
- break;
- case 'i':
- if( z[0]=='c' && m_gt_1(z+2) ){
- z += 2;
- }
- break;
- case 'l':
- if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
- z += 4;
- }
- break;
- case 'n':
- if( z[0]=='t' ){
- if( z[2]=='a' ){
- if( m_gt_1(z+3) ){
- z += 3;
- }
- }else if( z[2]=='e' ){
- stem(&z, "tneme", "", m_gt_1) ||
- stem(&z, "tnem", "", m_gt_1) ||
- stem(&z, "tne", "", m_gt_1);
- }
- }
- break;
- case 'o':
- if( z[0]=='u' ){
- if( m_gt_1(z+2) ){
- z += 2;
- }
- }else if( z[3]=='s' || z[3]=='t' ){
- stem(&z, "noi", "", m_gt_1);
- }
- break;
- case 's':
- if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
- z += 3;
- }
- break;
- case 't':
- stem(&z, "eta", "", m_gt_1) ||
- stem(&z, "iti", "", m_gt_1);
- break;
- case 'u':
- if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
- z += 3;
- }
- break;
- case 'v':
- case 'z':
- if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
- z += 3;
- }
- break;
- }
-
- /* Step 5a */
- if( z[0]=='e' ){
- if( m_gt_1(z+1) ){
- z++;
- }else if( m_eq_1(z+1) && !star_oh(z+1) ){
- z++;
- }
- }
-
- /* Step 5b */
- if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
- z++;
- }
-
- /* z[] is now the stemmed word in reverse order. Flip it back
- ** around into forward order and return.
- */
- *pnOut = i = (int)strlen(z);
- zOut[i] = 0;
- while( *z ){
- zOut[--i] = *(z++);
- }
-}
-
-/*
-** Characters that can be part of a token. We assume any character
-** whose value is greater than 0x80 (any UTF character) can be
-** part of a token. In other words, delimiters all must have
-** values of 0x7f or lower.
-*/
-static const char porterIdChar[] = {
-/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
-};
-#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
-
-/*
-** Extract the next token from a tokenization cursor. The cursor must
-** have been opened by a prior call to porterOpen().
-*/
-static int porterNext(
- sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
- const char **pzToken, /* OUT: *pzToken is the token text */
- int *pnBytes, /* OUT: Number of bytes in token */
- int *piStartOffset, /* OUT: Starting offset of token */
- int *piEndOffset, /* OUT: Ending offset of token */
- int *piPosition /* OUT: Position integer of token */
-){
- porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
- const char *z = c->zInput;
-
- while( c->iOffset<c->nInput ){
- int iStartOffset, ch;
-
- /* Scan past delimiter characters */
- while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
- c->iOffset++;
- }
-
- /* Count non-delimiter characters. */
- iStartOffset = c->iOffset;
- while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
- c->iOffset++;
- }
-
- if( c->iOffset>iStartOffset ){
- int n = c->iOffset-iStartOffset;
- if( n>c->nAllocated ){
- char *pNew;
- c->nAllocated = n+20;
- pNew = sqlite3_realloc(c->zToken, c->nAllocated);
- if( !pNew ) return SQLITE_NOMEM;
- c->zToken = pNew;
- }
- porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
- *pzToken = c->zToken;
- *piStartOffset = iStartOffset;
- *piEndOffset = c->iOffset;
- *piPosition = c->iToken++;
- return SQLITE_OK;
- }
- }
- return SQLITE_DONE;
-}
-
-/*
-** The set of routines that implement the porter-stemmer tokenizer
-*/
-static const sqlite3_tokenizer_module porterTokenizerModule = {
- 0,
- porterCreate,
- porterDestroy,
- porterOpen,
- porterClose,
- porterNext,
- 0
-};
-
-/*
-** Allocate a new porter tokenizer. Return a pointer to the new
-** tokenizer in *ppModule
-*/
-void sqlite3Fts3PorterTokenizerModule(
- sqlite3_tokenizer_module const**ppModule
-){
- *ppModule = &porterTokenizerModule;
-}
-
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_snippet.c b/src/libtracker-fts/fts3_snippet.c
deleted file mode 100644
index 4bee014dc..000000000
--- a/src/libtracker-fts/fts3_snippet.c
+++ /dev/null
@@ -1,1520 +0,0 @@
-/*
-** 2009 Oct 23
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-*/
-
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <string.h>
-#include <assert.h>
-
-/*
-** Characters that may appear in the second argument to matchinfo().
-*/
-#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */
-#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */
-#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */
-#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */
-#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */
-#define FTS3_MATCHINFO_LCS 's' /* nCol values */
-#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
-
-/*
-** The default value for the second argument to matchinfo().
-*/
-#define FTS3_MATCHINFO_DEFAULT "pcx"
-
-
-/*
-** Used as an fts3ExprIterate() context when loading phrase doclists to
-** Fts3Expr.aDoclist[]/nDoclist.
-*/
-typedef struct LoadDoclistCtx LoadDoclistCtx;
-struct LoadDoclistCtx {
- Fts3Cursor *pCsr; /* FTS3 Cursor */
- int nPhrase; /* Number of phrases seen so far */
- int nToken; /* Number of tokens seen so far */
-};
-
-/*
-** The following types are used as part of the implementation of the
-** fts3BestSnippet() routine.
-*/
-typedef struct SnippetIter SnippetIter;
-typedef struct SnippetPhrase SnippetPhrase;
-typedef struct SnippetFragment SnippetFragment;
-
-struct SnippetIter {
- Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
- int iCol; /* Extract snippet from this column */
- int nSnippet; /* Requested snippet length (in tokens) */
- int nPhrase; /* Number of phrases in query */
- SnippetPhrase *aPhrase; /* Array of size nPhrase */
- int iCurrent; /* First token of current snippet */
-};
-
-struct SnippetPhrase {
- int nToken; /* Number of tokens in phrase */
- char *pList; /* Pointer to start of phrase position list */
- int iHead; /* Next value in position list */
- char *pHead; /* Position list data following iHead */
- int iTail; /* Next value in trailing position list */
- char *pTail; /* Position list data following iTail */
-};
-
-struct SnippetFragment {
- int iCol; /* Column snippet is extracted from */
- int iPos; /* Index of first token in snippet */
- u64 covered; /* Mask of query phrases covered */
- u64 hlmask; /* Mask of snippet terms to highlight */
-};
-
-/*
-** This type is used as an fts3ExprIterate() context object while
-** accumulating the data returned by the matchinfo() function.
-*/
-typedef struct MatchInfo MatchInfo;
-struct MatchInfo {
- Fts3Cursor *pCursor; /* FTS3 Cursor */
- int nCol; /* Number of columns in table */
- int nPhrase; /* Number of matchable phrases in query */
- sqlite3_int64 nDoc; /* Number of docs in database */
- u32 *aMatchinfo; /* Pre-allocated buffer */
-};
-
-
-
-/*
-** The snippet() and offsets() functions both return text values. An instance
-** of the following structure is used to accumulate those values while the
-** functions are running. See fts3StringAppend() for details.
-*/
-typedef struct StrBuffer StrBuffer;
-struct StrBuffer {
- char *z; /* Pointer to buffer containing string */
- int n; /* Length of z in bytes (excl. nul-term) */
- int nAlloc; /* Allocated size of buffer z in bytes */
-};
-
-
-/*
-** This function is used to help iterate through a position-list. A position
-** list is a list of unique integers, sorted from smallest to largest. Each
-** element of the list is represented by an FTS3 varint that takes the value
-** of the difference between the current element and the previous one plus
-** two. For example, to store the position-list:
-**
-** 4 9 113
-**
-** the three varints:
-**
-** 6 7 106
-**
-** are encoded.
-**
-** When this function is called, *pp points to the start of an element of
-** the list. *piPos contains the value of the previous entry in the list.
-** After it returns, *piPos contains the value of the next element of the
-** list and *pp is advanced to the following varint.
-*/
-static void fts3GetDeltaPosition(char **pp, int *piPos){
- int iVal;
- *pp += sqlite3Fts3GetVarint32(*pp, &iVal);
- *piPos += (iVal-2);
-}
-
-/*
-** Helper function for fts3ExprIterate() (see below).
-*/
-static int fts3ExprIterate2(
- Fts3Expr *pExpr, /* Expression to iterate phrases of */
- int *piPhrase, /* Pointer to phrase counter */
- int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
- void *pCtx /* Second argument to pass to callback */
-){
- int rc; /* Return code */
- int eType = pExpr->eType; /* Type of expression node pExpr */
-
- if( eType!=FTSQUERY_PHRASE ){
- assert( pExpr->pLeft && pExpr->pRight );
- rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
- if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
- rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
- }
- }else{
- rc = x(pExpr, *piPhrase, pCtx);
- (*piPhrase)++;
- }
- return rc;
-}
-
-/*
-** Iterate through all phrase nodes in an FTS3 query, except those that
-** are part of a sub-tree that is the right-hand-side of a NOT operator.
-** For each phrase node found, the supplied callback function is invoked.
-**
-** If the callback function returns anything other than SQLITE_OK,
-** the iteration is abandoned and the error code returned immediately.
-** Otherwise, SQLITE_OK is returned after a callback has been made for
-** all eligible phrase nodes.
-*/
-static int fts3ExprIterate(
- Fts3Expr *pExpr, /* Expression to iterate phrases of */
- int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
- void *pCtx /* Second argument to pass to callback */
-){
- int iPhrase = 0; /* Variable used as the phrase counter */
- return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
-}
-
-/*
-** This is an fts3ExprIterate() callback used while loading the doclists
-** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
-** fts3ExprLoadDoclists().
-*/
-static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
- int rc = SQLITE_OK;
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
-
- UNUSED_PARAMETER(iPhrase);
-
- p->nPhrase++;
- p->nToken += pPhrase->nToken;
-
- return rc;
-}
-
-/*
-** Load the doclists for each phrase in the query associated with FTS3 cursor
-** pCsr.
-**
-** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
-** phrases in the expression (all phrases except those directly or
-** indirectly descended from the right-hand-side of a NOT operator). If
-** pnToken is not NULL, then it is set to the number of tokens in all
-** matchable phrases of the expression.
-*/
-static int fts3ExprLoadDoclists(
- Fts3Cursor *pCsr, /* Fts3 cursor for current query */
- int *pnPhrase, /* OUT: Number of phrases in query */
- int *pnToken /* OUT: Number of tokens in query */
-){
- int rc; /* Return Code */
- LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
- sCtx.pCsr = pCsr;
- rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
- if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
- if( pnToken ) *pnToken = sCtx.nToken;
- return rc;
-}
-
-static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
- (*(int *)ctx)++;
- UNUSED_PARAMETER(pExpr);
- UNUSED_PARAMETER(iPhrase);
- return SQLITE_OK;
-}
-static int fts3ExprPhraseCount(Fts3Expr *pExpr){
- int nPhrase = 0;
- (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
- return nPhrase;
-}
-
-/*
-** Advance the position list iterator specified by the first two
-** arguments so that it points to the first element with a value greater
-** than or equal to parameter iNext.
-*/
-static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
- char *pIter = *ppIter;
- if( pIter ){
- int iIter = *piIter;
-
- while( iIter<iNext ){
- if( 0==(*pIter & 0xFE) ){
- iIter = -1;
- pIter = 0;
- break;
- }
- fts3GetDeltaPosition(&pIter, &iIter);
- }
-
- *piIter = iIter;
- *ppIter = pIter;
- }
-}
-
-/*
-** Advance the snippet iterator to the next candidate snippet.
-*/
-static int fts3SnippetNextCandidate(SnippetIter *pIter){
- int i; /* Loop counter */
-
- if( pIter->iCurrent<0 ){
- /* The SnippetIter object has just been initialized. The first snippet
- ** candidate always starts at offset 0 (even if this candidate has a
- ** score of 0.0).
- */
- pIter->iCurrent = 0;
-
- /* Advance the 'head' iterator of each phrase to the first offset that
- ** is greater than or equal to (iNext+nSnippet).
- */
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
- }
- }else{
- int iStart;
- int iEnd = 0x7FFFFFFF;
-
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- if( pPhrase->pHead && pPhrase->iHead<iEnd ){
- iEnd = pPhrase->iHead;
- }
- }
- if( iEnd==0x7FFFFFFF ){
- return 1;
- }
-
- pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
- fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
- }
- }
-
- return 0;
-}
-
-/*
-** Retrieve information about the current candidate snippet of snippet
-** iterator pIter.
-*/
-static void fts3SnippetDetails(
- SnippetIter *pIter, /* Snippet iterator */
- u64 mCovered, /* Bitmask of phrases already covered */
- int *piToken, /* OUT: First token of proposed snippet */
- int *piScore, /* OUT: "Score" for this snippet */
- u64 *pmCover, /* OUT: Bitmask of phrases covered */
- u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
-){
- int iStart = pIter->iCurrent; /* First token of snippet */
- int iScore = 0; /* Score of this snippet */
- int i; /* Loop counter */
- u64 mCover = 0; /* Mask of phrases covered by this snippet */
- u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
-
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- if( pPhrase->pTail ){
- char *pCsr = pPhrase->pTail;
- int iCsr = pPhrase->iTail;
-
- while( iCsr<(iStart+pIter->nSnippet) ){
- int j;
- u64 mPhrase = (u64)1 << i;
- u64 mPos = (u64)1 << (iCsr - iStart);
- assert( iCsr>=iStart );
- if( (mCover|mCovered)&mPhrase ){
- iScore++;
- }else{
- iScore += 1000;
- }
- mCover |= mPhrase;
-
- for(j=0; j<pPhrase->nToken; j++){
- mHighlight |= (mPos>>j);
- }
-
- if( 0==(*pCsr & 0x0FE) ) break;
- fts3GetDeltaPosition(&pCsr, &iCsr);
- }
- }
- }
-
- /* Set the output variables before returning. */
- *piToken = iStart;
- *piScore = iScore;
- *pmCover = mCover;
- *pmHighlight = mHighlight;
-}
-
-/*
-** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
-** Each invocation populates an element of the SnippetIter.aPhrase[] array.
-*/
-static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
- SnippetIter *p = (SnippetIter *)ctx;
- SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
- char *pCsr;
- int rc;
-
- pPhrase->nToken = pExpr->pPhrase->nToken;
- rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
- assert( rc==SQLITE_OK || pCsr==0 );
- if( pCsr ){
- int iFirst = 0;
- pPhrase->pList = pCsr;
- fts3GetDeltaPosition(&pCsr, &iFirst);
- assert( iFirst>=0 );
- pPhrase->pHead = pCsr;
- pPhrase->pTail = pCsr;
- pPhrase->iHead = iFirst;
- pPhrase->iTail = iFirst;
- }else{
- assert( rc!=SQLITE_OK || (
- pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
- ));
- }
-
- return rc;
-}
-
-/*
-** Select the fragment of text consisting of nFragment contiguous tokens
-** from column iCol that represent the "best" snippet. The best snippet
-** is the snippet with the highest score, where scores are calculated
-** by adding:
-**
-** (a) +1 point for each occurence of a matchable phrase in the snippet.
-**
-** (b) +1000 points for the first occurence of each matchable phrase in
-** the snippet for which the corresponding mCovered bit is not set.
-**
-** The selected snippet parameters are stored in structure *pFragment before
-** returning. The score of the selected snippet is stored in *piScore
-** before returning.
-*/
-static int fts3BestSnippet(
- int nSnippet, /* Desired snippet length */
- Fts3Cursor *pCsr, /* Cursor to create snippet for */
- int iCol, /* Index of column to create snippet from */
- u64 mCovered, /* Mask of phrases already covered */
- u64 *pmSeen, /* IN/OUT: Mask of phrases seen */
- SnippetFragment *pFragment, /* OUT: Best snippet found */
- int *piScore /* OUT: Score of snippet pFragment */
-){
- int rc; /* Return Code */
- int nList; /* Number of phrases in expression */
- SnippetIter sIter; /* Iterates through snippet candidates */
- int nByte; /* Number of bytes of space to allocate */
- int iBestScore = -1; /* Best snippet score found so far */
- int i; /* Loop counter */
-
- memset(&sIter, 0, sizeof(sIter));
-
- /* Iterate through the phrases in the expression to count them. The same
- ** callback makes sure the doclists are loaded for each phrase.
- */
- rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- /* Now that it is known how many phrases there are, allocate and zero
- ** the required space using malloc().
- */
- nByte = sizeof(SnippetPhrase) * nList;
- sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
- if( !sIter.aPhrase ){
- return SQLITE_NOMEM;
- }
- memset(sIter.aPhrase, 0, nByte);
-
- /* Initialize the contents of the SnippetIter object. Then iterate through
- ** the set of phrases in the expression to populate the aPhrase[] array.
- */
- sIter.pCsr = pCsr;
- sIter.iCol = iCol;
- sIter.nSnippet = nSnippet;
- sIter.nPhrase = nList;
- sIter.iCurrent = -1;
- (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
-
- /* Set the *pmSeen output variable. */
- for(i=0; i<nList; i++){
- if( sIter.aPhrase[i].pHead ){
- *pmSeen |= (u64)1 << i;
- }
- }
-
- /* Loop through all candidate snippets. Store the best snippet in
- ** *pFragment. Store its associated 'score' in iBestScore.
- */
- pFragment->iCol = iCol;
- while( !fts3SnippetNextCandidate(&sIter) ){
- int iPos;
- int iScore;
- u64 mCover;
- u64 mHighlight;
- fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
- assert( iScore>=0 );
- if( iScore>iBestScore ){
- pFragment->iPos = iPos;
- pFragment->hlmask = mHighlight;
- pFragment->covered = mCover;
- iBestScore = iScore;
- }
- }
-
- sqlite3_free(sIter.aPhrase);
- *piScore = iBestScore;
- return SQLITE_OK;
-}
-
-
-/*
-** Append a string to the string-buffer passed as the first argument.
-**
-** If nAppend is negative, then the length of the string zAppend is
-** determined using strlen().
-*/
-static int fts3StringAppend(
- StrBuffer *pStr, /* Buffer to append to */
- const char *zAppend, /* Pointer to data to append to buffer */
- int nAppend /* Size of zAppend in bytes (or -1) */
-){
- if( nAppend<0 ){
- nAppend = (int)strlen(zAppend);
- }
-
- /* If there is insufficient space allocated at StrBuffer.z, use realloc()
- ** to grow the buffer until so that it is big enough to accomadate the
- ** appended data.
- */
- if( pStr->n+nAppend+1>=pStr->nAlloc ){
- int nAlloc = pStr->nAlloc+nAppend+100;
- char *zNew = sqlite3_realloc(pStr->z, nAlloc);
- if( !zNew ){
- return SQLITE_NOMEM;
- }
- pStr->z = zNew;
- pStr->nAlloc = nAlloc;
- }
-
- /* Append the data to the string buffer. */
- memcpy(&pStr->z[pStr->n], zAppend, nAppend);
- pStr->n += nAppend;
- pStr->z[pStr->n] = '\0';
-
- return SQLITE_OK;
-}
-
-/*
-** The fts3BestSnippet() function often selects snippets that end with a
-** query term. That is, the final term of the snippet is always a term
-** that requires highlighting. For example, if 'X' is a highlighted term
-** and '.' is a non-highlighted term, BestSnippet() may select:
-**
-** ........X.....X
-**
-** This function "shifts" the beginning of the snippet forward in the
-** document so that there are approximately the same number of
-** non-highlighted terms to the right of the final highlighted term as there
-** are to the left of the first highlighted term. For example, to this:
-**
-** ....X.....X....
-**
-** This is done as part of extracting the snippet text, not when selecting
-** the snippet. Snippet selection is done based on doclists only, so there
-** is no way for fts3BestSnippet() to know whether or not the document
-** actually contains terms that follow the final highlighted term.
-*/
-static int fts3SnippetShift(
- Fts3Table *pTab, /* FTS3 table snippet comes from */
- int iLangid, /* Language id to use in tokenizing */
- int nSnippet, /* Number of tokens desired for snippet */
- const char *zDoc, /* Document text to extract snippet from */
- int nDoc, /* Size of buffer zDoc in bytes */
- int *piPos, /* IN/OUT: First token of snippet */
- u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
-){
- u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
-
- if( hlmask ){
- int nLeft; /* Tokens to the left of first highlight */
- int nRight; /* Tokens to the right of last highlight */
- int nDesired; /* Ideal number of tokens to shift forward */
-
- for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
- for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
- nDesired = (nLeft-nRight)/2;
-
- /* Ideally, the start of the snippet should be pushed forward in the
- ** document nDesired tokens. This block checks if there are actually
- ** nDesired tokens to the right of the snippet. If so, *piPos and
- ** *pHlMask are updated to shift the snippet nDesired tokens to the
- ** right. Otherwise, the snippet is shifted by the number of tokens
- ** available.
- */
- if( nDesired>0 ){
- int nShift; /* Number of tokens to shift snippet by */
- int iCurrent = 0; /* Token counter */
- int rc; /* Return Code */
- sqlite3_tokenizer_module *pMod;
- sqlite3_tokenizer_cursor *pC;
- pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
-
- /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
- ** or more tokens in zDoc/nDoc.
- */
- rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
- const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
- rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
- }
- pMod->xClose(pC);
- if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
-
- nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
- assert( nShift<=nDesired );
- if( nShift>0 ){
- *piPos += nShift;
- *pHlmask = hlmask >> nShift;
- }
- }
- }
- return SQLITE_OK;
-}
-
-/*
-** Extract the snippet text for fragment pFragment from cursor pCsr and
-** append it to string buffer pOut.
-*/
-static int fts3SnippetText(
- Fts3Cursor *pCsr, /* FTS3 Cursor */
- SnippetFragment *pFragment, /* Snippet to extract */
- int iFragment, /* Fragment number */
- int isLast, /* True for final fragment in snippet */
- int nSnippet, /* Number of tokens in extracted snippet */
- const char *zOpen, /* String inserted before highlighted term */
- const char *zClose, /* String inserted after highlighted term */
- const char *zEllipsis, /* String inserted between snippets */
- StrBuffer *pOut /* Write output here */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc; /* Return code */
- const char *zDoc; /* Document text to extract snippet from */
- int nDoc; /* Size of zDoc in bytes */
- int iCurrent = 0; /* Current token number of document */
- int iEnd = 0; /* Byte offset of end of current token */
- int isShiftDone = 0; /* True after snippet is shifted */
- int iPos = pFragment->iPos; /* First token of snippet */
- u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
- int iCol = pFragment->iCol+1; /* Query column to extract text from */
- sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
- sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
-
- zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
- if( zDoc==0 ){
- if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
- return SQLITE_NOMEM;
- }
- return SQLITE_OK;
- }
- nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
-
- /* Open a token cursor on the document. */
- pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
- rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- while( rc==SQLITE_OK ){
- const char *ZDUMMY; /* Dummy argument used with tokenizer */
- int DUMMY1 = -1; /* Dummy argument used with tokenizer */
- int iBegin = 0; /* Offset in zDoc of start of token */
- int iFin = 0; /* Offset in zDoc of end of token */
- int isHighlight = 0; /* True for highlighted terms */
-
- /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
- ** in the FTS code the variable that the third argument to xNext points to
- ** is initialized to zero before the first (*but not necessarily
- ** subsequent*) call to xNext(). This is done for a particular application
- ** that needs to know whether or not the tokenizer is being used for
- ** snippet generation or for some other purpose.
- **
- ** Extreme care is required when writing code to depend on this
- ** initialization. It is not a documented part of the tokenizer interface.
- ** If a tokenizer is used directly by any code outside of FTS, this
- ** convention might not be respected. */
- rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
- if( rc!=SQLITE_OK ){
- if( rc==SQLITE_DONE ){
- /* Special case - the last token of the snippet is also the last token
- ** of the column. Append any punctuation that occurred between the end
- ** of the previous token and the end of the document to the output.
- ** Then break out of the loop. */
- rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
- }
- break;
- }
- if( iCurrent<iPos ){ continue; }
-
- if( !isShiftDone ){
- int n = nDoc - iBegin;
- rc = fts3SnippetShift(
- pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
- );
- isShiftDone = 1;
-
- /* Now that the shift has been done, check if the initial "..." are
- ** required. They are required if (a) this is not the first fragment,
- ** or (b) this fragment does not begin at position 0 of its column.
- */
- if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
- rc = fts3StringAppend(pOut, zEllipsis, -1);
- }
- if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
- }
-
- if( iCurrent>=(iPos+nSnippet) ){
- if( isLast ){
- rc = fts3StringAppend(pOut, zEllipsis, -1);
- }
- break;
- }
-
- /* Set isHighlight to true if this term should be highlighted. */
- isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
-
- if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
- if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
- if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
- if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
-
- iEnd = iFin;
- }
-
- pMod->xClose(pC);
- return rc;
-}
-
-
-/*
-** This function is used to count the entries in a column-list (a
-** delta-encoded list of term offsets within a single column of a single
-** row). When this function is called, *ppCollist should point to the
-** beginning of the first varint in the column-list (the varint that
-** contains the position of the first matching term in the column data).
-** Before returning, *ppCollist is set to point to the first byte after
-** the last varint in the column-list (either the 0x00 signifying the end
-** of the position-list, or the 0x01 that precedes the column number of
-** the next column in the position-list).
-**
-** The number of elements in the column-list is returned.
-*/
-static int fts3ColumnlistCount(char **ppCollist){
- char *pEnd = *ppCollist;
- char c = 0;
- int nEntry = 0;
-
- /* A column-list is terminated by either a 0x01 or 0x00. */
- while( 0xFE & (*pEnd | c) ){
- c = *pEnd++ & 0x80;
- if( !c ) nEntry++;
- }
-
- *ppCollist = pEnd;
- return nEntry;
-}
-
-/*
-** fts3ExprIterate() callback used to collect the "global" matchinfo stats
-** for a single query.
-**
-** fts3ExprIterate() callback to load the 'global' elements of a
-** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
-** of the matchinfo array that are constant for all rows returned by the
-** current query.
-**
-** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
-** function populates Matchinfo.aMatchinfo[] as follows:
-**
-** for(iCol=0; iCol<nCol; iCol++){
-** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
-** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
-** }
-**
-** where X is the number of matches for phrase iPhrase is column iCol of all
-** rows of the table. Y is the number of rows for which column iCol contains
-** at least one instance of phrase iPhrase.
-**
-** If the phrase pExpr consists entirely of deferred tokens, then all X and
-** Y values are set to nDoc, where nDoc is the number of documents in the
-** file system. This is done because the full-text index doclist is required
-** to calculate these values properly, and the full-text index doclist is
-** not available for deferred tokens.
-*/
-static int fts3ExprGlobalHitsCb(
- Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase, /* Phrase number (numbered from zero) */
- void *pCtx /* Pointer to MatchInfo structure */
-){
- MatchInfo *p = (MatchInfo *)pCtx;
- return sqlite3Fts3EvalPhraseStats(
- p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
- );
-}
-
-/*
-** fts3ExprIterate() callback used to collect the "local" part of the
-** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
-** array that are different for each row returned by the query.
-*/
-static int fts3ExprLocalHitsCb(
- Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase, /* Phrase number */
- void *pCtx /* Pointer to MatchInfo structure */
-){
- int rc = SQLITE_OK;
- MatchInfo *p = (MatchInfo *)pCtx;
- int iStart = iPhrase * p->nCol * 3;
- int i;
-
- for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
- char *pCsr;
- rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
- if( pCsr ){
- p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
- }else{
- p->aMatchinfo[iStart+i*3] = 0;
- }
- }
-
- return rc;
-}
-
-static int fts3MatchinfoCheck(
- Fts3Table *pTab,
- char cArg,
- char **pzErr
-){
- if( (cArg==FTS3_MATCHINFO_NPHRASE)
- || (cArg==FTS3_MATCHINFO_NCOL)
- || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
- || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
- || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
- || (cArg==FTS3_MATCHINFO_LCS)
- || (cArg==FTS3_MATCHINFO_HITS)
- ){
- return SQLITE_OK;
- }
- *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
- return SQLITE_ERROR;
-}
-
-static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
- int nVal; /* Number of integers output by cArg */
-
- switch( cArg ){
- case FTS3_MATCHINFO_NDOC:
- case FTS3_MATCHINFO_NPHRASE:
- case FTS3_MATCHINFO_NCOL:
- nVal = 1;
- break;
-
- case FTS3_MATCHINFO_AVGLENGTH:
- case FTS3_MATCHINFO_LENGTH:
- case FTS3_MATCHINFO_LCS:
- nVal = pInfo->nCol;
- break;
-
- default:
- assert( cArg==FTS3_MATCHINFO_HITS );
- nVal = pInfo->nCol * pInfo->nPhrase * 3;
- break;
- }
-
- return nVal;
-}
-
-static int fts3MatchinfoSelectDoctotal(
- Fts3Table *pTab,
- sqlite3_stmt **ppStmt,
- sqlite3_int64 *pnDoc,
- const char **paLen
-){
- sqlite3_stmt *pStmt;
- const char *a;
- sqlite3_int64 nDoc;
-
- if( !*ppStmt ){
- int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
- if( rc!=SQLITE_OK ) return rc;
- }
- pStmt = *ppStmt;
- assert( sqlite3_data_count(pStmt)==1 );
-
- a = sqlite3_column_blob(pStmt, 0);
- a += sqlite3Fts3GetVarint(a, &nDoc);
- if( nDoc==0 ) return FTS_CORRUPT_VTAB;
- *pnDoc = (u32)nDoc;
-
- if( paLen ) *paLen = a;
- return SQLITE_OK;
-}
-
-/*
-** An instance of the following structure is used to store state while
-** iterating through a multi-column position-list corresponding to the
-** hits for a single phrase on a single row in order to calculate the
-** values for a matchinfo() FTS3_MATCHINFO_LCS request.
-*/
-typedef struct LcsIterator LcsIterator;
-struct LcsIterator {
- Fts3Expr *pExpr; /* Pointer to phrase expression */
- int iPosOffset; /* Tokens count up to end of this phrase */
- char *pRead; /* Cursor used to iterate through aDoclist */
- int iPos; /* Current position */
-};
-
-/*
-** If LcsIterator.iCol is set to the following value, the iterator has
-** finished iterating through all offsets for all columns.
-*/
-#define LCS_ITERATOR_FINISHED 0x7FFFFFFF;
-
-static int fts3MatchinfoLcsCb(
- Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase, /* Phrase number (numbered from zero) */
- void *pCtx /* Pointer to MatchInfo structure */
-){
- LcsIterator *aIter = (LcsIterator *)pCtx;
- aIter[iPhrase].pExpr = pExpr;
- return SQLITE_OK;
-}
-
-/*
-** Advance the iterator passed as an argument to the next position. Return
-** 1 if the iterator is at EOF or if it now points to the start of the
-** position list for the next column.
-*/
-static int fts3LcsIteratorAdvance(LcsIterator *pIter){
- char *pRead = pIter->pRead;
- sqlite3_int64 iRead;
- int rc = 0;
-
- pRead += sqlite3Fts3GetVarint(pRead, &iRead);
- if( iRead==0 || iRead==1 ){
- pRead = 0;
- rc = 1;
- }else{
- pIter->iPos += (int)(iRead-2);
- }
-
- pIter->pRead = pRead;
- return rc;
-}
-
-/*
-** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
-**
-** If the call is successful, the longest-common-substring lengths for each
-** column are written into the first nCol elements of the pInfo->aMatchinfo[]
-** array before returning. SQLITE_OK is returned in this case.
-**
-** Otherwise, if an error occurs, an SQLite error code is returned and the
-** data written to the first nCol elements of pInfo->aMatchinfo[] is
-** undefined.
-*/
-static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
- LcsIterator *aIter;
- int i;
- int iCol;
- int nToken = 0;
-
- /* Allocate and populate the array of LcsIterator objects. The array
- ** contains one element for each matchable phrase in the query.
- **/
- aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
- if( !aIter ) return SQLITE_NOMEM;
- memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
- (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
-
- for(i=0; i<pInfo->nPhrase; i++){
- LcsIterator *pIter = &aIter[i];
- nToken -= pIter->pExpr->pPhrase->nToken;
- pIter->iPosOffset = nToken;
- }
-
- for(iCol=0; iCol<pInfo->nCol; iCol++){
- int nLcs = 0; /* LCS value for this column */
- int nLive = 0; /* Number of iterators in aIter not at EOF */
-
- for(i=0; i<pInfo->nPhrase; i++){
- int rc;
- LcsIterator *pIt = &aIter[i];
- rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
- if( rc!=SQLITE_OK ) return rc;
- if( pIt->pRead ){
- pIt->iPos = pIt->iPosOffset;
- fts3LcsIteratorAdvance(&aIter[i]);
- nLive++;
- }
- }
-
- while( nLive>0 ){
- LcsIterator *pAdv = 0; /* The iterator to advance by one position */
- int nThisLcs = 0; /* LCS for the current iterator positions */
-
- for(i=0; i<pInfo->nPhrase; i++){
- LcsIterator *pIter = &aIter[i];
- if( pIter->pRead==0 ){
- /* This iterator is already at EOF for this column. */
- nThisLcs = 0;
- }else{
- if( pAdv==0 || pIter->iPos<pAdv->iPos ){
- pAdv = pIter;
- }
- if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
- nThisLcs++;
- }else{
- nThisLcs = 1;
- }
- if( nThisLcs>nLcs ) nLcs = nThisLcs;
- }
- }
- if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
- }
-
- pInfo->aMatchinfo[iCol] = nLcs;
- }
-
- sqlite3_free(aIter);
- return SQLITE_OK;
-}
-
-/*
-** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
-** be returned by the matchinfo() function. Argument zArg contains the
-** format string passed as the second argument to matchinfo (or the
-** default value "pcx" if no second argument was specified). The format
-** string has already been validated and the pInfo->aMatchinfo[] array
-** is guaranteed to be large enough for the output.
-**
-** If bGlobal is true, then populate all fields of the matchinfo() output.
-** If it is false, then assume that those fields that do not change between
-** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
-** have already been populated.
-**
-** Return SQLITE_OK if successful, or an SQLite error code if an error
-** occurs. If a value other than SQLITE_OK is returned, the state the
-** pInfo->aMatchinfo[] buffer is left in is undefined.
-*/
-static int fts3MatchinfoValues(
- Fts3Cursor *pCsr, /* FTS3 cursor object */
- int bGlobal, /* True to grab the global stats */
- MatchInfo *pInfo, /* Matchinfo context object */
- const char *zArg /* Matchinfo format string */
-){
- int rc = SQLITE_OK;
- int i;
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- sqlite3_stmt *pSelect = 0;
-
- for(i=0; rc==SQLITE_OK && zArg[i]; i++){
-
- switch( zArg[i] ){
- case FTS3_MATCHINFO_NPHRASE:
- if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
- break;
-
- case FTS3_MATCHINFO_NCOL:
- if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
- break;
-
- case FTS3_MATCHINFO_NDOC:
- if( bGlobal ){
- sqlite3_int64 nDoc = 0;
- rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
- pInfo->aMatchinfo[0] = (u32)nDoc;
- }
- break;
-
- case FTS3_MATCHINFO_AVGLENGTH:
- if( bGlobal ){
- sqlite3_int64 nDoc; /* Number of rows in table */
- const char *a; /* Aggregate column length array */
-
- rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
- if( rc==SQLITE_OK ){
- int iCol;
- for(iCol=0; iCol<pInfo->nCol; iCol++){
- u32 iVal;
- sqlite3_int64 nToken;
- a += sqlite3Fts3GetVarint(a, &nToken);
- iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
- pInfo->aMatchinfo[iCol] = iVal;
- }
- }
- }
- break;
-
- case FTS3_MATCHINFO_LENGTH: {
- sqlite3_stmt *pSelectDocsize = 0;
- rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
- if( rc==SQLITE_OK ){
- int iCol;
- const char *a = sqlite3_column_blob(pSelectDocsize, 0);
- for(iCol=0; iCol<pInfo->nCol; iCol++){
- sqlite3_int64 nToken;
- a += sqlite3Fts3GetVarint(a, &nToken);
- pInfo->aMatchinfo[iCol] = (u32)nToken;
- }
- }
- sqlite3_reset(pSelectDocsize);
- break;
- }
-
- case FTS3_MATCHINFO_LCS:
- rc = fts3ExprLoadDoclists(pCsr, 0, 0);
- if( rc==SQLITE_OK ){
- rc = fts3MatchinfoLcs(pCsr, pInfo);
- }
- break;
-
- default: {
- Fts3Expr *pExpr;
- assert( zArg[i]==FTS3_MATCHINFO_HITS );
- pExpr = pCsr->pExpr;
- rc = fts3ExprLoadDoclists(pCsr, 0, 0);
- if( rc!=SQLITE_OK ) break;
- if( bGlobal ){
- if( pCsr->pDeferred ){
- rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
- if( rc!=SQLITE_OK ) break;
- }
- rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
- if( rc!=SQLITE_OK ) break;
- }
- (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
- break;
- }
- }
-
- pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
- }
-
- sqlite3_reset(pSelect);
- return rc;
-}
-
-
-/*
-** Populate pCsr->aMatchinfo[] with data for the current row. The
-** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
-*/
-static int fts3GetMatchinfo(
- Fts3Cursor *pCsr, /* FTS3 Cursor object */
- const char *zArg /* Second argument to matchinfo() function */
-){
- MatchInfo sInfo;
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc = SQLITE_OK;
- int bGlobal = 0; /* Collect 'global' stats as well as local */
-
- memset(&sInfo, 0, sizeof(MatchInfo));
- sInfo.pCursor = pCsr;
- sInfo.nCol = pTab->nColumn;
-
- /* If there is cached matchinfo() data, but the format string for the
- ** cache does not match the format string for this request, discard
- ** the cached data. */
- if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
- assert( pCsr->aMatchinfo );
- sqlite3_free(pCsr->aMatchinfo);
- pCsr->zMatchinfo = 0;
- pCsr->aMatchinfo = 0;
- }
-
- /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
- ** matchinfo function has been called for this query. In this case
- ** allocate the array used to accumulate the matchinfo data and
- ** initialize those elements that are constant for every row.
- */
- if( pCsr->aMatchinfo==0 ){
- int nMatchinfo = 0; /* Number of u32 elements in match-info */
- int nArg; /* Bytes in zArg */
- int i; /* Used to iterate through zArg */
-
- /* Determine the number of phrases in the query */
- pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
- sInfo.nPhrase = pCsr->nPhrase;
-
- /* Determine the number of integers in the buffer returned by this call. */
- for(i=0; zArg[i]; i++){
- nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
- }
-
- /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
- nArg = (int)strlen(zArg);
- pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
- if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
-
- pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
- pCsr->nMatchinfo = nMatchinfo;
- memcpy(pCsr->zMatchinfo, zArg, nArg+1);
- memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
- pCsr->isMatchinfoNeeded = 1;
- bGlobal = 1;
- }
-
- sInfo.aMatchinfo = pCsr->aMatchinfo;
- sInfo.nPhrase = pCsr->nPhrase;
- if( pCsr->isMatchinfoNeeded ){
- rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
- pCsr->isMatchinfoNeeded = 0;
- }
-
- return rc;
-}
-
-/*
-** Implementation of snippet() function.
-*/
-void sqlite3Fts3Snippet(
- sqlite3_context *pCtx, /* SQLite function call context */
- Fts3Cursor *pCsr, /* Cursor object */
- const char *zStart, /* Snippet start text - "<b>" */
- const char *zEnd, /* Snippet end text - "</b>" */
- const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */
- int iCol, /* Extract snippet from this column */
- int nToken /* Approximate number of tokens in snippet */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc = SQLITE_OK;
- int i;
- StrBuffer res = {0, 0, 0};
-
- /* The returned text includes up to four fragments of text extracted from
- ** the data in the current row. The first iteration of the for(...) loop
- ** below attempts to locate a single fragment of text nToken tokens in
- ** size that contains at least one instance of all phrases in the query
- ** expression that appear in the current row. If such a fragment of text
- ** cannot be found, the second iteration of the loop attempts to locate
- ** a pair of fragments, and so on.
- */
- int nSnippet = 0; /* Number of fragments in this snippet */
- SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */
- int nFToken = -1; /* Number of tokens in each fragment */
-
- if( !pCsr->pExpr ){
- sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
- return;
- }
-
- for(nSnippet=1; 1; nSnippet++){
-
- int iSnip; /* Loop counter 0..nSnippet-1 */
- u64 mCovered = 0; /* Bitmask of phrases covered by snippet */
- u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */
-
- if( nToken>=0 ){
- nFToken = (nToken+nSnippet-1) / nSnippet;
- }else{
- nFToken = -1 * nToken;
- }
-
- for(iSnip=0; iSnip<nSnippet; iSnip++){
- int iBestScore = -1; /* Best score of columns checked so far */
- int iRead; /* Used to iterate through columns */
- SnippetFragment *pFragment = &aSnippet[iSnip];
-
- memset(pFragment, 0, sizeof(*pFragment));
-
- /* Loop through all columns of the table being considered for snippets.
- ** If the iCol argument to this function was negative, this means all
- ** columns of the FTS3 table. Otherwise, only column iCol is considered.
- */
- for(iRead=0; iRead<pTab->nColumn; iRead++){
- SnippetFragment sF = {0, 0, 0, 0};
- int iS;
- if( iCol>=0 && iRead!=iCol ) continue;
-
- /* Find the best snippet of nFToken tokens in column iRead. */
- rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
- if( rc!=SQLITE_OK ){
- goto snippet_out;
- }
- if( iS>iBestScore ){
- *pFragment = sF;
- iBestScore = iS;
- }
- }
-
- mCovered |= pFragment->covered;
- }
-
- /* If all query phrases seen by fts3BestSnippet() are present in at least
- ** one of the nSnippet snippet fragments, break out of the loop.
- */
- assert( (mCovered&mSeen)==mCovered );
- if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
- }
-
- assert( nFToken>0 );
-
- for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
- rc = fts3SnippetText(pCsr, &aSnippet[i],
- i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
- );
- }
-
- snippet_out:
- sqlite3Fts3SegmentsClose(pTab);
- if( rc!=SQLITE_OK ){
- sqlite3_result_error_code(pCtx, rc);
- sqlite3_free(res.z);
- }else{
- sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
- }
-}
-
-
-typedef struct TermOffset TermOffset;
-typedef struct TermOffsetCtx TermOffsetCtx;
-
-struct TermOffset {
- char *pList; /* Position-list */
- int iPos; /* Position just read from pList */
- int iOff; /* Offset of this term from read positions */
-};
-
-struct TermOffsetCtx {
- Fts3Cursor *pCsr;
- int iCol; /* Column of table to populate aTerm for */
- int iTerm;
- sqlite3_int64 iDocid;
- TermOffset *aTerm;
-};
-
-/*
-** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
-*/
-static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
- TermOffsetCtx *p = (TermOffsetCtx *)ctx;
- int nTerm; /* Number of tokens in phrase */
- int iTerm; /* For looping through nTerm phrase terms */
- char *pList; /* Pointer to position list for phrase */
- int iPos = 0; /* First position in position-list */
- int rc;
-
- UNUSED_PARAMETER(iPhrase);
- rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
- nTerm = pExpr->pPhrase->nToken;
- if( pList ){
- fts3GetDeltaPosition(&pList, &iPos);
- assert( iPos>=0 );
- }
-
- for(iTerm=0; iTerm<nTerm; iTerm++){
- TermOffset *pT = &p->aTerm[p->iTerm++];
- pT->iOff = nTerm-iTerm-1;
- pT->pList = pList;
- pT->iPos = iPos;
- }
-
- return rc;
-}
-
-/*
-** Implementation of offsets() function.
-*/
-void sqlite3Fts3Offsets(
- sqlite3_context *pCtx, /* SQLite function call context */
- Fts3Cursor *pCsr /* Cursor object */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
- int rc; /* Return Code */
- int nToken; /* Number of tokens in query */
- int iCol; /* Column currently being processed */
- StrBuffer res = {0, 0, 0}; /* Result string */
- TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
-
- if( !pCsr->pExpr ){
- sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
- return;
- }
-
- memset(&sCtx, 0, sizeof(sCtx));
- assert( pCsr->isRequireSeek==0 );
-
- /* Count the number of terms in the query */
- rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
- if( rc!=SQLITE_OK ) goto offsets_out;
-
- /* Allocate the array of TermOffset iterators. */
- sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
- if( 0==sCtx.aTerm ){
- rc = SQLITE_NOMEM;
- goto offsets_out;
- }
- sCtx.iDocid = pCsr->iPrevId;
- sCtx.pCsr = pCsr;
-
- /* Loop through the table columns, appending offset information to
- ** string-buffer res for each column.
- */
- for(iCol=0; iCol<pTab->nColumn; iCol++){
- sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
- const char *ZDUMMY; /* Dummy argument used with xNext() */
- int NDUMMY = 0; /* Dummy argument used with xNext() */
- int iStart = 0;
- int iEnd = 0;
- int iCurrent = 0;
- const char *zDoc;
- int nDoc;
-
- /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
- ** no way that this operation can fail, so the return code from
- ** fts3ExprIterate() can be discarded.
- */
- sCtx.iCol = iCol;
- sCtx.iTerm = 0;
- (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
-
- /* Retreive the text stored in column iCol. If an SQL NULL is stored
- ** in column iCol, jump immediately to the next iteration of the loop.
- ** If an OOM occurs while retrieving the data (this can happen if SQLite
- ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
- ** to the caller.
- */
- zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
- nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
- if( zDoc==0 ){
- if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
- continue;
- }
- rc = SQLITE_NOMEM;
- goto offsets_out;
- }
-
- /* Initialize a tokenizer iterator to iterate through column iCol. */
- rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
- zDoc, nDoc, &pC
- );
- if( rc!=SQLITE_OK ) goto offsets_out;
-
- rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
- while( rc==SQLITE_OK ){
- int i; /* Used to loop through terms */
- int iMinPos = 0x7FFFFFFF; /* Position of next token */
- TermOffset *pTerm = 0; /* TermOffset associated with next token */
-
- for(i=0; i<nToken; i++){
- TermOffset *pT = &sCtx.aTerm[i];
- if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
- iMinPos = pT->iPos-pT->iOff;
- pTerm = pT;
- }
- }
-
- if( !pTerm ){
- /* All offsets for this column have been gathered. */
- rc = SQLITE_DONE;
- }else{
- assert( iCurrent<=iMinPos );
- if( 0==(0xFE&*pTerm->pList) ){
- pTerm->pList = 0;
- }else{
- fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
- }
- while( rc==SQLITE_OK && iCurrent<iMinPos ){
- rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
- }
- if( rc==SQLITE_OK ){
- char aBuffer[64];
- sqlite3_snprintf(sizeof(aBuffer), aBuffer,
- "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
- );
- rc = fts3StringAppend(&res, aBuffer, -1);
- }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
- rc = FTS_CORRUPT_VTAB;
- }
- }
- }
- if( rc==SQLITE_DONE ){
- rc = SQLITE_OK;
- }
-
- pMod->xClose(pC);
- if( rc!=SQLITE_OK ) goto offsets_out;
- }
-
- offsets_out:
- sqlite3_free(sCtx.aTerm);
- assert( rc!=SQLITE_DONE );
- sqlite3Fts3SegmentsClose(pTab);
- if( rc!=SQLITE_OK ){
- sqlite3_result_error_code(pCtx, rc);
- sqlite3_free(res.z);
- }else{
- sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
- }
- return;
-}
-
-/*
-** Implementation of matchinfo() function.
-*/
-void sqlite3Fts3Matchinfo(
- sqlite3_context *pContext, /* Function call context */
- Fts3Cursor *pCsr, /* FTS3 table cursor */
- const char *zArg /* Second arg to matchinfo() function */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc;
- int i;
- const char *zFormat;
-
- if( zArg ){
- for(i=0; zArg[i]; i++){
- char *zErr = 0;
- if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
- sqlite3_result_error(pContext, zErr, -1);
- sqlite3_free(zErr);
- return;
- }
- }
- zFormat = zArg;
- }else{
- zFormat = FTS3_MATCHINFO_DEFAULT;
- }
-
- if( !pCsr->pExpr ){
- sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
- return;
- }
-
- /* Retrieve matchinfo() data. */
- rc = fts3GetMatchinfo(pCsr, zFormat);
- sqlite3Fts3SegmentsClose(pTab);
-
- if( rc!=SQLITE_OK ){
- sqlite3_result_error_code(pContext, rc);
- }else{
- int n = pCsr->nMatchinfo * sizeof(u32);
- sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
- }
-}
-
-#endif
diff --git a/src/libtracker-fts/fts3_term.c b/src/libtracker-fts/fts3_term.c
deleted file mode 100644
index c49d5cb65..000000000
--- a/src/libtracker-fts/fts3_term.c
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
-** 2011 Jan 27
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This file is not part of the production FTS code. It is only used for
-** testing. It contains a virtual table implementation that provides direct
-** access to the full-text index of an FTS table.
-*/
-
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-#ifdef SQLITE_TEST
-
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-
-typedef struct Fts3termTable Fts3termTable;
-typedef struct Fts3termCursor Fts3termCursor;
-
-struct Fts3termTable {
- sqlite3_vtab base; /* Base class used by SQLite core */
- int iIndex; /* Index for Fts3Table.aIndex[] */
- Fts3Table *pFts3Tab;
-};
-
-struct Fts3termCursor {
- sqlite3_vtab_cursor base; /* Base class used by SQLite core */
- Fts3MultiSegReader csr; /* Must be right after "base" */
- Fts3SegFilter filter;
-
- int isEof; /* True if cursor is at EOF */
- char *pNext;
-
- sqlite3_int64 iRowid; /* Current 'rowid' value */
- sqlite3_int64 iDocid; /* Current 'docid' value */
- int iCol; /* Current 'col' value */
- int iPos; /* Current 'pos' value */
-};
-
-/*
-** Schema of the terms table.
-*/
-#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, docid, col, pos)"
-
-/*
-** This function does all the work for both the xConnect and xCreate methods.
-** These tables have no persistent representation of their own, so xConnect
-** and xCreate are identical operations.
-*/
-static int fts3termConnectMethod(
- sqlite3 *db, /* Database connection */
- void *pCtx, /* Non-zero for an fts4prefix table */
- int argc, /* Number of elements in argv array */
- const char * const *argv, /* xCreate/xConnect argument array */
- sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
- char **pzErr /* OUT: sqlite3_malloc'd error message */
-){
- char const *zDb; /* Name of database (e.g. "main") */
- char const *zFts3; /* Name of fts3 table */
- int nDb; /* Result of strlen(zDb) */
- int nFts3; /* Result of strlen(zFts3) */
- int nByte; /* Bytes of space to allocate here */
- int rc; /* value returned by declare_vtab() */
- Fts3termTable *p; /* Virtual table object to return */
- int iIndex = 0;
-
- UNUSED_PARAMETER(pCtx);
- if( argc==5 ){
- iIndex = atoi(argv[4]);
- argc--;
- }
-
- /* The user should specify a single argument - the name of an fts3 table. */
- if( argc!=4 ){
- *pzErr = sqlite3_mprintf(
- "wrong number of arguments to fts4term constructor"
- );
- return SQLITE_ERROR;
- }
-
- zDb = argv[1];
- nDb = (int)strlen(zDb);
- zFts3 = argv[3];
- nFts3 = (int)strlen(zFts3);
-
- rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
- if( rc!=SQLITE_OK ) return rc;
-
- nByte = sizeof(Fts3termTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
- p = (Fts3termTable *)sqlite3_malloc(nByte);
- if( !p ) return SQLITE_NOMEM;
- memset(p, 0, nByte);
-
- p->pFts3Tab = (Fts3Table *)&p[1];
- p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
- p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
- p->pFts3Tab->db = db;
- p->pFts3Tab->nIndex = iIndex+1;
- p->iIndex = iIndex;
-
- memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
- memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
- sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
-
- *ppVtab = (sqlite3_vtab *)p;
- return SQLITE_OK;
-}
-
-/*
-** This function does the work for both the xDisconnect and xDestroy methods.
-** These tables have no persistent representation of their own, so xDisconnect
-** and xDestroy are identical operations.
-*/
-static int fts3termDisconnectMethod(sqlite3_vtab *pVtab){
- Fts3termTable *p = (Fts3termTable *)pVtab;
- Fts3Table *pFts3 = p->pFts3Tab;
- int i;
-
- /* Free any prepared statements held */
- for(i=0; i<SizeofArray(pFts3->aStmt); i++){
- sqlite3_finalize(pFts3->aStmt[i]);
- }
- sqlite3_free(pFts3->zSegmentsTbl);
- sqlite3_free(p);
- return SQLITE_OK;
-}
-
-#define FTS4AUX_EQ_CONSTRAINT 1
-#define FTS4AUX_GE_CONSTRAINT 2
-#define FTS4AUX_LE_CONSTRAINT 4
-
-/*
-** xBestIndex - Analyze a WHERE and ORDER BY clause.
-*/
-static int fts3termBestIndexMethod(
- sqlite3_vtab *pVTab,
- sqlite3_index_info *pInfo
-){
- UNUSED_PARAMETER(pVTab);
-
- /* This vtab naturally does "ORDER BY term, docid, col, pos". */
- if( pInfo->nOrderBy ){
- int i;
- for(i=0; i<pInfo->nOrderBy; i++){
- if( pInfo->aOrderBy[i].iColumn!=i || pInfo->aOrderBy[i].desc ) break;
- }
- if( i==pInfo->nOrderBy ){
- pInfo->orderByConsumed = 1;
- }
- }
-
- return SQLITE_OK;
-}
-
-/*
-** xOpen - Open a cursor.
-*/
-static int fts3termOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
- Fts3termCursor *pCsr; /* Pointer to cursor object to return */
-
- UNUSED_PARAMETER(pVTab);
-
- pCsr = (Fts3termCursor *)sqlite3_malloc(sizeof(Fts3termCursor));
- if( !pCsr ) return SQLITE_NOMEM;
- memset(pCsr, 0, sizeof(Fts3termCursor));
-
- *ppCsr = (sqlite3_vtab_cursor *)pCsr;
- return SQLITE_OK;
-}
-
-/*
-** xClose - Close a cursor.
-*/
-static int fts3termCloseMethod(sqlite3_vtab_cursor *pCursor){
- Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
- Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
-
- sqlite3Fts3SegmentsClose(pFts3);
- sqlite3Fts3SegReaderFinish(&pCsr->csr);
- sqlite3_free(pCsr);
- return SQLITE_OK;
-}
-
-/*
-** xNext - Advance the cursor to the next row, if any.
-*/
-static int fts3termNextMethod(sqlite3_vtab_cursor *pCursor){
- Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
- Fts3Table *pFts3 = ((Fts3termTable *)pCursor->pVtab)->pFts3Tab;
- int rc;
- sqlite3_int64 v;
-
- /* Increment our pretend rowid value. */
- pCsr->iRowid++;
-
- /* Advance to the next term in the full-text index. */
- if( pCsr->csr.aDoclist==0
- || pCsr->pNext>=&pCsr->csr.aDoclist[pCsr->csr.nDoclist-1]
- ){
- rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
- if( rc!=SQLITE_ROW ){
- pCsr->isEof = 1;
- return rc;
- }
-
- pCsr->iCol = 0;
- pCsr->iPos = 0;
- pCsr->iDocid = 0;
- pCsr->pNext = pCsr->csr.aDoclist;
-
- /* Read docid */
- pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &pCsr->iDocid);
- }
-
- pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
- if( v==0 ){
- pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
- pCsr->iDocid += v;
- pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
- pCsr->iCol = 0;
- pCsr->iPos = 0;
- }
-
- if( v==1 ){
- pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
- pCsr->iCol += (int)v;
- pCsr->iPos = 0;
- pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
- }
-
- pCsr->iPos += (int)(v - 2);
-
- return SQLITE_OK;
-}
-
-/*
-** xFilter - Initialize a cursor to point at the start of its data.
-*/
-static int fts3termFilterMethod(
- sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
- int idxNum, /* Strategy index */
- const char *idxStr, /* Unused */
- int nVal, /* Number of elements in apVal */
- sqlite3_value **apVal /* Arguments for the indexing scheme */
-){
- Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
- Fts3termTable *p = (Fts3termTable *)pCursor->pVtab;
- Fts3Table *pFts3 = p->pFts3Tab;
- int rc;
-
- UNUSED_PARAMETER(nVal);
- UNUSED_PARAMETER(idxNum);
- UNUSED_PARAMETER(idxStr);
- UNUSED_PARAMETER(apVal);
-
- assert( idxStr==0 && idxNum==0 );
-
- /* In case this cursor is being reused, close and zero it. */
- testcase(pCsr->filter.zTerm);
- sqlite3Fts3SegReaderFinish(&pCsr->csr);
- memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
-
- pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
- pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
-
- rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
- pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
- );
- if( rc==SQLITE_OK ){
- rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
- }
- if( rc==SQLITE_OK ){
- rc = fts3termNextMethod(pCursor);
- }
- return rc;
-}
-
-/*
-** xEof - Return true if the cursor is at EOF, or false otherwise.
-*/
-static int fts3termEofMethod(sqlite3_vtab_cursor *pCursor){
- Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
- return pCsr->isEof;
-}
-
-/*
-** xColumn - Return a column value.
-*/
-static int fts3termColumnMethod(
- sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
- int iCol /* Index of column to read value from */
-){
- Fts3termCursor *p = (Fts3termCursor *)pCursor;
-
- assert( iCol>=0 && iCol<=3 );
- switch( iCol ){
- case 0:
- sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
- break;
- case 1:
- sqlite3_result_int64(pCtx, p->iDocid);
- break;
- case 2:
- sqlite3_result_int64(pCtx, p->iCol);
- break;
- default:
- sqlite3_result_int64(pCtx, p->iPos);
- break;
- }
-
- return SQLITE_OK;
-}
-
-/*
-** xRowid - Return the current rowid for the cursor.
-*/
-static int fts3termRowidMethod(
- sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite_int64 *pRowid /* OUT: Rowid value */
-){
- Fts3termCursor *pCsr = (Fts3termCursor *)pCursor;
- *pRowid = pCsr->iRowid;
- return SQLITE_OK;
-}
-
-/*
-** Register the fts3term module with database connection db. Return SQLITE_OK
-** if successful or an error code if sqlite3_create_module() fails.
-*/
-int sqlite3Fts3InitTerm(sqlite3 *db){
- static const sqlite3_module fts3term_module = {
- 0, /* iVersion */
- fts3termConnectMethod, /* xCreate */
- fts3termConnectMethod, /* xConnect */
- fts3termBestIndexMethod, /* xBestIndex */
- fts3termDisconnectMethod, /* xDisconnect */
- fts3termDisconnectMethod, /* xDestroy */
- fts3termOpenMethod, /* xOpen */
- fts3termCloseMethod, /* xClose */
- fts3termFilterMethod, /* xFilter */
- fts3termNextMethod, /* xNext */
- fts3termEofMethod, /* xEof */
- fts3termColumnMethod, /* xColumn */
- fts3termRowidMethod, /* xRowid */
- 0, /* xUpdate */
- 0, /* xBegin */
- 0, /* xSync */
- 0, /* xCommit */
- 0, /* xRollback */
- 0, /* xFindFunction */
- 0, /* xRename */
- 0, /* xSavepoint */
- 0, /* xRelease */
- 0 /* xRollbackTo */
- };
- int rc; /* Return code */
-
- rc = sqlite3_create_module(db, "fts4term", &fts3term_module, 0);
- return rc;
-}
-
-#endif
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_test.c b/src/libtracker-fts/fts3_test.c
deleted file mode 100644
index 4da0b8f13..000000000
--- a/src/libtracker-fts/fts3_test.c
+++ /dev/null
@@ -1,535 +0,0 @@
-/*
-** 2011 Jun 13
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This file is not part of the production FTS code. It is only used for
-** testing. It contains a Tcl command that can be used to test if a document
-** matches an FTS NEAR expression.
-**
-** As of March 2012, it also contains a version 1 tokenizer used for testing
-** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
-*/
-
-#include <tcl.h>
-#include <string.h>
-#include <assert.h>
-
-#if defined(SQLITE_TEST)
-#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
-
-/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
-#include "fts3Int.h"
-
-#define NM_MAX_TOKEN 12
-
-typedef struct NearPhrase NearPhrase;
-typedef struct NearDocument NearDocument;
-typedef struct NearToken NearToken;
-
-struct NearDocument {
- int nToken; /* Length of token in bytes */
- NearToken *aToken; /* Token array */
-};
-
-struct NearToken {
- int n; /* Length of token in bytes */
- const char *z; /* Pointer to token string */
-};
-
-struct NearPhrase {
- int nNear; /* Preceding NEAR value */
- int nToken; /* Number of tokens in this phrase */
- NearToken aToken[NM_MAX_TOKEN]; /* Array of tokens in this phrase */
-};
-
-static int nm_phrase_match(
- NearPhrase *p,
- NearToken *aToken
-){
- int ii;
-
- for(ii=0; ii<p->nToken; ii++){
- NearToken *pToken = &p->aToken[ii];
- if( pToken->n>0 && pToken->z[pToken->n-1]=='*' ){
- if( aToken[ii].n<(pToken->n-1) ) return 0;
- if( memcmp(aToken[ii].z, pToken->z, pToken->n-1) ) return 0;
- }else{
- if( aToken[ii].n!=pToken->n ) return 0;
- if( memcmp(aToken[ii].z, pToken->z, pToken->n) ) return 0;
- }
- }
-
- return 1;
-}
-
-static int nm_near_chain(
- int iDir, /* Direction to iterate through aPhrase[] */
- NearDocument *pDoc, /* Document to match against */
- int iPos, /* Position at which iPhrase was found */
- int nPhrase, /* Size of phrase array */
- NearPhrase *aPhrase, /* Phrase array */
- int iPhrase /* Index of phrase found */
-){
- int iStart;
- int iStop;
- int ii;
- int nNear;
- int iPhrase2;
- NearPhrase *p;
- NearPhrase *pPrev;
-
- assert( iDir==1 || iDir==-1 );
-
- if( iDir==1 ){
- if( (iPhrase+1)==nPhrase ) return 1;
- nNear = aPhrase[iPhrase+1].nNear;
- }else{
- if( iPhrase==0 ) return 1;
- nNear = aPhrase[iPhrase].nNear;
- }
- pPrev = &aPhrase[iPhrase];
- iPhrase2 = iPhrase+iDir;
- p = &aPhrase[iPhrase2];
-
- iStart = iPos - nNear - p->nToken;
- iStop = iPos + nNear + pPrev->nToken;
-
- if( iStart<0 ) iStart = 0;
- if( iStop > pDoc->nToken - p->nToken ) iStop = pDoc->nToken - p->nToken;
-
- for(ii=iStart; ii<=iStop; ii++){
- if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
- if( nm_near_chain(iDir, pDoc, ii, nPhrase, aPhrase, iPhrase2) ) return 1;
- }
- }
-
- return 0;
-}
-
-static int nm_match_count(
- NearDocument *pDoc, /* Document to match against */
- int nPhrase, /* Size of phrase array */
- NearPhrase *aPhrase, /* Phrase array */
- int iPhrase /* Index of phrase to count matches for */
-){
- int nOcc = 0;
- int ii;
- NearPhrase *p = &aPhrase[iPhrase];
-
- for(ii=0; ii<(pDoc->nToken + 1 - p->nToken); ii++){
- if( nm_phrase_match(p, &pDoc->aToken[ii]) ){
- /* Test forward NEAR chain (i>iPhrase) */
- if( 0==nm_near_chain(1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
-
- /* Test reverse NEAR chain (i<iPhrase) */
- if( 0==nm_near_chain(-1, pDoc, ii, nPhrase, aPhrase, iPhrase) ) continue;
-
- /* This is a real match. Increment the counter. */
- nOcc++;
- }
- }
-
- return nOcc;
-}
-
-/*
-** Tclcmd: fts3_near_match DOCUMENT EXPR ?OPTIONS?
-*/
-static int fts3_near_match_cmd(
- ClientData clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
- int nTotal = 0;
- int rc;
- int ii;
- int nPhrase;
- NearPhrase *aPhrase = 0;
- NearDocument doc = {0, 0};
- Tcl_Obj **apDocToken;
- Tcl_Obj *pRet;
- Tcl_Obj *pPhrasecount = 0;
-
- Tcl_Obj **apExprToken;
- int nExprToken;
-
- UNUSED_PARAMETER(clientData);
-
- /* Must have 3 or more arguments. */
- if( objc<3 || (objc%2)==0 ){
- Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
- rc = TCL_ERROR;
- goto near_match_out;
- }
-
- for(ii=3; ii<objc; ii+=2){
- enum NM_enum { NM_PHRASECOUNTS };
- struct TestnmSubcmd {
- char *zName;
- enum NM_enum eOpt;
- } aOpt[] = {
- { "-phrasecountvar", NM_PHRASECOUNTS },
- { 0, 0 }
- };
- int iOpt;
- if( Tcl_GetIndexFromObjStruct(
- interp, objv[ii], aOpt, sizeof(aOpt[0]), "option", 0, &iOpt)
- ){
- return TCL_ERROR;
- }
-
- switch( aOpt[iOpt].eOpt ){
- case NM_PHRASECOUNTS:
- pPhrasecount = objv[ii+1];
- break;
- }
- }
-
- rc = Tcl_ListObjGetElements(interp, objv[1], &doc.nToken, &apDocToken);
- if( rc!=TCL_OK ) goto near_match_out;
- doc.aToken = (NearToken *)ckalloc(doc.nToken*sizeof(NearToken));
- for(ii=0; ii<doc.nToken; ii++){
- doc.aToken[ii].z = Tcl_GetStringFromObj(apDocToken[ii], &doc.aToken[ii].n);
- }
-
- rc = Tcl_ListObjGetElements(interp, objv[2], &nExprToken, &apExprToken);
- if( rc!=TCL_OK ) goto near_match_out;
-
- nPhrase = (nExprToken + 1) / 2;
- aPhrase = (NearPhrase *)ckalloc(nPhrase * sizeof(NearPhrase));
- memset(aPhrase, 0, nPhrase * sizeof(NearPhrase));
- for(ii=0; ii<nPhrase; ii++){
- Tcl_Obj *pPhrase = apExprToken[ii*2];
- Tcl_Obj **apToken;
- int nToken;
- int jj;
-
- rc = Tcl_ListObjGetElements(interp, pPhrase, &nToken, &apToken);
- if( rc!=TCL_OK ) goto near_match_out;
- if( nToken>NM_MAX_TOKEN ){
- Tcl_AppendResult(interp, "Too many tokens in phrase", 0);
- rc = TCL_ERROR;
- goto near_match_out;
- }
- for(jj=0; jj<nToken; jj++){
- NearToken *pT = &aPhrase[ii].aToken[jj];
- pT->z = Tcl_GetStringFromObj(apToken[jj], &pT->n);
- }
- aPhrase[ii].nToken = nToken;
- }
- for(ii=1; ii<nPhrase; ii++){
- Tcl_Obj *pNear = apExprToken[2*ii-1];
- int nNear;
- rc = Tcl_GetIntFromObj(interp, pNear, &nNear);
- if( rc!=TCL_OK ) goto near_match_out;
- aPhrase[ii].nNear = nNear;
- }
-
- pRet = Tcl_NewObj();
- Tcl_IncrRefCount(pRet);
- for(ii=0; ii<nPhrase; ii++){
- int nOcc = nm_match_count(&doc, nPhrase, aPhrase, ii);
- Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(nOcc));
- nTotal += nOcc;
- }
- if( pPhrasecount ){
- Tcl_ObjSetVar2(interp, pPhrasecount, 0, pRet, 0);
- }
- Tcl_DecrRefCount(pRet);
- Tcl_SetObjResult(interp, Tcl_NewBooleanObj(nTotal>0));
-
- near_match_out:
- ckfree((char *)aPhrase);
- ckfree((char *)doc.aToken);
- return rc;
-}
-
-/*
-** Tclcmd: fts3_configure_incr_load ?CHUNKSIZE THRESHOLD?
-**
-** Normally, FTS uses hard-coded values to determine the minimum doclist
-** size eligible for incremental loading, and the size of the chunks loaded
-** when a doclist is incrementally loaded. This command allows the built-in
-** values to be overridden for testing purposes.
-**
-** If present, the first argument is the chunksize in bytes to load doclists
-** in. The second argument is the minimum doclist size in bytes to use
-** incremental loading with.
-**
-** Whether or not the arguments are present, this command returns a list of
-** two integers - the initial chunksize and threshold when the command is
-** invoked. This can be used to restore the default behaviour after running
-** tests. For example:
-**
-** # Override incr-load settings for testing:
-** set cfg [fts3_configure_incr_load $new_chunksize $new_threshold]
-**
-** .... run tests ....
-**
-** # Restore initial incr-load settings:
-** eval fts3_configure_incr_load $cfg
-*/
-static int fts3_configure_incr_load_cmd(
- ClientData clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
-#ifdef SQLITE_ENABLE_FTS3
- extern int test_fts3_node_chunksize;
- extern int test_fts3_node_chunk_threshold;
- Tcl_Obj *pRet;
-
- if( objc!=1 && objc!=3 ){
- Tcl_WrongNumArgs(interp, 1, objv, "?CHUNKSIZE THRESHOLD?");
- return TCL_ERROR;
- }
-
- pRet = Tcl_NewObj();
- Tcl_IncrRefCount(pRet);
- Tcl_ListObjAppendElement(
- interp, pRet, Tcl_NewIntObj(test_fts3_node_chunksize));
- Tcl_ListObjAppendElement(
- interp, pRet, Tcl_NewIntObj(test_fts3_node_chunk_threshold));
-
- if( objc==3 ){
- int iArg1;
- int iArg2;
- if( Tcl_GetIntFromObj(interp, objv[1], &iArg1)
- || Tcl_GetIntFromObj(interp, objv[2], &iArg2)
- ){
- Tcl_DecrRefCount(pRet);
- return TCL_ERROR;
- }
- test_fts3_node_chunksize = iArg1;
- test_fts3_node_chunk_threshold = iArg2;
- }
-
- Tcl_SetObjResult(interp, pRet);
- Tcl_DecrRefCount(pRet);
-#endif
- UNUSED_PARAMETER(clientData);
- return TCL_OK;
-}
-
-#ifdef SQLITE_ENABLE_FTS3
-/**************************************************************************
-** Beginning of test tokenizer code.
-**
-** For language 0, this tokenizer is similar to the default 'simple'
-** tokenizer. For other languages L, the following:
-**
-** * Odd numbered languages are case-sensitive. Even numbered
-** languages are not.
-**
-** * Language ids 100 or greater are considered an error.
-**
-** The implementation assumes that the input contains only ASCII characters
-** (i.e. those that may be encoded in UTF-8 using a single byte).
-*/
-typedef struct test_tokenizer {
- sqlite3_tokenizer base;
-} test_tokenizer;
-
-typedef struct test_tokenizer_cursor {
- sqlite3_tokenizer_cursor base;
- const char *aInput; /* Input being tokenized */
- int nInput; /* Size of the input in bytes */
- int iInput; /* Current offset in aInput */
- int iToken; /* Index of next token to be returned */
- char *aBuffer; /* Buffer containing current token */
- int nBuffer; /* Number of bytes allocated at pToken */
- int iLangid; /* Configured language id */
-} test_tokenizer_cursor;
-
-static int testTokenizerCreate(
- int argc, const char * const *argv,
- sqlite3_tokenizer **ppTokenizer
-){
- test_tokenizer *pNew;
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
-
- pNew = sqlite3_malloc(sizeof(test_tokenizer));
- if( !pNew ) return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(test_tokenizer));
-
- *ppTokenizer = (sqlite3_tokenizer *)pNew;
- return SQLITE_OK;
-}
-
-static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
- test_tokenizer *p = (test_tokenizer *)pTokenizer;
- sqlite3_free(p);
- return SQLITE_OK;
-}
-
-static int testTokenizerOpen(
- sqlite3_tokenizer *pTokenizer, /* The tokenizer */
- const char *pInput, int nBytes, /* String to be tokenized */
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
-){
- int rc = SQLITE_OK; /* Return code */
- test_tokenizer_cursor *pCsr; /* New cursor object */
-
- UNUSED_PARAMETER(pTokenizer);
-
- pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
- if( pCsr==0 ){
- rc = SQLITE_NOMEM;
- }else{
- memset(pCsr, 0, sizeof(test_tokenizer_cursor));
- pCsr->aInput = pInput;
- if( nBytes<0 ){
- pCsr->nInput = (int)strlen(pInput);
- }else{
- pCsr->nInput = nBytes;
- }
- }
-
- *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
- return rc;
-}
-
-static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
- test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
- sqlite3_free(pCsr->aBuffer);
- sqlite3_free(pCsr);
- return SQLITE_OK;
-}
-
-static int testIsTokenChar(char c){
- return (c>='a' && c<='z') || (c>='A' && c<='Z');
-}
-static int testTolower(char c){
- char ret = c;
- if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
- return ret;
-}
-
-static int testTokenizerNext(
- sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */
- const char **ppToken, /* OUT: *ppToken is the token text */
- int *pnBytes, /* OUT: Number of bytes in token */
- int *piStartOffset, /* OUT: Starting offset of token */
- int *piEndOffset, /* OUT: Ending offset of token */
- int *piPosition /* OUT: Position integer of token */
-){
- test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
- int rc = SQLITE_OK;
- const char *p;
- const char *pEnd;
-
- p = &pCsr->aInput[pCsr->iInput];
- pEnd = &pCsr->aInput[pCsr->nInput];
-
- /* Skip past any white-space */
- assert( p<=pEnd );
- while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
-
- if( p==pEnd ){
- rc = SQLITE_DONE;
- }else{
- /* Advance to the end of the token */
- const char *pToken = p;
- int nToken;
- while( p<pEnd && testIsTokenChar(*p) ) p++;
- nToken = (int)(p-pToken);
-
- /* Copy the token into the buffer */
- if( nToken>pCsr->nBuffer ){
- sqlite3_free(pCsr->aBuffer);
- pCsr->aBuffer = sqlite3_malloc(nToken);
- }
- if( pCsr->aBuffer==0 ){
- rc = SQLITE_NOMEM;
- }else{
- int i;
-
- if( pCsr->iLangid & 0x00000001 ){
- for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
- }else{
- for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
- }
- pCsr->iToken++;
- pCsr->iInput = (int)(p - pCsr->aInput);
-
- *ppToken = pCsr->aBuffer;
- *pnBytes = nToken;
- *piStartOffset = (int)(pToken - pCsr->aInput);
- *piEndOffset = (int)(p - pCsr->aInput);
- *piPosition = pCsr->iToken;
- }
- }
-
- return rc;
-}
-
-static int testTokenizerLanguage(
- sqlite3_tokenizer_cursor *pCursor,
- int iLangid
-){
- int rc = SQLITE_OK;
- test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
- pCsr->iLangid = iLangid;
- if( pCsr->iLangid>=100 ){
- rc = SQLITE_ERROR;
- }
- return rc;
-}
-#endif
-
-static int fts3_test_tokenizer_cmd(
- ClientData clientData,
- Tcl_Interp *interp,
- int objc,
- Tcl_Obj *CONST objv[]
-){
-#ifdef SQLITE_ENABLE_FTS3
- static const sqlite3_tokenizer_module testTokenizerModule = {
- 1,
- testTokenizerCreate,
- testTokenizerDestroy,
- testTokenizerOpen,
- testTokenizerClose,
- testTokenizerNext,
- testTokenizerLanguage
- };
- const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
- if( objc!=1 ){
- Tcl_WrongNumArgs(interp, 1, objv, "");
- return TCL_ERROR;
- }
- Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
- (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
- ));
-#endif
- UNUSED_PARAMETER(clientData);
- return TCL_OK;
-}
-
-/*
-** End of tokenizer code.
-**************************************************************************/
-
-int Sqlitetestfts3_Init(Tcl_Interp *interp){
- Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
- Tcl_CreateObjCommand(interp,
- "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
- );
- Tcl_CreateObjCommand(
- interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
- );
- return TCL_OK;
-}
-#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
-#endif /* ifdef SQLITE_TEST */
diff --git a/src/libtracker-fts/fts3_tokenizer.c b/src/libtracker-fts/fts3_tokenizer.c
deleted file mode 100644
index 8241be81f..000000000
--- a/src/libtracker-fts/fts3_tokenizer.c
+++ /dev/null
@@ -1,488 +0,0 @@
-/*
-** 2007 June 22
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This is part of an SQLite module implementing full-text search.
-** This particular file implements the generic tokenizer interface.
-*/
-
-/*
-** The code in this file is only compiled if:
-**
-** * The FTS3 module is being built as an extension
-** (in which case SQLITE_CORE is not defined), or
-**
-** * The FTS3 module is being built into the core of
-** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
-*/
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <assert.h>
-#include <string.h>
-
-/*
-** Implementation of the SQL scalar function for accessing the underlying
-** hash table. This function may be called as follows:
-**
-** SELECT <function-name>(<key-name>);
-** SELECT <function-name>(<key-name>, <pointer>);
-**
-** where <function-name> is the name passed as the second argument
-** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer').
-**
-** If the <pointer> argument is specified, it must be a blob value
-** containing a pointer to be stored as the hash data corresponding
-** to the string <key-name>. If <pointer> is not specified, then
-** the string <key-name> must already exist in the has table. Otherwise,
-** an error is returned.
-**
-** Whether or not the <pointer> argument is specified, the value returned
-** is a blob containing the pointer stored as the hash data corresponding
-** to string <key-name> (after the hash-table is updated, if applicable).
-*/
-static void scalarFunc(
- sqlite3_context *context,
- int argc,
- sqlite3_value **argv
-){
- Fts3Hash *pHash;
- void *pPtr = 0;
- const unsigned char *zName;
- int nName;
-
- assert( argc==1 || argc==2 );
-
- pHash = (Fts3Hash *)sqlite3_user_data(context);
-
- zName = sqlite3_value_text(argv[0]);
- nName = sqlite3_value_bytes(argv[0])+1;
-
- if( argc==2 ){
- void *pOld;
- int n = sqlite3_value_bytes(argv[1]);
- if( n!=sizeof(pPtr) ){
- sqlite3_result_error(context, "argument type mismatch", -1);
- return;
- }
- pPtr = *(void **)sqlite3_value_blob(argv[1]);
- pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr);
- if( pOld==pPtr ){
- sqlite3_result_error(context, "out of memory", -1);
- return;
- }
- }else{
- pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
- if( !pPtr ){
- char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
- sqlite3_result_error(context, zErr, -1);
- sqlite3_free(zErr);
- return;
- }
- }
-
- sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
-}
-
-int sqlite3Fts3IsIdChar(char c){
- static const char isFtsIdChar[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
- };
- return (c&0x80 || isFtsIdChar[(int)(c)]);
-}
-
-const char *sqlite3Fts3NextToken(const char *zStr, int *pn){
- const char *z1;
- const char *z2 = 0;
-
- /* Find the start of the next token. */
- z1 = zStr;
- while( z2==0 ){
- char c = *z1;
- switch( c ){
- case '\0': return 0; /* No more tokens here */
- case '\'':
- case '"':
- case '`': {
- z2 = z1;
- while( *++z2 && (*z2!=c || *++z2==c) );
- break;
- }
- case '[':
- z2 = &z1[1];
- while( *z2 && z2[0]!=']' ) z2++;
- if( *z2 ) z2++;
- break;
-
- default:
- if( sqlite3Fts3IsIdChar(*z1) ){
- z2 = &z1[1];
- while( sqlite3Fts3IsIdChar(*z2) ) z2++;
- }else{
- z1++;
- }
- }
- }
-
- *pn = (int)(z2-z1);
- return z1;
-}
-
-int sqlite3Fts3InitTokenizer(
- Fts3Hash *pHash, /* Tokenizer hash table */
- const char *zArg, /* Tokenizer name */
- sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */
- char **pzErr /* OUT: Set to malloced error message */
-){
- int rc;
- char *z = (char *)zArg;
- int n = 0;
- char *zCopy;
- char *zEnd; /* Pointer to nul-term of zCopy */
- sqlite3_tokenizer_module *m;
-
- zCopy = sqlite3_mprintf("%s", zArg);
- if( !zCopy ) return SQLITE_NOMEM;
- zEnd = &zCopy[strlen(zCopy)];
-
- z = (char *)sqlite3Fts3NextToken(zCopy, &n);
- z[n] = '\0';
- sqlite3Fts3Dequote(z);
-
- m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
- if( !m ){
- *pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
- rc = SQLITE_ERROR;
- }else{
- char const **aArg = 0;
- int iArg = 0;
- z = &z[n+1];
- while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){
- int nNew = sizeof(char *)*(iArg+1);
- char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew);
- if( !aNew ){
- sqlite3_free(zCopy);
- sqlite3_free((void *)aArg);
- return SQLITE_NOMEM;
- }
- aArg = aNew;
- aArg[iArg++] = z;
- z[n] = '\0';
- sqlite3Fts3Dequote(z);
- z = &z[n+1];
- }
- rc = m->xCreate(iArg, aArg, ppTok);
- assert( rc!=SQLITE_OK || *ppTok );
- if( rc!=SQLITE_OK ){
- *pzErr = sqlite3_mprintf("unknown tokenizer");
- }else{
- (*ppTok)->pModule = m;
- }
- sqlite3_free((void *)aArg);
- }
-
- sqlite3_free(zCopy);
- return rc;
-}
-
-
-#ifdef SQLITE_TEST
-
-#include <tcl.h>
-#include <string.h>
-
-/*
-** Implementation of a special SQL scalar function for testing tokenizers
-** designed to be used in concert with the Tcl testing framework. This
-** function must be called with two or more arguments:
-**
-** SELECT <function-name>(<key-name>, ..., <input-string>);
-**
-** where <function-name> is the name passed as the second argument
-** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
-** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test').
-**
-** The return value is a string that may be interpreted as a Tcl
-** list. For each token in the <input-string>, three elements are
-** added to the returned list. The first is the token position, the
-** second is the token text (folded, stemmed, etc.) and the third is the
-** substring of <input-string> associated with the token. For example,
-** using the built-in "simple" tokenizer:
-**
-** SELECT fts_tokenizer_test('simple', 'I don't see how');
-**
-** will return the string:
-**
-** "{0 i I 1 dont don't 2 see see 3 how how}"
-**
-*/
-static void testFunc(
- sqlite3_context *context,
- int argc,
- sqlite3_value **argv
-){
- Fts3Hash *pHash;
- sqlite3_tokenizer_module *p;
- sqlite3_tokenizer *pTokenizer = 0;
- sqlite3_tokenizer_cursor *pCsr = 0;
-
- const char *zErr = 0;
-
- const char *zName;
- int nName;
- const char *zInput;
- int nInput;
-
- const char *azArg[64];
-
- const char *zToken;
- int nToken = 0;
- int iStart = 0;
- int iEnd = 0;
- int iPos = 0;
- int i;
-
- Tcl_Obj *pRet;
-
- if( argc<2 ){
- sqlite3_result_error(context, "insufficient arguments", -1);
- return;
- }
-
- nName = sqlite3_value_bytes(argv[0]);
- zName = (const char *)sqlite3_value_text(argv[0]);
- nInput = sqlite3_value_bytes(argv[argc-1]);
- zInput = (const char *)sqlite3_value_text(argv[argc-1]);
-
- pHash = (Fts3Hash *)sqlite3_user_data(context);
- p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
-
- if( !p ){
- char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
- sqlite3_result_error(context, zErr, -1);
- sqlite3_free(zErr);
- return;
- }
-
- pRet = Tcl_NewObj();
- Tcl_IncrRefCount(pRet);
-
- for(i=1; i<argc-1; i++){
- azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
- }
-
- if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
- zErr = "error in xCreate()";
- goto finish;
- }
- pTokenizer->pModule = p;
- if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
- zErr = "error in xOpen()";
- goto finish;
- }
-
- while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
- Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
- Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
- zToken = &zInput[iStart];
- nToken = iEnd-iStart;
- Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
- }
-
- if( SQLITE_OK!=p->xClose(pCsr) ){
- zErr = "error in xClose()";
- goto finish;
- }
- if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
- zErr = "error in xDestroy()";
- goto finish;
- }
-
-finish:
- if( zErr ){
- sqlite3_result_error(context, zErr, -1);
- }else{
- sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
- }
- Tcl_DecrRefCount(pRet);
-}
-
-static
-int registerTokenizer(
- sqlite3 *db,
- char *zName,
- const sqlite3_tokenizer_module *p
-){
- int rc;
- sqlite3_stmt *pStmt;
- const char zSql[] = "SELECT fts3_tokenizer(?, ?)";
-
- rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
- sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
- sqlite3_step(pStmt);
-
- return sqlite3_finalize(pStmt);
-}
-
-static
-int queryTokenizer(
- sqlite3 *db,
- char *zName,
- const sqlite3_tokenizer_module **pp
-){
- int rc;
- sqlite3_stmt *pStmt;
- const char zSql[] = "SELECT fts3_tokenizer(?)";
-
- *pp = 0;
- rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
- if( SQLITE_ROW==sqlite3_step(pStmt) ){
- if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
- memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
- }
- }
-
- return sqlite3_finalize(pStmt);
-}
-
-void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
-
-/*
-** Implementation of the scalar function fts3_tokenizer_internal_test().
-** This function is used for testing only, it is not included in the
-** build unless SQLITE_TEST is defined.
-**
-** The purpose of this is to test that the fts3_tokenizer() function
-** can be used as designed by the C-code in the queryTokenizer and
-** registerTokenizer() functions above. These two functions are repeated
-** in the README.tokenizer file as an example, so it is important to
-** test them.
-**
-** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar
-** function with no arguments. An assert() will fail if a problem is
-** detected. i.e.:
-**
-** SELECT fts3_tokenizer_internal_test();
-**
-*/
-static void intTestFunc(
- sqlite3_context *context,
- int argc,
- sqlite3_value **argv
-){
- int rc;
- const sqlite3_tokenizer_module *p1;
- const sqlite3_tokenizer_module *p2;
- sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
-
- UNUSED_PARAMETER(argc);
- UNUSED_PARAMETER(argv);
-
- /* Test the query function */
- sqlite3Fts3SimpleTokenizerModule(&p1);
- rc = queryTokenizer(db, "simple", &p2);
- assert( rc==SQLITE_OK );
- assert( p1==p2 );
- rc = queryTokenizer(db, "nosuchtokenizer", &p2);
- assert( rc==SQLITE_ERROR );
- assert( p2==0 );
- assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
-
- /* Test the storage function */
- rc = registerTokenizer(db, "nosuchtokenizer", p1);
- assert( rc==SQLITE_OK );
- rc = queryTokenizer(db, "nosuchtokenizer", &p2);
- assert( rc==SQLITE_OK );
- assert( p2==p1 );
-
- sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
-}
-
-#endif
-
-/*
-** Set up SQL objects in database db used to access the contents of
-** the hash table pointed to by argument pHash. The hash table must
-** have been initialised to use string keys, and to take a private copy
-** of the key when a value is inserted. i.e. by a call similar to:
-**
-** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
-**
-** This function adds a scalar function (see header comment above
-** scalarFunc() in this file for details) and, if ENABLE_TABLE is
-** defined at compilation time, a temporary virtual table (see header
-** comment above struct HashTableVtab) to the database schema. Both
-** provide read/write access to the contents of *pHash.
-**
-** The third argument to this function, zName, is used as the name
-** of both the scalar and, if created, the virtual table.
-*/
-int sqlite3Fts3InitHashTable(
- sqlite3 *db,
- Fts3Hash *pHash,
- const char *zName
-){
- int rc = SQLITE_OK;
- void *p = (void *)pHash;
- const int any = SQLITE_ANY;
-
-#ifdef SQLITE_TEST
- char *zTest = 0;
- char *zTest2 = 0;
- void *pdb = (void *)db;
- zTest = sqlite3_mprintf("%s_test", zName);
- zTest2 = sqlite3_mprintf("%s_internal_test", zName);
- if( !zTest || !zTest2 ){
- rc = SQLITE_NOMEM;
- }
-#endif
-
- if( SQLITE_OK==rc ){
- rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0);
- }
- if( SQLITE_OK==rc ){
- rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0);
- }
-#ifdef SQLITE_TEST
- if( SQLITE_OK==rc ){
- rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0);
- }
- if( SQLITE_OK==rc ){
- rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0);
- }
-#endif
-
-#ifdef SQLITE_TEST
- sqlite3_free(zTest);
- sqlite3_free(zTest2);
-#endif
-
- return rc;
-}
-
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_tokenizer.h b/src/libtracker-fts/fts3_tokenizer.h
deleted file mode 100644
index c91c7ed79..000000000
--- a/src/libtracker-fts/fts3_tokenizer.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
-** 2006 July 10
-**
-** The author disclaims copyright to this source code.
-**
-*************************************************************************
-** Defines the interface to tokenizers used by fulltext-search. There
-** are three basic components:
-**
-** sqlite3_tokenizer_module is a singleton defining the tokenizer
-** interface functions. This is essentially the class structure for
-** tokenizers.
-**
-** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
-** including customization information defined at creation time.
-**
-** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
-** tokens from a particular input.
-*/
-#ifndef _FTS3_TOKENIZER_H_
-#define _FTS3_TOKENIZER_H_
-
-/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
-** If tokenizers are to be allowed to call sqlite3_*() functions, then
-** we will need a way to register the API consistently.
-*/
-#include "sqlite3.h"
-
-/*
-** Structures used by the tokenizer interface. When a new tokenizer
-** implementation is registered, the caller provides a pointer to
-** an sqlite3_tokenizer_module containing pointers to the callback
-** functions that make up an implementation.
-**
-** When an fts3 table is created, it passes any arguments passed to
-** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
-** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
-** implementation. The xCreate() function in turn returns an
-** sqlite3_tokenizer structure representing the specific tokenizer to
-** be used for the fts3 table (customized by the tokenizer clause arguments).
-**
-** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
-** method is called. It returns an sqlite3_tokenizer_cursor object
-** that may be used to tokenize a specific input buffer based on
-** the tokenization rules supplied by a specific sqlite3_tokenizer
-** object.
-*/
-typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
-typedef struct sqlite3_tokenizer sqlite3_tokenizer;
-typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
-
-struct sqlite3_tokenizer_module {
-
- /*
- ** Structure version. Should always be set to 0 or 1.
- */
- int iVersion;
-
- /*
- ** Create a new tokenizer. The values in the argv[] array are the
- ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
- ** TABLE statement that created the fts3 table. For example, if
- ** the following SQL is executed:
- **
- ** CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
- **
- ** then argc is set to 2, and the argv[] array contains pointers
- ** to the strings "arg1" and "arg2".
- **
- ** This method should return either SQLITE_OK (0), or an SQLite error
- ** code. If SQLITE_OK is returned, then *ppTokenizer should be set
- ** to point at the newly created tokenizer structure. The generic
- ** sqlite3_tokenizer.pModule variable should not be initialised by
- ** this callback. The caller will do so.
- */
- int (*xCreate)(
- int argc, /* Size of argv array */
- const char *const*argv, /* Tokenizer argument strings */
- sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
- );
-
- /*
- ** Destroy an existing tokenizer. The fts3 module calls this method
- ** exactly once for each successful call to xCreate().
- */
- int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
-
- /*
- ** Create a tokenizer cursor to tokenize an input buffer. The caller
- ** is responsible for ensuring that the input buffer remains valid
- ** until the cursor is closed (using the xClose() method).
- */
- int (*xOpen)(
- sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
- const char *pInput, int nBytes, /* Input buffer */
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
- );
-
- /*
- ** Destroy an existing tokenizer cursor. The fts3 module calls this
- ** method exactly once for each successful call to xOpen().
- */
- int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
-
- /*
- ** Retrieve the next token from the tokenizer cursor pCursor. This
- ** method should either return SQLITE_OK and set the values of the
- ** "OUT" variables identified below, or SQLITE_DONE to indicate that
- ** the end of the buffer has been reached, or an SQLite error code.
- **
- ** *ppToken should be set to point at a buffer containing the
- ** normalized version of the token (i.e. after any case-folding and/or
- ** stemming has been performed). *pnBytes should be set to the length
- ** of this buffer in bytes. The input text that generated the token is
- ** identified by the byte offsets returned in *piStartOffset and
- ** *piEndOffset. *piStartOffset should be set to the index of the first
- ** byte of the token in the input buffer. *piEndOffset should be set
- ** to the index of the first byte just past the end of the token in
- ** the input buffer.
- **
- ** The buffer *ppToken is set to point at is managed by the tokenizer
- ** implementation. It is only required to be valid until the next call
- ** to xNext() or xClose().
- */
- /* TODO(shess) current implementation requires pInput to be
- ** nul-terminated. This should either be fixed, or pInput/nBytes
- ** should be converted to zInput.
- */
- int (*xNext)(
- sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
- const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
- int *piStartOffset, /* OUT: Byte offset of token in input buffer */
- int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
- int *piPosition /* OUT: Number of tokens returned before this one */
- );
-
- /***********************************************************************
- ** Methods below this point are only available if iVersion>=1.
- */
-
- /*
- ** Configure the language id of a tokenizer cursor.
- */
- int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
-};
-
-struct sqlite3_tokenizer {
- const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
- /* Tokenizer implementations will typically add additional fields */
-};
-
-struct sqlite3_tokenizer_cursor {
- sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
- /* Tokenizer implementations will typically add additional fields */
-};
-
-int fts3_global_term_cnt(int iTerm, int iCol);
-int fts3_term_cnt(int iTerm, int iCol);
-
-
-#endif /* _FTS3_TOKENIZER_H_ */
diff --git a/src/libtracker-fts/fts3_tokenizer1.c b/src/libtracker-fts/fts3_tokenizer1.c
deleted file mode 100644
index deea06d92..000000000
--- a/src/libtracker-fts/fts3_tokenizer1.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
-** 2006 Oct 10
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** Implementation of the "simple" full-text-search tokenizer.
-*/
-
-/*
-** The code in this file is only compiled if:
-**
-** * The FTS3 module is being built as an extension
-** (in which case SQLITE_CORE is not defined), or
-**
-** * The FTS3 module is being built into the core of
-** SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
-*/
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "fts3_tokenizer.h"
-
-typedef struct simple_tokenizer {
- sqlite3_tokenizer base;
- char delim[128]; /* flag ASCII delimiters */
-} simple_tokenizer;
-
-typedef struct simple_tokenizer_cursor {
- sqlite3_tokenizer_cursor base;
- const char *pInput; /* input we are tokenizing */
- int nBytes; /* size of the input */
- int iOffset; /* current position in pInput */
- int iToken; /* index of next token to be returned */
- char *pToken; /* storage for current token */
- int nTokenAllocated; /* space allocated to pToken buffer */
-} simple_tokenizer_cursor;
-
-
-static int simpleDelim(simple_tokenizer *t, unsigned char c){
- return c<0x80 && t->delim[c];
-}
-static int fts3_isalnum(int x){
- return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z');
-}
-
-/*
-** Create a new tokenizer instance.
-*/
-static int simpleCreate(
- int argc, const char * const *argv,
- sqlite3_tokenizer **ppTokenizer
-){
- simple_tokenizer *t;
-
- t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
- if( t==NULL ) return SQLITE_NOMEM;
- memset(t, 0, sizeof(*t));
-
- /* TODO(shess) Delimiters need to remain the same from run to run,
- ** else we need to reindex. One solution would be a meta-table to
- ** track such information in the database, then we'd only want this
- ** information on the initial create.
- */
- if( argc>1 ){
- int i, n = (int)strlen(argv[1]);
- for(i=0; i<n; i++){
- unsigned char ch = argv[1][i];
- /* We explicitly don't support UTF-8 delimiters for now. */
- if( ch>=0x80 ){
- sqlite3_free(t);
- return SQLITE_ERROR;
- }
- t->delim[ch] = 1;
- }
- } else {
- /* Mark non-alphanumeric ASCII characters as delimiters */
- int i;
- for(i=1; i<0x80; i++){
- t->delim[i] = !fts3_isalnum(i) ? -1 : 0;
- }
- }
-
- *ppTokenizer = &t->base;
- return SQLITE_OK;
-}
-
-/*
-** Destroy a tokenizer
-*/
-static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
- sqlite3_free(pTokenizer);
- return SQLITE_OK;
-}
-
-/*
-** Prepare to begin tokenizing a particular string. The input
-** string to be tokenized is pInput[0..nBytes-1]. A cursor
-** used to incrementally tokenize this string is returned in
-** *ppCursor.
-*/
-static int simpleOpen(
- sqlite3_tokenizer *pTokenizer, /* The tokenizer */
- const char *pInput, int nBytes, /* String to be tokenized */
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
-){
- simple_tokenizer_cursor *c;
-
- UNUSED_PARAMETER(pTokenizer);
-
- c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
- if( c==NULL ) return SQLITE_NOMEM;
-
- c->pInput = pInput;
- if( pInput==0 ){
- c->nBytes = 0;
- }else if( nBytes<0 ){
- c->nBytes = (int)strlen(pInput);
- }else{
- c->nBytes = nBytes;
- }
- c->iOffset = 0; /* start tokenizing at the beginning */
- c->iToken = 0;
- c->pToken = NULL; /* no space allocated, yet. */
- c->nTokenAllocated = 0;
-
- *ppCursor = &c->base;
- return SQLITE_OK;
-}
-
-/*
-** Close a tokenization cursor previously opened by a call to
-** simpleOpen() above.
-*/
-static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
- simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
- sqlite3_free(c->pToken);
- sqlite3_free(c);
- return SQLITE_OK;
-}
-
-/*
-** Extract the next token from a tokenization cursor. The cursor must
-** have been opened by a prior call to simpleOpen().
-*/
-static int simpleNext(
- sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
- const char **ppToken, /* OUT: *ppToken is the token text */
- int *pnBytes, /* OUT: Number of bytes in token */
- int *piStartOffset, /* OUT: Starting offset of token */
- int *piEndOffset, /* OUT: Ending offset of token */
- int *piPosition /* OUT: Position integer of token */
-){
- simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
- simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
- unsigned char *p = (unsigned char *)c->pInput;
-
- while( c->iOffset<c->nBytes ){
- int iStartOffset;
-
- /* Scan past delimiter characters */
- while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
- c->iOffset++;
- }
-
- /* Count non-delimiter characters. */
- iStartOffset = c->iOffset;
- while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
- c->iOffset++;
- }
-
- if( c->iOffset>iStartOffset ){
- int i, n = c->iOffset-iStartOffset;
- if( n>c->nTokenAllocated ){
- char *pNew;
- c->nTokenAllocated = n+20;
- pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated);
- if( !pNew ) return SQLITE_NOMEM;
- c->pToken = pNew;
- }
- for(i=0; i<n; i++){
- /* TODO(shess) This needs expansion to handle UTF-8
- ** case-insensitivity.
- */
- unsigned char ch = p[iStartOffset+i];
- c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch);
- }
- *ppToken = c->pToken;
- *pnBytes = n;
- *piStartOffset = iStartOffset;
- *piEndOffset = c->iOffset;
- *piPosition = c->iToken++;
-
- return SQLITE_OK;
- }
- }
- return SQLITE_DONE;
-}
-
-/*
-** The set of routines that implement the simple tokenizer
-*/
-static const sqlite3_tokenizer_module simpleTokenizerModule = {
- 0,
- simpleCreate,
- simpleDestroy,
- simpleOpen,
- simpleClose,
- simpleNext,
- 0,
-};
-
-/*
-** Set *ppModule to a pointer to the static sqlite3_tokenizer_module
-** structure for the simple tokenizer. Nothing is allocated.
-*/
-void sqlite3Fts3SimpleTokenizerModule(
- sqlite3_tokenizer_module const**ppModule
-){
- *ppModule = &simpleTokenizerModule;
-}
-
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
diff --git a/src/libtracker-fts/fts3_unicode.c b/src/libtracker-fts/fts3_unicode.c
deleted file mode 100644
index 79941edbb..000000000
--- a/src/libtracker-fts/fts3_unicode.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
-** 2012 May 24
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** Implementation of the "unicode" full-text-search tokenizer.
-*/
-
-#ifdef SQLITE_ENABLE_FTS4_UNICODE61
-
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "fts3_tokenizer.h"
-
-/*
-** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
-** from the sqlite3 source file utf.c. If this file is compiled as part
-** of the amalgamation, they are not required.
-*/
-#ifndef SQLITE_AMALGAMATION
-
-static const unsigned char sqlite3Utf8Trans1[] = {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
- 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
-};
-
-#define READ_UTF8(zIn, zTerm, c) \
- c = *(zIn++); \
- if( c>=0xc0 ){ \
- c = sqlite3Utf8Trans1[c-0xc0]; \
- while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
- c = (c<<6) + (0x3f & *(zIn++)); \
- } \
- if( c<0x80 \
- || (c&0xFFFFF800)==0xD800 \
- || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
- }
-
-#define WRITE_UTF8(zOut, c) { \
- if( c<0x00080 ){ \
- *zOut++ = (u8)(c&0xFF); \
- } \
- else if( c<0x00800 ){ \
- *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \
- *zOut++ = 0x80 + (u8)(c & 0x3F); \
- } \
- else if( c<0x10000 ){ \
- *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \
- *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
- *zOut++ = 0x80 + (u8)(c & 0x3F); \
- }else{ \
- *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \
- *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \
- *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
- *zOut++ = 0x80 + (u8)(c & 0x3F); \
- } \
-}
-
-#endif /* ifndef SQLITE_AMALGAMATION */
-
-typedef struct unicode_tokenizer unicode_tokenizer;
-typedef struct unicode_cursor unicode_cursor;
-
-struct unicode_tokenizer {
- sqlite3_tokenizer base;
- int bRemoveDiacritic;
- int nException;
- int *aiException;
-};
-
-struct unicode_cursor {
- sqlite3_tokenizer_cursor base;
- const unsigned char *aInput; /* Input text being tokenized */
- int nInput; /* Size of aInput[] in bytes */
- int iOff; /* Current offset within aInput[] */
- int iToken; /* Index of next token to be returned */
- char *zToken; /* storage for current token */
- int nAlloc; /* space allocated at zToken */
-};
-
-
-/*
-** Destroy a tokenizer allocated by unicodeCreate().
-*/
-static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
- if( pTokenizer ){
- unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer;
- sqlite3_free(p->aiException);
- sqlite3_free(p);
- }
- return SQLITE_OK;
-}
-
-/*
-** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE
-** statement has specified that the tokenizer for this table shall consider
-** all characters in string zIn/nIn to be separators (if bAlnum==0) or
-** token characters (if bAlnum==1).
-**
-** For each codepoint in the zIn/nIn string, this function checks if the
-** sqlite3FtsUnicodeIsalnum() function already returns the desired result.
-** If so, no action is taken. Otherwise, the codepoint is added to the
-** unicode_tokenizer.aiException[] array. For the purposes of tokenization,
-** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all
-** codepoints in the aiException[] array.
-**
-** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()
-** identifies as a diacritic) occurs in the zIn/nIn string it is ignored.
-** It is not possible to change the behaviour of the tokenizer with respect
-** to these codepoints.
-*/
-static int unicodeAddExceptions(
- unicode_tokenizer *p, /* Tokenizer to add exceptions to */
- int bAlnum, /* Replace Isalnum() return value with this */
- const char *zIn, /* Array of characters to make exceptions */
- int nIn /* Length of zIn in bytes */
-){
- const unsigned char *z = (const unsigned char *)zIn;
- const unsigned char *zTerm = &z[nIn];
- int iCode;
- int nEntry = 0;
-
- assert( bAlnum==0 || bAlnum==1 );
-
- while( z<zTerm ){
- READ_UTF8(z, zTerm, iCode);
- assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
- if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum
- && sqlite3FtsUnicodeIsdiacritic(iCode)==0
- ){
- nEntry++;
- }
- }
-
- if( nEntry ){
- int *aNew; /* New aiException[] array */
- int nNew; /* Number of valid entries in array aNew[] */
-
- aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int));
- if( aNew==0 ) return SQLITE_NOMEM;
- nNew = p->nException;
-
- z = (const unsigned char *)zIn;
- while( z<zTerm ){
- READ_UTF8(z, zTerm, iCode);
- if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum
- && sqlite3FtsUnicodeIsdiacritic(iCode)==0
- ){
- int i, j;
- for(i=0; i<nNew && aNew[i]<iCode; i++);
- for(j=nNew; j>i; j--) aNew[j] = aNew[j-1];
- aNew[i] = iCode;
- nNew++;
- }
- }
- p->aiException = aNew;
- p->nException = nNew;
- }
-
- return SQLITE_OK;
-}
-
-/*
-** Return true if the p->aiException[] array contains the value iCode.
-*/
-static int unicodeIsException(unicode_tokenizer *p, int iCode){
- if( p->nException>0 ){
- int *a = p->aiException;
- int iLo = 0;
- int iHi = p->nException-1;
-
- while( iHi>=iLo ){
- int iTest = (iHi + iLo) / 2;
- if( iCode==a[iTest] ){
- return 1;
- }else if( iCode>a[iTest] ){
- iLo = iTest+1;
- }else{
- iHi = iTest-1;
- }
- }
- }
-
- return 0;
-}
-
-/*
-** Return true if, for the purposes of tokenization, codepoint iCode is
-** considered a token character (not a separator).
-*/
-static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){
- assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
- return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode);
-}
-
-/*
-** Create a new tokenizer instance.
-*/
-static int unicodeCreate(
- int nArg, /* Size of array argv[] */
- const char * const *azArg, /* Tokenizer creation arguments */
- sqlite3_tokenizer **pp /* OUT: New tokenizer handle */
-){
- unicode_tokenizer *pNew; /* New tokenizer object */
- int i;
- int rc = SQLITE_OK;
-
- pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
- if( pNew==NULL ) return SQLITE_NOMEM;
- memset(pNew, 0, sizeof(unicode_tokenizer));
- pNew->bRemoveDiacritic = 1;
-
- for(i=0; rc==SQLITE_OK && i<nArg; i++){
- const char *z = azArg[i];
- int n = strlen(z);
-
- if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
- pNew->bRemoveDiacritic = 1;
- }
- else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){
- pNew->bRemoveDiacritic = 0;
- }
- else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){
- rc = unicodeAddExceptions(pNew, 1, &z[11], n-11);
- }
- else if( n>=11 && memcmp("separators=", z, 11)==0 ){
- rc = unicodeAddExceptions(pNew, 0, &z[11], n-11);
- }
- else{
- /* Unrecognized argument */
- rc = SQLITE_ERROR;
- }
- }
-
- if( rc!=SQLITE_OK ){
- unicodeDestroy((sqlite3_tokenizer *)pNew);
- pNew = 0;
- }
- *pp = (sqlite3_tokenizer *)pNew;
- return rc;
-}
-
-/*
-** Prepare to begin tokenizing a particular string. The input
-** string to be tokenized is pInput[0..nBytes-1]. A cursor
-** used to incrementally tokenize this string is returned in
-** *ppCursor.
-*/
-static int unicodeOpen(
- sqlite3_tokenizer *p, /* The tokenizer */
- const char *aInput, /* Input string */
- int nInput, /* Size of string aInput in bytes */
- sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */
-){
- unicode_cursor *pCsr;
-
- pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor));
- if( pCsr==0 ){
- return SQLITE_NOMEM;
- }
- memset(pCsr, 0, sizeof(unicode_cursor));
-
- pCsr->aInput = (const unsigned char *)aInput;
- if( aInput==0 ){
- pCsr->nInput = 0;
- }else if( nInput<0 ){
- pCsr->nInput = (int)strlen(aInput);
- }else{
- pCsr->nInput = nInput;
- }
-
- *pp = &pCsr->base;
- UNUSED_PARAMETER(p);
- return SQLITE_OK;
-}
-
-/*
-** Close a tokenization cursor previously opened by a call to
-** unicodeOpen() above.
-*/
-static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
- unicode_cursor *pCsr = (unicode_cursor *) pCursor;
- sqlite3_free(pCsr->zToken);
- sqlite3_free(pCsr);
- return SQLITE_OK;
-}
-
-/*
-** Extract the next token from a tokenization cursor. The cursor must
-** have been opened by a prior call to unicodeOpen().
-*/
-static int unicodeNext(
- sqlite3_tokenizer_cursor *pC, /* Cursor returned by unicodeOpen */
- const char **paToken, /* OUT: Token text */
- int *pnToken, /* OUT: Number of bytes at *paToken */
- int *piStart, /* OUT: Starting offset of token */
- int *piEnd, /* OUT: Ending offset of token */
- int *piPos /* OUT: Position integer of token */
-){
- unicode_cursor *pCsr = (unicode_cursor *)pC;
- unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
- int iCode;
- char *zOut;
- const unsigned char *z = &pCsr->aInput[pCsr->iOff];
- const unsigned char *zStart = z;
- const unsigned char *zEnd;
- const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
-
- /* Scan past any delimiter characters before the start of the next token.
- ** Return SQLITE_DONE early if this takes us all the way to the end of
- ** the input. */
- while( z<zTerm ){
- READ_UTF8(z, zTerm, iCode);
- if( unicodeIsAlnum(p, iCode) ) break;
- zStart = z;
- }
- if( zStart>=zTerm ) return SQLITE_DONE;
-
- zOut = pCsr->zToken;
- do {
- int iOut;
-
- /* Grow the output buffer if required. */
- if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
- char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
- if( !zNew ) return SQLITE_NOMEM;
- zOut = &zNew[zOut - pCsr->zToken];
- pCsr->zToken = zNew;
- pCsr->nAlloc += 64;
- }
-
- /* Write the folded case of the last character read to the output */
- zEnd = z;
- iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic);
- if( iOut ){
- WRITE_UTF8(zOut, iOut);
- }
-
- /* If the cursor is not at EOF, read the next character */
- if( z>=zTerm ) break;
- READ_UTF8(z, zTerm, iCode);
- }while( unicodeIsAlnum(p, iCode)
- || sqlite3FtsUnicodeIsdiacritic(iCode)
- );
-
- /* Set the output variables and return. */
- pCsr->iOff = (z - pCsr->aInput);
- *paToken = pCsr->zToken;
- *pnToken = zOut - pCsr->zToken;
- *piStart = (zStart - pCsr->aInput);
- *piEnd = (zEnd - pCsr->aInput);
- *piPos = pCsr->iToken++;
- return SQLITE_OK;
-}
-
-/*
-** Set *ppModule to a pointer to the sqlite3_tokenizer_module
-** structure for the unicode tokenizer.
-*/
-void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
- static const sqlite3_tokenizer_module module = {
- 0,
- unicodeCreate,
- unicodeDestroy,
- unicodeOpen,
- unicodeClose,
- unicodeNext,
- 0,
- };
- *ppModule = &module;
-}
-
-#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
-#endif /* ifdef SQLITE_ENABLE_FTS4_UNICODE61 */
diff --git a/src/libtracker-fts/fts3_unicode2.c b/src/libtracker-fts/fts3_unicode2.c
deleted file mode 100644
index 3c2456902..000000000
--- a/src/libtracker-fts/fts3_unicode2.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
-** 2012 May 25
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-*/
-
-/*
-** DO NOT EDIT THIS MACHINE GENERATED FILE.
-*/
-
-#if defined(SQLITE_ENABLE_FTS4_UNICODE61)
-#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
-
-#include <assert.h>
-
-/*
-** Return true if the argument corresponds to a unicode codepoint
-** classified as either a letter or a number. Otherwise false.
-**
-** The results are undefined if the value passed to this function
-** is less than zero.
-**
-** Codepoints below 128 are answered from the aAscii[] bitmap, codepoints
-** below (1<<22) by a binary search of aEntry[], and anything larger is
-** reported as alphanumeric (returns 1).
-*/
-int sqlite3FtsUnicodeIsalnum(int c){
- /* Each unsigned integer in the following array corresponds to a contiguous
- ** range of unicode codepoints that are not either letters or numbers (i.e.
- ** codepoints for which this function should return 0).
- **
- ** The most significant 22 bits in each 32-bit value contain the first
- ** codepoint in the range. The least significant 10 bits are used to store
- ** the size of the range (always at least 1). In other words, the value
- ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
- ** C. It is not possible to represent a range larger than 1023 codepoints
- ** using this format.
- */
- const static unsigned int aEntry[] = {
- 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
- 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
- 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
- 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
- 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
- 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
- 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
- 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
- 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
- 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
- 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
- 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
- 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
- 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
- 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
- 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
- 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
- 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
- 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
- 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
- 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
- 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
- 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
- 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
- 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
- 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
- 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
- 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
- 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
- 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
- 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
- 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
- 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
- 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
- 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
- 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
- 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
- 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
- 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
- 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
- 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
- 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
- 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
- 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
- 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
- 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
- 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
- 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
- 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
- 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
- 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
- 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
- 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
- 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
- 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
- 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
- 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
- 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
- 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
- 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
- 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
- 0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810,
- 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023,
- 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
- 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
- 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E,
- 0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01,
- 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01,
- 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016,
- 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004,
- 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004,
- 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5,
- 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01,
- 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401,
- 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064,
- 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F,
- 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009,
- 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014,
- 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001,
- 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018,
- 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401,
- 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001,
- 0x43FFF401,
- };
- static const unsigned int aAscii[4] = { /* one bit per ASCII codepoint; a set bit means "not alphanumeric" */
- 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
- };
-
- if( c<128 ){
- return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); /* NOTE(review): signed 1<<31 when (c&0x1F)==31 */
- }else if( c<(1<<22) ){
- unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; /* largest packed value whose range starts at c */
- int iRes; /* index of the last entry <= key; only assigned inside the loop */
- int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
- int iLo = 0;
- while( iHi>=iLo ){
- int iTest = (iHi + iLo) / 2;
- if( key >= aEntry[iTest] ){
- iRes = iTest;
- iLo = iTest+1;
- }else{
- iHi = iTest-1;
- }
- }
- assert( aEntry[0]<key );
- assert( key>=aEntry[iRes] );
- return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); /* true when c lies past the end of that range */
- }
- return 1;
-}
-
-
-/*
-** If the argument is a codepoint corresponding to a lowercase letter
-** in the ASCII range with a diacritic added, return the codepoint
-** of the ASCII letter only. For example, if passed 235 - "LATIN
-** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
-** E"). The results of passing a codepoint that corresponds to an
-** uppercase letter are undefined.
-**
-** aDia[] and aChar[] are parallel arrays. Each aDia[] value packs a range
-** as ((C<<3) + N): the range starts at codepoint C and runs through
-** codepoint C+N inclusive. A binary search picks the last range whose
-** start is not greater than c; if c lies inside it the matching aChar[]
-** value is returned, otherwise c is returned unchanged. Some aChar[]
-** entries are '\0', so 0 is returned for codepoints in those ranges.
-*/
-static int remove_diacritic(int c){
- unsigned short aDia[] = { /* NOTE(review): automatic (non-static, non-const) lookup table */
- 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
- 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
- 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
- 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
- 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928,
- 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234,
- 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504,
- 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529,
- 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
- 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
- 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
- 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
- 62924, 63050, 63082, 63274, 63390,
- };
- char aChar[] = { /* replacement for range i of aDia[]; same length as aDia[] */
- '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c',
- 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r',
- 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o',
- 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r',
- 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0',
- '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h',
- 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't',
- 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a',
- 'e', 'i', 'o', 'u', 'y',
- };
-
- unsigned int key = (((unsigned int)c)<<3) | 0x00000007; /* largest packed value whose range starts at c */
- int iRes = 0; /* aDia[0]==0, so entry 0 always satisfies key>=aDia[iRes] */
- int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
- int iLo = 0;
- while( iHi>=iLo ){
- int iTest = (iHi + iLo) / 2;
- if( key >= aDia[iTest] ){
- iRes = iTest;
- iLo = iTest+1;
- }else{
- iHi = iTest-1;
- }
- }
- assert( key>=aDia[iRes] );
- return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); /* past the range end: leave c as is */
-}; /* NOTE(review): stray ';' after the function body */
-
-
-/*
-** Return true if the argument interpreted as a unicode codepoint
-** is a diacritical modifier character.
-**
-** Only codepoints 768 through 817 are examined; anything outside that
-** window yields 0. mask0 holds one bit per codepoint for 768..799 and
-** mask1 one bit per codepoint from 800 upwards. The value returned is the
-** masked bit itself (zero or non-zero), not necessarily 1.
-*/
-int sqlite3FtsUnicodeIsdiacritic(int c){
- unsigned int mask0 = 0x08029FDF;
- unsigned int mask1 = 0x000361F8;
- if( c<768 || c>817 ) return 0;
- return (c < 768+32) ?
- (mask0 & (1 << (c-768))) : /* NOTE(review): shift count reaches 31 on a signed 1 when c==799 */
- (mask1 & (1 << (c-768-32)));
-}
-
-
-/*
-** Interpret the argument as a unicode codepoint. If the codepoint
-** is an upper case character that has a lower case equivalent,
-** return the codepoint corresponding to the lower case version.
-** Otherwise, return a copy of the argument.
-**
-** The results are undefined if the value passed to this function
-** is less than zero.
-**
-** Handling by range of c:
-**   c < 128          only 'A'..'Z' are shifted to 'a'..'z'
-**   128 <= c < 65536 aEntry[] is searched for a folding rule and, if
-**                    bRemoveDiacritic is non-zero, the folded value is then
-**                    passed through remove_diacritic()
-**   66560..66599     40 is added to c
-**   anything else    returned unchanged
-** bRemoveDiacritic therefore has no effect outside the second range.
-*/
-int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
- /* Each entry in the following array defines a rule for folding a range
- ** of codepoints to lower case. The rule applies to a range of nRange
- ** codepoints starting at codepoint iCode.
- **
- ** If the least significant bit in flags is clear, then the rule applies
- ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
- ** need to be folded). Or, if it is set, then the rule only applies to
- ** every second codepoint in the range, starting with codepoint C.
- **
- ** The 7 most significant bits in flags are an index into the aiOff[]
- ** array. If a specific codepoint C does require folding, then its lower
- ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
- **
- ** The contents of this array are generated by parsing the CaseFolding.txt
- ** file distributed as part of the "Unicode Character Database". See
- ** http://www.unicode.org for details.
- */
- static const struct TableEntry {
- unsigned short iCode;
- unsigned char flags;
- unsigned char nRange;
- } aEntry[] = {
- {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
- {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
- {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
- {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
- {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
- {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
- {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
- {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
- {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
- {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
- {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
- {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
- {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
- {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
- {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
- {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
- {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
- {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
- {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
- {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
- {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
- {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
- {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
- {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
- {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
- {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
- {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
- {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
- {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
- {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
- {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
- {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
- {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
- {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
- {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
- {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
- {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
- {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
- {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
- {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
- {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
- {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
- {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
- {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
- {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
- {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
- {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
- {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
- {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
- {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
- {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
- {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
- {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
- {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
- {65313, 14, 26},
- };
- static const unsigned short aiOff[] = { /* offsets added modulo 65536; indexed by flags>>1 */
- 1, 2, 8, 15, 16, 26, 28, 32,
- 37, 38, 40, 48, 63, 64, 69, 71,
- 79, 80, 116, 202, 203, 205, 206, 207,
- 209, 210, 211, 213, 214, 217, 218, 219,
- 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
- 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
- 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
- 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
- 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
- 65514, 65521, 65527, 65528, 65529,
- };
-
- int ret = c;
-
- assert( c>=0 );
- assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
-
- if( c<128 ){
- if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
- }else if( c<65536 ){
- int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
- int iLo = 0;
- int iRes = -1; /* stays -1 if no entry has iCode<=c */
-
- while( iHi>=iLo ){
- int iTest = (iHi + iLo) / 2;
- int cmp = (c - aEntry[iTest].iCode);
- if( cmp>=0 ){
- iRes = iTest;
- iLo = iTest+1;
- }else{
- iHi = iTest-1;
- }
- }
- assert( iRes<0 || c>=aEntry[iRes].iCode );
-
- if( iRes>=0 ){
- const struct TableEntry *p = &aEntry[iRes];
- if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ /* inside the range and, for alternating rules, same parity as iCode */
- ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
- assert( ret>0 );
- }
- }
-
- if( bRemoveDiacritic ) ret = remove_diacritic(ret); /* applied to the folded value, in this branch only */
- }
-
- else if( c>=66560 && c<66600 ){ /* presumably the Deseret capitals starting at U+10400 - confirm */
- ret = c + 40;
- }
-
- return ret;
-}
-#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
-#endif /* defined(SQLITE_ENABLE_FTS4_UNICODE61) */
diff --git a/src/libtracker-fts/fts3_write.c b/src/libtracker-fts/fts3_write.c
deleted file mode 100644
index bda7fbbe1..000000000
--- a/src/libtracker-fts/fts3_write.c
+++ /dev/null
@@ -1,5402 +0,0 @@
-/*
-** 2009 Oct 23
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This file is part of the SQLite FTS3 extension module. Specifically,
-** this file contains code to insert, update and delete rows from FTS3
-** tables. It also contains code to merge FTS3 b-tree segments. Some
-** of the sub-routines used to merge segments are also used by the query
-** code in fts3.c.
-*/
-
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <string.h>
-#include <assert.h>
-#include <stdlib.h>
-
-
-#define FTS_MAX_APPENDABLE_HEIGHT 16
-
-/*
-** When full-text index nodes are loaded from disk, the buffer that they
-** are loaded into has the following number of bytes of padding at the end
-** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer
-** of 920 bytes is allocated for it.
-**
-** This means that if we have a pointer into a buffer containing node data,
-** it is always safe to read up to two varints from it without risking an
-** overread, even if the node data is corrupted.
-*/
-#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2)
-
-/*
-** Under certain circumstances, b-tree nodes (doclists) can be loaded into
-** memory incrementally instead of all at once. This can be a big performance
-** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext()
-** method before retrieving all query results (as may happen, for example,
-** if a query has a LIMIT clause).
-**
-** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD
-** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes.
-** The code is written so that the hard lower-limit for each of these values
-** is 1. Clearly such small values would be inefficient, but can be useful
-** for testing purposes.
-**
-** If this module is built with SQLITE_TEST defined, these constants may
-** be overridden at runtime for testing purposes. File fts3_test.c contains
-** a Tcl interface to read and write the values.
-*/
-#ifdef SQLITE_TEST
-int test_fts3_node_chunksize = (4*1024);
-int test_fts3_node_chunk_threshold = (4*1024)*4;
-# define FTS3_NODE_CHUNKSIZE test_fts3_node_chunksize
-# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold
-#else
-# define FTS3_NODE_CHUNKSIZE (4*1024)
-# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
-#endif
-
-/*
-** The values that may be meaningfully bound to the :1 parameter in
-** statements SQL_REPLACE_STAT and SQL_SELECT_STAT.
-*/
-#define FTS_STAT_DOCTOTAL 0
-#define FTS_STAT_INCRMERGEHINT 1
-#define FTS_STAT_AUTOINCRMERGE 2
-
-/*
-** If FTS3_LOG_MERGES is defined, call sqlite3_log() to report each automatic
-** and incremental merge operation that takes place. This is used for
-** debugging FTS only, it should not usually be turned on in production
-** systems. When it is not defined, fts3LogMerge() expands to nothing.
-*/
-#ifdef FTS3_LOG_MERGES
-static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){
- sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel); /* iAbsLevel is narrowed to int for logging */
-}
-#else
-#define fts3LogMerge(x, y)
-#endif
-
-
-typedef struct PendingList PendingList;
-typedef struct SegmentNode SegmentNode;
-typedef struct SegmentWriter SegmentWriter;
-
-/*
-** An instance of the following data structure is used to build doclists
-** incrementally. See function fts3PendingListAppend() for details.
-**
-** NOTE(review): the fields carry no comments of their own. Going by the
-** names, aData/nData/nSpace look like a growable byte buffer (pointer,
-** bytes used, bytes allocated) and iLastDocid/iLastCol/iLastPos like the
-** most recently appended docid, column and position - confirm against
-** fts3PendingListAppend().
-*/
-struct PendingList {
- int nData;
- char *aData;
- int nSpace;
- sqlite3_int64 iLastDocid;
- sqlite3_int64 iLastCol;
- sqlite3_int64 iLastPos;
-};
-
-
-/*
-** Each cursor has a (possibly empty) linked list of the following objects.
-** The list is singly linked through pNext. Each element ties one
-** expression token (pToken) and the column it must occur in (iCol) to
-** the PendingList in which its doclist is assembled (pList).
-*/
-struct Fts3DeferredToken {
- Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */
- int iCol; /* Column token must occur in */
- Fts3DeferredToken *pNext; /* Next in list of deferred tokens */
- PendingList *pList; /* Doclist is assembled here */
-};
-
-/*
-** An instance of this structure is used to iterate through the terms on
-** a contiguous set of segment b-tree leaf nodes. Although the details of
-** this structure are only manipulated by code in this file, opaque handles
-** of type Fts3SegReader* are also used by code in fts3.c to iterate through
-** terms when querying the full-text index. See functions:
-**
-** sqlite3Fts3SegReaderNew()
-** sqlite3Fts3SegReaderFree()
-** sqlite3Fts3SegReaderIterate()
-**
-** Methods used to manipulate Fts3SegReader structures:
-**
-** fts3SegReaderNext()
-** fts3SegReaderFirstDocid()
-** fts3SegReaderNextDocid()
-**
-** The two macros that follow the structure classify a reader from its
-** fields: fts3SegReaderIsPending() tests ppNextElem for non-NULL and
-** fts3SegReaderIsRootOnly() tests rootOnly for non-zero.
-*/
-struct Fts3SegReader {
- int iIdx; /* Index within level, or 0x7FFFFFFF for PT */
- u8 bLookup; /* True for a lookup only */
- u8 rootOnly; /* True for a root-only reader */
-
- sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */
- sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */
- sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */
- sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */
-
- char *aNode; /* Pointer to node data (or NULL) */
- int nNode; /* Size of buffer at aNode (or 0) */
- int nPopulate; /* If >0, bytes of buffer aNode[] loaded */
- sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */
-
- Fts3HashElem **ppNextElem; /* Non-NULL marks a "pending" reader (see fts3SegReaderIsPending) */
-
- /* Variables set by fts3SegReaderNext(). These may be read directly
- ** by the caller. They are valid from the time SegmentReaderNew() returns
- ** until SegmentReaderNext() returns something other than SQLITE_OK
- ** (i.e. SQLITE_DONE).
- */
- int nTerm; /* Number of bytes in current term */
- char *zTerm; /* Pointer to current term */
- int nTermAlloc; /* Allocated size of zTerm buffer */
- char *aDoclist; /* Pointer to doclist of current entry */
- int nDoclist; /* Size of doclist in current entry */
-
- /* The following variables are used by fts3SegReaderNextDocid() to iterate
- ** through the current doclist (aDoclist/nDoclist).
- */
- char *pOffsetList; /* presumably the offset list of the current docid - confirm */
- int nOffsetList; /* For descending pending seg-readers only */
- sqlite3_int64 iDocid; /* presumably the docid currently being visited - confirm */
-};
-
-#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
-#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0)
-
-/*
-** An instance of this structure is used to create a segment b-tree in the
-** database. The internal details of this type are only accessed by the
-** following functions:
-**
-** fts3SegWriterAdd()
-** fts3SegWriterFlush()
-** fts3SegWriterFree()
-**
-** Three groups of fields are visible below: the interior tree being
-** assembled (pTree), the %_segments slot bookkeeping (iFirst, iFree), and
-** two buffers - the previous term (zTerm/nTerm, possibly stored in the
-** zMalloc/nMalloc allocation) and the data block (aData/nData/nSize).
-*/
-struct SegmentWriter {
- SegmentNode *pTree; /* Pointer to interior tree structure */
- sqlite3_int64 iFirst; /* First slot in %_segments written */
- sqlite3_int64 iFree; /* Next free slot in %_segments */
- char *zTerm; /* Pointer to previous term buffer */
- int nTerm; /* Number of bytes in zTerm */
- int nMalloc; /* Size of malloc'd buffer at zMalloc */
- char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
- int nSize; /* Size of allocation at aData */
- int nData; /* Bytes of data in aData */
- char *aData; /* Pointer to block from malloc() */
-};
-
-/*
-** Type SegmentNode is used by the following three functions to create
-** the interior part of the segment b+-tree structures (everything except
-** the leaf nodes). These functions and type are only ever used by code
-** within the fts3SegWriterXXX() family of functions described above.
-**
-** fts3NodeAddTerm()
-** fts3NodeWrite()
-** fts3NodeFree()
-**
-** When a b+tree is written to the database (either as a result of a merge
-** or the pending-terms table being flushed), leaves are written into the
-** database file as soon as they are completely populated. The interior of
-** the tree is assembled in memory and written out only once all leaves have
-** been populated and stored. This is Ok, as the b+-tree fanout is usually
-** very large, meaning that the interior of the tree consumes relatively
-** little memory.
-**
-** Nodes are linked three ways: upwards through pParent, sideways through
-** pRight, and back to the first node of the same depth through pLeftmost.
-*/
-struct SegmentNode {
- SegmentNode *pParent; /* Parent node (or NULL for root node) */
- SegmentNode *pRight; /* Pointer to right-sibling */
- SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */
- int nEntry; /* Number of terms written to node so far */
- char *zTerm; /* Pointer to previous term buffer */
- int nTerm; /* Number of bytes in zTerm */
- int nMalloc; /* Size of malloc'd buffer at zMalloc */
- char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
- int nData; /* Bytes of valid data so far */
- char *aData; /* Node data */
-};
-
-/*
-** Valid values for the second argument to fts3SqlStmt().
-**
-** Each value is used by fts3SqlStmt() as an index into both its azSql[]
-** array of SQL text and the aStmt[] array of cached statement handles in
-** the Fts3Table, so the numbering here must stay in step with the order
-** of the azSql[] entries.
-*/
-#define SQL_DELETE_CONTENT 0
-#define SQL_IS_EMPTY 1
-#define SQL_DELETE_ALL_CONTENT 2
-#define SQL_DELETE_ALL_SEGMENTS 3
-#define SQL_DELETE_ALL_SEGDIR 4
-#define SQL_DELETE_ALL_DOCSIZE 5
-#define SQL_DELETE_ALL_STAT 6
-#define SQL_SELECT_CONTENT_BY_ROWID 7
-#define SQL_NEXT_SEGMENT_INDEX 8
-#define SQL_INSERT_SEGMENTS 9
-#define SQL_NEXT_SEGMENTS_ID 10
-#define SQL_INSERT_SEGDIR 11
-#define SQL_SELECT_LEVEL 12
-#define SQL_SELECT_LEVEL_RANGE 13
-#define SQL_SELECT_LEVEL_COUNT 14
-#define SQL_SELECT_SEGDIR_MAX_LEVEL 15
-#define SQL_DELETE_SEGDIR_LEVEL 16
-#define SQL_DELETE_SEGMENTS_RANGE 17
-#define SQL_CONTENT_INSERT 18
-#define SQL_DELETE_DOCSIZE 19
-#define SQL_REPLACE_DOCSIZE 20
-#define SQL_SELECT_DOCSIZE 21
-#define SQL_SELECT_STAT 22
-#define SQL_REPLACE_STAT 23
-
-#define SQL_SELECT_ALL_PREFIX_LEVEL 24 /* azSql[24] in fts3SqlStmt() is an empty string */
-#define SQL_DELETE_ALL_TERMS_SEGDIR 25 /* azSql[25] in fts3SqlStmt() is an empty string */
-#define SQL_DELETE_SEGDIR_RANGE 26
-#define SQL_SELECT_ALL_LANGID 27
-#define SQL_FIND_MERGE_LEVEL 28
-#define SQL_MAX_LEAF_NODE_ESTIMATE 29
-#define SQL_DELETE_SEGDIR_ENTRY 30
-#define SQL_SHIFT_SEGDIR_ENTRY 31
-#define SQL_SELECT_SEGDIR 32
-#define SQL_CHOMP_SEGDIR 33
-#define SQL_SEGMENT_IS_APPENDABLE 34
-#define SQL_SELECT_INDEXES 35
-#define SQL_SELECT_MXLEVEL 36
-
-/*
-** This function is used to obtain an SQLite prepared statement handle
-** for the statement identified by the second argument. If successful,
-** *pp is set to the requested statement handle and SQLITE_OK returned.
-** If the statement cannot be built or prepared, an SQLite error code is
-** returned and *pp is set to 0.
-**
-** If argument apVal is not NULL, then it must point to an array with
-** at least as many entries as the requested statement has bound
-** parameters. The values are bound to the statements parameters before
-** returning.
-**
-** Statements are prepared on first use. The handle is stored in
-** p->aStmt[eStmt] and the same handle is handed back by later calls with
-** the same eStmt. The SQL text is produced with sqlite3_mprintf() from the
-** azSql[] template, which is why a literal percent sign is written "%%".
-**
-** NOTE(review): if binding a value from apVal[] fails, the error code is
-** returned but *pp is still set to the (non-NULL) statement handle.
-*/
-static int fts3SqlStmt(
- Fts3Table *p, /* Virtual table handle */
- int eStmt, /* One of the SQL_XXX constants above */
- sqlite3_stmt **pp, /* OUT: Statement handle */
- sqlite3_value **apVal /* Values to bind to statement */
-){
- const char *azSql[] = {
-/* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?",
-/* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)",
-/* 2 */ "DELETE FROM %Q.'%q_content'",
-/* 3 */ "DELETE FROM %Q.'%q_segments'",
-/* 4 */ "DELETE FROM %Q.'%q_segdir'",
-/* 5 */ "DELETE FROM %Q.'%q_docsize'",
-/* 6 */ "DELETE FROM %Q.'%q_stat'",
-/* 7 */ "SELECT %s WHERE rowid=?",
-/* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1",
-/* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
-/* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)",
-/* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)",
-
- /* Return segments in order from oldest to newest.*/
-/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
- "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC",
-/* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
- "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" /* NOTE(review): no trailing space, so the SQL reads "?ORDER BY" */
- "ORDER BY level DESC, idx ASC",
-
-/* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?",
-/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
-
-/* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?",
-/* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?",
-/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)",
-/* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?",
-/* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",
-/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
-/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?",
-/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)",
-/* 24 */ "", /* SQL_SELECT_ALL_PREFIX_LEVEL: no SQL text */
-/* 25 */ "", /* SQL_DELETE_ALL_TERMS_SEGDIR: no SQL text */
-
-/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
-/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",
-
-/* This statement is used to determine which level to read the input from
-** when performing an incremental merge. It returns the absolute level number
-** of the oldest level in the db that contains at least ? segments. Or,
-** if no level in the FTS index contains more than ? segments, the statement
-** returns zero rows. */
-/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?"
- " ORDER BY (level %% 1024) ASC LIMIT 1",
-
-/* Estimate the upper limit on the number of leaf nodes in a new segment
-** created by merging the oldest :2 segments from absolute level :1. See
-** function sqlite3Fts3Incrmerge() for details. */
-/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) "
- " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?",
-
-/* SQL_DELETE_SEGDIR_ENTRY
-** Delete the %_segdir entry on absolute level :1 with index :2. */
-/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
-
-/* SQL_SHIFT_SEGDIR_ENTRY
-** Modify the idx value for the segment with idx=:3 on absolute level :2
-** to :1. */
-/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?",
-
-/* SQL_SELECT_SEGDIR
-** Read a single entry from the %_segdir table. The entry from absolute
-** level :1 with index value :2. */
-/* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
- "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
-
-/* SQL_CHOMP_SEGDIR
-** Update the start_block (:1) and root (:2) fields of the %_segdir
-** entry located on absolute level :3 with index :4. */
-/* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" /* NOTE(review): no trailing space, so the SQL reads "?WHERE" */
- "WHERE level = ? AND idx = ?",
-
-/* SQL_SEGMENT_IS_APPENDABLE
-** Return a single row if the segment with end_block=? is appendable. Or
-** no rows otherwise. */
-/* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL",
-
-/* SQL_SELECT_INDEXES
-** Return the list of valid segment indexes for absolute level ? */
-/* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC",
-
-/* SQL_SELECT_MXLEVEL
-** Return the largest relative level in the FTS index or indexes. */
-/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'"
- };
- int rc = SQLITE_OK;
- sqlite3_stmt *pStmt;
-
- assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
- assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
-
- pStmt = p->aStmt[eStmt]; /* reuse the cached handle when there is one */
- if( !pStmt ){
- char *zSql;
- if( eStmt==SQL_CONTENT_INSERT ){
- zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); /* template has a third (%s) slot */
- }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){
- zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); /* template is a single %s; no db/table names */
- }else{
- zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName);
- }
- if( !zSql ){
- rc = SQLITE_NOMEM;
- }else{
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL);
- sqlite3_free(zSql);
- assert( rc==SQLITE_OK || pStmt==0 );
- p->aStmt[eStmt] = pStmt; /* cache the handle (0 if prepare failed) */
- }
- }
- if( apVal ){
- int i;
- int nParam = sqlite3_bind_parameter_count(pStmt);
- for(i=0; rc==SQLITE_OK && i<nParam; i++){ /* skipped entirely when rc is already an error */
- rc = sqlite3_bind_value(pStmt, i+1, apVal[i]);
- }
- }
- *pp = pStmt;
- return rc;
-}
-
-/*
-** Run the SQL_SELECT_DOCSIZE statement with iDocid bound to parameter 1.
-**
-** On success SQLITE_OK is returned and *ppStmt is the statement handle,
-** already stepped onto a row whose first column is a BLOB. If stepping
-** does not produce such a row the statement is reset, *ppStmt is set to 0
-** and the sqlite3_reset() error code is returned, or FTS_CORRUPT_VTAB if
-** the reset itself reports SQLITE_OK. If fts3SqlStmt() fails, its error
-** code is returned unchanged.
-*/
-static int fts3SelectDocsize(
- Fts3Table *pTab, /* FTS3 table handle */
- sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */
- sqlite3_stmt **ppStmt /* OUT: Statement handle */
-){
- sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */
- int rc; /* Return code */
-
- rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pStmt, 1, iDocid);
- rc = sqlite3_step(pStmt);
- if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){
- rc = sqlite3_reset(pStmt);
- if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; /* no usable row and no SQLite error */
- pStmt = 0;
- }else{
- rc = SQLITE_OK; /* SQLITE_ROW is reported to the caller as success */
- }
- }
-
- *ppStmt = pStmt;
- return rc;
-}
-/*
-** Run the SQL_SELECT_STAT statement with FTS_STAT_DOCTOTAL bound to
-** parameter 1.
-**
-** On success SQLITE_OK is returned and *ppStmt is the statement handle,
-** already stepped onto a row whose first column is a BLOB. If stepping
-** does not produce such a row the statement is reset, *ppStmt is set to 0
-** and the sqlite3_reset() error code is returned, or FTS_CORRUPT_VTAB if
-** the reset itself reports SQLITE_OK. If fts3SqlStmt() fails, its error
-** code is returned unchanged.
-*/
-int sqlite3Fts3SelectDoctotal(
- Fts3Table *pTab, /* Fts3 table handle */
- sqlite3_stmt **ppStmt /* OUT: Statement handle */
-){
- sqlite3_stmt *pStmt = 0;
- int rc;
- rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
- if( sqlite3_step(pStmt)!=SQLITE_ROW
- || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB
- ){
- rc = sqlite3_reset(pStmt);
- if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; /* no usable row and no SQLite error */
- pStmt = 0;
- }
- }
- *ppStmt = pStmt;
- return rc;
-}
-
-/*
-** Non-static wrapper around fts3SelectDocsize(): forwards all three
-** arguments unchanged and returns that function's result code.
-*/
-int sqlite3Fts3SelectDocsize(
- Fts3Table *pTab, /* Fts3 table handle */
- sqlite3_int64 iDocid, /* Docid to read size data for */
- sqlite3_stmt **ppStmt /* OUT: Statement handle */
-){
- return fts3SelectDocsize(pTab, iDocid, ppStmt);
-}
-
-/*
-** Similar to fts3SqlStmt(). Except, after binding the parameters in
-** array apVal[] to the SQL statement identified by eStmt, the statement
-** is executed.
-**
-** This function has no return value; the outcome is reported through *pRC.
-** If *pRC is already non-zero on entry the function does nothing, so a
-** sequence of calls sharing one *pRC stops at the first error. Otherwise
-** *pRC is set to the error code from fts3SqlStmt(), or, if the statement
-** was obtained, to the value returned by sqlite3_reset() after the step.
-*/
-static void fts3SqlExec(
- int *pRC, /* Result code */
- Fts3Table *p, /* The FTS3 table */
- int eStmt, /* Index of statement to evaluate */
- sqlite3_value **apVal /* Parameters to bind */
-){
- sqlite3_stmt *pStmt;
- int rc;
- /* An earlier step already failed: leave *pRC untouched. */
- if( *pRC ) return;
- rc = fts3SqlStmt(p, eStmt, &pStmt, apVal);
- if( rc==SQLITE_OK ){
- /* The step result is ignored; sqlite3_reset() supplies the result code. */
- sqlite3_step(pStmt);
- rc = sqlite3_reset(pStmt);
- }
- *pRC = rc;
-}
-
-
-/*
-** Make sure the caller holds a shared-cache table-lock on the %_content
-** table before it reads from the fts3 table. Without this lock the caller
-** could hold read-locks on %_segments and %_segdir only. A second
-** connection would then be able to write to the fts3 table, but its commit
-** might fail with SQLITE_LOCKED or SQLITE_LOCKED_SHAREDCACHE when it tries
-** to obtain write-locks on the %_segments and %_segdir tables.
-**
-** This is worth avoiding because any error returned by FTS3 at commit time
-** rolls back the whole transaction, which is not what users expect from
-** SQLITE_LOCKED_SHAREDCACHE. It can still happen if the user reads the
-** %_segments or %_segdir tables directly instead of going through FTS3.
-**
-** None of this applies to a content=xxx table, for which the function
-** returns SQLITE_OK without doing anything.
-*/
-int sqlite3Fts3ReadLock(Fts3Table *p){
-  sqlite3_stmt *pLock;            /* Statement used to obtain lock */
-  int rc;                         /* Return code */
-
-  if( p->zContentTbl ) return SQLITE_OK;
-
-  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pLock, 0);
-  if( rc!=SQLITE_OK ) return rc;
-
-  /* Step the statement with a NULL rowid bound, then reset it; the result
-  ** of the reset is what gets reported to the caller. */
-  sqlite3_bind_null(pLock, 1);
-  sqlite3_step(pLock);
-  return sqlite3_reset(pLock);
-}
-
-/*
-** FTS maintains a separate index for each language-id (a 32-bit integer).
-** Within each language id there is one index for the document terms plus
-** one for each prefix size configured with the FTS "prefix=" option. Each
-** index consists of multiple levels ("relative levels").
-**
-** The language id, the index and the level within the index are encoded
-** together in the 64-bit integer values stored in the %_segdir table on
-** disk. This function combines the three component values into the single
-** 64-bit integer that can be used to query the %_segdir table.
-**
-** Each language-id/index combination is allocated 1024 64-bit integer
-** level values ("absolute levels"). The main terms index for language-id 0
-** is allocated values 0-1023, the first prefix index (if any) for
-** language-id 0 is allocated values 1024-2047, and so on. Language 1
-** indexes follow immediately after those of language 0.
-**
-** So, for a system with nPrefix prefix indexes configured, the block of
-** absolute levels for language-id iLangid and index iIndex starts at
-** absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024).
-*/
-static sqlite3_int64 getAbsoluteLevel(
-  Fts3Table *p,                   /* FTS3 table handle */
-  int iLangid,                    /* Language id */
-  int iIndex,                     /* Index in p->aIndex[] */
-  int iLevel                      /* Level of segments */
-){
-  sqlite3_int64 iSlot;            /* Ordinal of the iLangid/iIndex pair */
-
-  assert( iLangid>=0 );
-  assert( p->nIndex>0 );
-  assert( iIndex>=0 && iIndex<p->nIndex );
-
-  iSlot = (sqlite3_int64)iLangid * p->nIndex + iIndex;
-  return iSlot * FTS3_SEGDIR_MAXLEVEL + iLevel;
-}
-
-/*
-** Set *ppStmt to a statement handle that may be used to iterate through
-** rows of the %_segdir table, from oldest to newest. A non-negative iLevel
-** selects that one relative level; a negative iLevel
-** (FTS3_SEGCURSOR_ALL) selects every level of the iLangid/iIndex index.
-** Returns SQLITE_OK on success, or the SQLite error code reported while
-** obtaining the statement.
-**
-** There is only ever one instance of this SQL statement compiled for
-** each FTS3 table.
-**
-** The statement returns the following columns from the %_segdir table:
-**
-**   0: idx
-**   1: start_block
-**   2: leaves_end_block
-**   3: end_block
-**   4: root
-*/
-int sqlite3Fts3AllSegdirs(
-  Fts3Table *p,                   /* FTS3 table */
-  int iLangid,                    /* Language being queried */
-  int iIndex,                     /* Index for p->aIndex[] */
-  int iLevel,                     /* Level to select (relative level) */
-  sqlite3_stmt **ppStmt           /* OUT: Compiled statement */
-){
-  sqlite3_stmt *pQuery = 0;
-  int rc;
-
-  assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 );
-  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
-  assert( iIndex>=0 && iIndex<p->nIndex );
-
-  if( iLevel>=0 ){
-    /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
-    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pQuery, 0);
-    if( rc==SQLITE_OK ){
-      sqlite3_int64 iAbs = getAbsoluteLevel(p, iLangid, iIndex, iLevel);
-      sqlite3_bind_int64(pQuery, 1, iAbs);
-    }
-  }else{
-    /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
-    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pQuery, 0);
-    if( rc==SQLITE_OK ){
-      sqlite3_int64 iFirst = getAbsoluteLevel(p, iLangid, iIndex, 0);
-      sqlite3_int64 iLast = getAbsoluteLevel(
-          p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1
-      );
-      sqlite3_bind_int64(pQuery, 1, iFirst);
-      sqlite3_bind_int64(pQuery, 2, iLast);
-    }
-  }
-
-  *ppStmt = pQuery;
-  return rc;
-}
-
-
-/*
-** Append one varint to the data buffer of a PendingList. Returns SQLITE_OK
-** on success or SQLITE_NOMEM if memory could not be allocated. When
-** growing an existing list fails, the old list is freed and *pp is set
-** to NULL.
-**
-** The function doubles as the PendingList allocator: if *pp is NULL on
-** entry a new structure is created. For example, to build a list holding
-** two varints:
-**
-**   PendingList *p = 0;
-**   fts3PendingListAppendVarint(&p, 1);
-**   fts3PendingListAppendVarint(&p, 2);
-*/
-static int fts3PendingListAppendVarint(
-  PendingList **pp,               /* IN/OUT: Pointer to PendingList struct */
-  sqlite3_int64 i                 /* Value to append to data */
-){
-  PendingList *pList = *pp;
-
-  if( pList==0 ){
-    /* First use: the header and a 100 byte buffer share one allocation. */
-    pList = sqlite3_malloc(sizeof(*pList) + 100);
-    if( pList==0 ) return SQLITE_NOMEM;
-    pList->nSpace = 100;
-    pList->aData = (char *)&pList[1];
-    pList->nData = 0;
-  }else if( pList->nData+FTS3_VARINT_MAX+1>pList->nSpace ){
-    /* Not enough room for a maximum-size varint plus the terminator. */
-    int nGrown = pList->nSpace * 2;
-    pList = sqlite3_realloc(pList, sizeof(*pList) + nGrown);
-    if( pList==0 ){
-      sqlite3_free(*pp);
-      *pp = 0;
-      return SQLITE_NOMEM;
-    }
-    pList->nSpace = nGrown;
-    pList->aData = (char *)&pList[1];
-  }
-
-  /* Serialize the value at the end of the buffer and keep the buffer
-  ** nul-terminated. */
-  pList->nData += sqlite3Fts3PutVarint(&pList->aData[pList->nData], i);
-  pList->aData[pList->nData] = '\0';
-  *pp = pList;
-  return SQLITE_OK;
-}
-
-/*
-** Add a docid/column/position entry to a PendingList structure. Non-zero
-** is returned if the pointer stored in *pp changes as part of adding the
-** entry (the structure was allocated or sqlite3_realloced). Otherwise, zero.
-**
-** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning.
-** Otherwise, if no OOM error occurs, it is set to SQLITE_OK.
-**
-** NOTE(review): if the very first allocation fails, the list pointer is
-** unchanged and zero is returned. If growing an existing list fails,
-** fts3PendingListAppendVarint() frees the list and sets the local pointer
-** to NULL, so this function stores NULL in *pp and returns 1.
-*/
-static int fts3PendingListAppend(
- PendingList **pp, /* IN/OUT: PendingList structure */
- sqlite3_int64 iDocid, /* Docid for entry to add */
- sqlite3_int64 iCol, /* Column for entry to add */
- sqlite3_int64 iPos, /* Position of term for entry to add */
- int *pRc /* OUT: Return code */
-){
- PendingList *p = *pp;
- int rc = SQLITE_OK;
-
- assert( !p || p->iLastDocid<=iDocid );
-
- /* A new docid: start a new doclist entry, stored as a delta from the
- ** previous docid (or from zero for a brand new list). */
- if( !p || p->iLastDocid!=iDocid ){
- sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0);
- if( p ){
- /* Step over the 0x00 byte that terminates the previous entry. */
- assert( p->nData<p->nSpace );
- assert( p->aData[p->nData]==0 );
- p->nData++;
- }
- if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){
- goto pendinglistappend_out;
- }
- p->iLastCol = -1;
- p->iLastPos = 0;
- p->iLastDocid = iDocid;
- }
- /* A change to a column other than column 0: write the varint 1 followed
- ** by the column number, and restart position deltas from zero. */
- if( iCol>0 && p->iLastCol!=iCol ){
- if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1))
- || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol))
- ){
- goto pendinglistappend_out;
- }
- p->iLastCol = iCol;
- p->iLastPos = 0;
- }
- /* A negative iCol adds the docid only. Otherwise store the position as
- ** a delta from the previous one, offset by 2. */
- if( iCol>=0 ){
- assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) );
- rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos);
- if( rc==SQLITE_OK ){
- p->iLastPos = iPos;
- }
- }
-
- pendinglistappend_out:
- *pRc = rc;
- /* Report to the caller whether the pointer it holds is now stale. */
- if( p!=*pp ){
- *pp = p;
- return 1;
- }
- return 0;
-}
-
-/*
-** Free a PendingList object allocated by fts3PendingListAppend(). The
-** pointer is passed straight to sqlite3_free().
-*/
-static void fts3PendingListDelete(PendingList *pList){
- sqlite3_free(pList);
-}
-
-/*
-** Add an entry to one of the pending-terms hash tables.
-**
-** The entry records position iPos in column iCol of document
-** p->iPrevDocid for the token zToken/nToken in hash table pHash.
-** p->nPendingData is adjusted to reflect the new size of the list.
-** Returns SQLITE_OK on success or SQLITE_NOMEM on allocation failure.
-*/
-static int fts3PendingTermsAddOne(
- Fts3Table *p,
- int iCol,
- int iPos,
- Fts3Hash *pHash, /* Pending terms hash table to add entry to */
- const char *zToken,
- int nToken
-){
- PendingList *pList;
- int rc = SQLITE_OK;
-
- pList = (PendingList *)fts3HashFind(pHash, zToken, nToken);
- if( pList ){
- /* Remove the current size of this entry from the running total; the
- ** updated size is added back below. */
- p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
- }
- /* A non-zero return means the list pointer changed, so the hash table
- ** entry must be updated to point at the new location. */
- if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
- if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){
- /* Malloc failed while inserting the new entry. This can only
- ** happen if there was no previous entry for this token.
- */
- assert( 0==fts3HashFind(pHash, zToken, nToken) );
- sqlite3_free(pList);
- rc = SQLITE_NOMEM;
- }
- }
- if( rc==SQLITE_OK ){
- p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
- }
- return rc;
-}
-
-/*
-** Tokenize the nul-terminated string zText and add all tokens to the
-** pending-terms hash-table. The docid used is that currently stored in
-** p->iPrevDocid, and the column is specified by argument iCol.
-**
-** Each token is added to the main terms index (p->aIndex[0]) and to every
-** prefix index whose prefix length does not exceed the token length.
-**
-** If zText is NULL, *pnWord is set to zero. Otherwise *pnWord is
-** incremented by one more than the largest token position seen.
-**
-** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
-*/
-static int fts3PendingTermsAdd(
- Fts3Table *p, /* Table into which text will be inserted */
- int iLangid, /* Language id to use */
- const char *zText, /* Text of document to be inserted */
- int iCol, /* Column into which text is being inserted */
- u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */
-){
- int rc;
- int iStart = 0;
- int iEnd = 0;
- int iPos = 0;
- int nWord = 0;
-
- char const *zToken;
- int nToken = 0;
-
- sqlite3_tokenizer *pTokenizer = p->pTokenizer;
- sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
- sqlite3_tokenizer_cursor *pCsr;
- int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
- const char**,int*,int*,int*,int*);
-
- assert( pTokenizer && pModule );
-
- /* If the user has inserted a NULL value, this function may be called with
- ** zText==0. In this case, add zero token entries to the hash table and
- ** return early. */
- if( zText==0 ){
- *pnWord = 0;
- return SQLITE_OK;
- }
-
- rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- /* Loop until the tokenizer returns something other than SQLITE_OK
- ** (SQLITE_DONE at the end of the text) or adding a term fails. */
- xNext = pModule->xNext;
- while( SQLITE_OK==rc
- && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
- ){
- int i;
- if( iPos>=nWord ) nWord = iPos+1;
-
- /* Positions cannot be negative; we use -1 as a terminator internally.
- ** Tokens must have a non-zero length.
- */
- if( iPos<0 || !zToken || nToken<=0 ){
- rc = SQLITE_ERROR;
- break;
- }
-
- /* Add the term to the terms index */
- rc = fts3PendingTermsAddOne(
- p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken
- );
-
- /* Add the term to each of the prefix indexes that it is not too
- ** short for. */
- for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){
- struct Fts3Index *pIndex = &p->aIndex[i];
- if( nToken<pIndex->nPrefix ) continue;
- rc = fts3PendingTermsAddOne(
- p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix
- );
- }
- }
-
- pModule->xClose(pCsr);
- *pnWord += nWord;
- /* SQLITE_DONE is the normal end of input, not an error. */
- return (rc==SQLITE_DONE ? SQLITE_OK : rc);
-}
-
-/*
-** Announce that the following calls to fts3PendingTermsAdd() will add
-** term/position-list pairs for the document with docid iDocid and
-** language id iLangid.
-**
-** The pending terms are flushed first if the new docid is not greater
-** than the previous one, if the language id differs from the previous
-** one, or if the pending data exceeds p->nMaxPendingData. If that flush
-** fails its error code is returned and the stored docid and language id
-** are left unchanged. Otherwise SQLITE_OK is returned.
-*/
-static int fts3PendingTermsDocid(
-  Fts3Table *p,                   /* Full-text table handle */
-  int iLangid,                    /* Language id of row being written */
-  sqlite_int64 iDocid             /* Docid of row being written */
-){
-  int bFlush;                     /* True if pending terms must be flushed */
-
-  assert( iLangid>=0 );
-
-  /* TODO(shess) Explore whether partially flushing the buffer on
-  ** forced-flush would provide better performance.  I suspect that if
-  ** we ordered the doclists by size and flushed the largest until the
-  ** buffer was half empty, that would let the less frequent terms
-  ** generate longer doclists.
-  */
-  bFlush = iDocid<=p->iPrevDocid
-        || p->iPrevLangid!=iLangid
-        || p->nPendingData>p->nMaxPendingData;
-  if( bFlush ){
-    int rcFlush = sqlite3Fts3PendingTermsFlush(p);
-    if( rcFlush!=SQLITE_OK ) return rcFlush;
-  }
-
-  p->iPrevDocid = iDocid;
-  p->iPrevLangid = iLangid;
-  return SQLITE_OK;
-}
-
-/*
-** Throw away everything held in the pending-terms hash tables: for each
-** of the p->nIndex indexes, free every PendingList stored in the table,
-** clear the table, and finally reset the pending-data byte counter.
-*/
-void sqlite3Fts3PendingTermsClear(Fts3Table *p){
-  int iIdx = 0;
-  while( iIdx<p->nIndex ){
-    Fts3Hash *pPending = &p->aIndex[iIdx].hPending;
-    Fts3HashElem *pEntry = fts3HashFirst(pPending);
-    while( pEntry ){
-      fts3PendingListDelete((PendingList *)fts3HashData(pEntry));
-      pEntry = fts3HashNext(pEntry);
-    }
-    fts3HashClear(pPending);
-    iIdx++;
-  }
-  p->nPendingData = 0;
-}
-
-/*
-** Called by the xUpdate() method as part of an INSERT operation. Adds an
-** entry for each term of the new record to the pendingTerms hash table.
-**
-** Argument apVal is the same as the similarly named argument passed to
-** fts3InsertData(); the user columns start at apVal[2]. For each column
-** the token count is accumulated in aSz[iCol] and the byte length of the
-** value is added to aSz[p->nColumn]. Processing stops at the first column
-** for which fts3PendingTermsAdd() fails, and that error code is returned.
-*/
-static int fts3InsertTerms(
-  Fts3Table *p,
-  int iLangid,
-  sqlite3_value **apVal,
-  u32 *aSz
-){
-  int iCol;                       /* Index of the user column being added */
-  for(iCol=0; iCol<p->nColumn; iCol++){
-    sqlite3_value *pVal = apVal[iCol+2];
-    const char *zText = (const char *)sqlite3_value_text(pVal);
-    int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]);
-    if( rc!=SQLITE_OK ) return rc;
-    aSz[p->nColumn] += sqlite3_value_bytes(pVal);
-  }
-  return SQLITE_OK;
-}
-
-/*
-** This function is called by the xUpdate() method for an INSERT operation.
-** The apVal parameter is passed a copy of the apVal argument passed by
-** SQLite to the xUpdate() method. i.e:
-**
-** apVal[0] Not used for INSERT.
-** apVal[1] rowid
-** apVal[2] Left-most user-defined column
-** ...
-** apVal[p->nColumn+1] Right-most user-defined column
-** apVal[p->nColumn+2] Hidden column with same name as table
-** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
-** apVal[p->nColumn+4] Hidden languageid column
-**
-** On success *piDocid is set to the docid of the new row and SQLITE_OK is
-** returned. For a table with p->zContentTbl set nothing is written: the
-** docid is taken from the supplied docid (or rowid) value, and
-** SQLITE_CONSTRAINT is returned if that value is not an integer.
-*/
-static int fts3InsertData(
- Fts3Table *p, /* Full-text table */
- sqlite3_value **apVal, /* Array of values to insert */
- sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */
-){
- int rc; /* Return code */
- sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */
-
- if( p->zContentTbl ){
- /* Prefer the "docid" value; fall back to "rowid" if docid is NULL. */
- sqlite3_value *pRowid = apVal[p->nColumn+3];
- if( sqlite3_value_type(pRowid)==SQLITE_NULL ){
- pRowid = apVal[1];
- }
- if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){
- return SQLITE_CONSTRAINT;
- }
- *piDocid = sqlite3_value_int64(pRowid);
- return SQLITE_OK;
- }
-
- /* Locate the statement handle used to insert data into the %_content
- ** table. The SQL for this statement is:
- **
- ** INSERT INTO %_content VALUES(?, ?, ?, ...)
- **
- ** The statement features N '?' variables, where N is the number of user
- ** defined columns in the FTS3 table, plus one for the docid field.
- */
- rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
- if( rc==SQLITE_OK && p->zLanguageid ){
- /* Bind the languageid value to parameter p->nColumn+2. */
- rc = sqlite3_bind_int(
- pContentInsert, p->nColumn+2,
- sqlite3_value_int(apVal[p->nColumn+4])
- );
- }
- if( rc!=SQLITE_OK ) return rc;
-
- /* There is a quirk here. The users INSERT statement may have specified
- ** a value for the "rowid" field, for the "docid" field, or for both.
- ** Which is a problem, since "rowid" and "docid" are aliases for the
- ** same value. For example:
- **
- ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);
- **
- ** In FTS3, this is an error. It is an error to specify non-NULL values
- ** for both docid and some other rowid alias.
- */
- if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){
- if( SQLITE_NULL==sqlite3_value_type(apVal[0])
- && SQLITE_NULL!=sqlite3_value_type(apVal[1])
- ){
- /* A rowid/docid conflict. */
- return SQLITE_ERROR;
- }
- /* Overwrite parameter 1 (bound from apVal[1] above) with the docid. */
- rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]);
- if( rc!=SQLITE_OK ) return rc;
- }
-
- /* Execute the statement to insert the record. Set *piDocid to the
- ** new docid value.
- */
- sqlite3_step(pContentInsert);
- rc = sqlite3_reset(pContentInsert);
-
- *piDocid = sqlite3_last_insert_rowid(p->db);
- return rc;
-}
-
-
-
-/*
-** Empty the FTS3 table: discard the pending-terms hash tables and delete
-** every row of the shadow tables. The %_content table is only emptied if
-** bContent is non-zero; the %_docsize and %_stat tables only if the table
-** has them. The statements share one result code, so execution stops
-** having any effect after the first failure and that error is returned.
-*/
-static int fts3DeleteAll(Fts3Table *p, int bContent){
-  int rc = SQLITE_OK;             /* Return code */
-
-  /* Pending terms refer to data that is about to disappear. */
-  sqlite3Fts3PendingTermsClear(p);
-
-  /* A content=xxx table must never have its content deleted here. */
-  assert( p->zContentTbl==0 || bContent==0 );
-
-  if( bContent ){
-    fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
-  }
-  fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
-  fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
-  if( p->bHasDocsize ){
-    fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
-  }
-  if( p->bHasStat ){
-    fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
-  }
-
-  return rc;
-}
-
-/*
-** Return the language id of the row that statement pSelect is currently
-** positioned on. If the table has a languageid column (p->zLanguageid is
-** set) the value is read from result column p->nColumn+1; otherwise the
-** language id is 0.
-*/
-static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
-  return p->zLanguageid ? sqlite3_column_int(pSelect, p->nColumn+1) : 0;
-}
-
-/*
-** Argument pRowid holds the docid (an integer) of a row about to be
-** deleted. Read that row back with SQL_SELECT_CONTENT_BY_ROWID and add
-** its terms to the pending-terms tables with column -1, so that they are
-** removed from the full-text index.
-**
-** If *pRC is non-zero on entry this function is a no-op. Otherwise *pRC
-** is set to the result code. *pbFound is set to 1 only if the row exists
-** and all of its terms were processed successfully.
-*/
-static void fts3DeleteTerms(
- int *pRC, /* Result code */
- Fts3Table *p, /* The FTS table to delete from */
- sqlite3_value *pRowid, /* The docid to be deleted */
- u32 *aSz, /* Sizes of deleted document written here */
- int *pbFound /* OUT: Set to true if row really does exist */
-){
- int rc;
- sqlite3_stmt *pSelect;
-
- assert( *pbFound==0 );
- if( *pRC ) return;
- rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
- if( rc==SQLITE_OK ){
- if( SQLITE_ROW==sqlite3_step(pSelect) ){
- int i;
- int iLangid = langidFromSelect(p, pSelect);
- rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
- /* Result column 0 is the docid; columns 1..nColumn hold the text. */
- for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
- const char *zText = (const char *)sqlite3_column_text(pSelect, i);
- rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
- aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
- }
- if( rc!=SQLITE_OK ){
- /* Report the tokenizer/OOM error rather than the reset result. */
- sqlite3_reset(pSelect);
- *pRC = rc;
- return;
- }
- *pbFound = 1;
- }
- rc = sqlite3_reset(pSelect);
- }else{
- sqlite3_reset(pSelect);
- }
- *pRC = rc;
-}
-
-/*
-** Forward declaration to account for the circular dependency between
-** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
-*/
-static int fts3SegmentMerge(Fts3Table *, int, int, int);
-
-/*
-** Allocate a new index at level iLevel in the segdir table. Indexes within
-** a level are normally handed out sequentially starting from 0, so the
-** allocated index is one greater than the value returned by:
-**
-**   SELECT max(idx) FROM %_segdir WHERE level = :iLevel
-**
-** If the level already holds FTS3_MERGE_COUNT indexes, its segments are
-** first merged into a single level (iLevel+1) segment and index 0 is
-** allocated.
-**
-** On success *piIdx receives the allocated index slot and SQLITE_OK is
-** returned. Otherwise an SQLite error code is returned.
-*/
-static int fts3AllocateSegdirIdx(
-  Fts3Table *p,
-  int iLangid,                    /* Language id */
-  int iIndex,                     /* Index for p->aIndex */
-  int iLevel,
-  int *piIdx
-){
-  sqlite3_stmt *pQuery;           /* Query for next idx at level iLevel */
-  int iNext = 0;                  /* Result of query pQuery */
-  int rc;                         /* Return Code */
-
-  assert( iLangid>=0 );
-  assert( p->nIndex>=1 );
-
-  /* Look up the next available segdir index at level iLevel. */
-  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pQuery, 0);
-  if( rc==SQLITE_OK ){
-    sqlite3_int64 iAbsLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel);
-    sqlite3_bind_int64(pQuery, 1, iAbsLevel);
-    if( sqlite3_step(pQuery)==SQLITE_ROW ){
-      iNext = sqlite3_column_int(pQuery, 0);
-    }
-    rc = sqlite3_reset(pQuery);
-  }
-  if( rc!=SQLITE_OK ) return rc;
-
-  /* Room left in this level: hand out the next slot as is. */
-  if( iNext<FTS3_MERGE_COUNT ){
-    *piIdx = iNext;
-    return SQLITE_OK;
-  }
-
-  /* The level is full. Merge all of its segments into a single iLevel+1
-  ** segment and allocate the (newly freed) index 0 at level iLevel. */
-  fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
-  rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
-  *piIdx = 0;
-  return rc;
-}
-
-/*
-** The %_segments table is declared as follows:
-**
-** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB)
-**
-** This function reads data from a single row of the %_segments table. The
-** specific row is identified by the iBlockid parameter. If paBlob is not
-** NULL, then a buffer is allocated using sqlite3_malloc() and populated
-** with the contents of the blob stored in the "block" column of the
-** identified table row. Whether or not paBlob is NULL, *pnBlob is set
-** to the size of the blob in bytes before returning.
-**
-** If pnLoad is not NULL and the blob is larger than
-** FTS3_NODE_CHUNK_THRESHOLD bytes, only the first FTS3_NODE_CHUNKSIZE
-** bytes are read into the buffer and *pnLoad is set to that count. The
-** buffer is still allocated large enough for the whole blob.
-**
-** If an error occurs, or the table does not contain the specified row,
-** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If
-** paBlob is non-NULL, then it is the responsibility of the caller to
-** eventually free the returned buffer.
-**
-** This function may leave an open sqlite3_blob* handle in the
-** Fts3Table.pSegments variable. This handle is reused by subsequent calls
-** to this function. The handle may be closed by calling the
-** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy
-** performance improvement, but the blob handle should always be closed
-** before control is returned to the user (to prevent a lock being held
-** on the database file for longer than necessary). Thus, any virtual table
-** method (xFilter etc.) that may directly or indirectly call this function
-** must call sqlite3Fts3SegmentsClose() before returning.
-*/
-int sqlite3Fts3ReadBlock(
- Fts3Table *p, /* FTS3 table handle */
- sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */
- char **paBlob, /* OUT: Blob data in malloc'd buffer */
- int *pnBlob, /* OUT: Size of blob data */
- int *pnLoad /* OUT: Bytes actually loaded */
-){
- int rc; /* Return code */
-
- /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */
- assert( pnBlob );
-
- if( p->pSegments ){
- /* Reuse the handle left open by a previous call. */
- rc = sqlite3_blob_reopen(p->pSegments, iBlockid);
- }else{
- /* First use: build and cache the "<name>_segments" table name. */
- if( 0==p->zSegmentsTbl ){
- p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName);
- if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM;
- }
- rc = sqlite3_blob_open(
- p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments
- );
- }
-
- if( rc==SQLITE_OK ){
- int nByte = sqlite3_blob_bytes(p->pSegments);
- *pnBlob = nByte;
- if( paBlob ){
- /* FTS3_NODE_PADDING extra bytes are allocated and zeroed after the
- ** data that is read. */
- char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING);
- if( !aByte ){
- rc = SQLITE_NOMEM;
- }else{
- if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){
- nByte = FTS3_NODE_CHUNKSIZE;
- *pnLoad = nByte;
- }
- rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0);
- memset(&aByte[nByte], 0, FTS3_NODE_PADDING);
- if( rc!=SQLITE_OK ){
- sqlite3_free(aByte);
- aByte = 0;
- }
- }
- *paBlob = aByte;
- }
- }
-
- return rc;
-}
-
-/*
-** Close the blob handle at p->pSegments, if it is open, and set the
-** pointer to NULL. See comments above the sqlite3Fts3ReadBlock() function
-** for details.
-*/
-void sqlite3Fts3SegmentsClose(Fts3Table *p){
- sqlite3_blob_close(p->pSegments);
- p->pSegments = 0;
-}
-
-/*
-** Read the next chunk of a node that is being loaded incrementally. At
-** most FTS3_NODE_CHUNKSIZE bytes are read from pReader->pBlob, starting at
-** offset pReader->nPopulate, and appended to pReader->aNode. The
-** FTS3_NODE_PADDING bytes following the loaded data are zeroed.
-**
-** Once the whole node (pReader->nNode bytes) has been loaded the blob
-** handle is closed and pReader->nPopulate is reset to 0. Returns the
-** result code of sqlite3_blob_read().
-*/
-static int fts3SegReaderIncrRead(Fts3SegReader *pReader){
- int nRead; /* Number of bytes to read */
- int rc; /* Return code */
-
- nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE);
- rc = sqlite3_blob_read(
- pReader->pBlob,
- &pReader->aNode[pReader->nPopulate],
- nRead,
- pReader->nPopulate
- );
-
- if( rc==SQLITE_OK ){
- pReader->nPopulate += nRead;
- memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING);
- if( pReader->nPopulate==pReader->nNode ){
- /* Node fully loaded: leave incremental mode. */
- sqlite3_blob_close(pReader->pBlob);
- pReader->pBlob = 0;
- pReader->nPopulate = 0;
- }
- }
- return rc;
-}
-
-/*
-** Make sure that the nByte bytes starting at pFrom, a pointer into
-** pReader->aNode, have been loaded. While the node is still being loaded
-** incrementally (pReader->pBlob is open) and the requested range extends
-** past the populated part, further chunks are read. Returns SQLITE_OK, or
-** the error code of a failed read.
-*/
-static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){
- int rc = SQLITE_OK;
- assert( !pReader->pBlob
- || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode])
- );
- /* The loop ends when enough data is loaded, when a read fails, or when
- ** fts3SegReaderIncrRead() closes the blob after loading the final chunk. */
- while( pReader->pBlob && rc==SQLITE_OK
- && (pFrom - pReader->aNode + nByte)>pReader->nPopulate
- ){
- rc = fts3SegReaderIncrRead(pReader);
- }
- return rc;
-}
-
-/*
-** Set an Fts3SegReader cursor to point at EOF by clearing pSeg->aNode.
-** Unless the reader is root-only, the node buffer is freed and any open
-** blob handle is closed first.
-*/
-static void fts3SegReaderSetEof(Fts3SegReader *pSeg){
- if( !fts3SegReaderIsRootOnly(pSeg) ){
- sqlite3_free(pSeg->aNode);
- sqlite3_blob_close(pSeg->pBlob);
- pSeg->pBlob = 0;
- }
- pSeg->aNode = 0;
-}
-
-/*
-** Move the iterator passed as the second argument to the next term in the
-** segment. If successful, SQLITE_OK is returned. Reaching the end of the
-** segment is also reported as SQLITE_OK, with pReader->aNode set to NULL.
-** Otherwise, an SQLite error code is returned; FTS_CORRUPT_VTAB is
-** returned if the node data is found to be malformed.
-**
-** If bIncr is true, large leaf nodes are loaded incrementally: the blob
-** handle is moved from p->pSegments to pReader->pBlob and the remainder of
-** the node is read on demand by fts3SegReaderRequire().
-*/
-static int fts3SegReaderNext(
-  Fts3Table *p,
-  Fts3SegReader *pReader,
-  int bIncr
-){
-  int rc;                         /* Return code of various sub-routines */
-  char *pNext;                    /* Cursor variable */
-  int nPrefix;                    /* Number of bytes in term prefix */
-  int nSuffix;                    /* Number of bytes in term suffix */
-
-  if( !pReader->aDoclist ){
-    pNext = pReader->aNode;
-  }else{
-    pNext = &pReader->aDoclist[pReader->nDoclist];
-  }
-
-  if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
-
-    if( fts3SegReaderIsPending(pReader) ){
-      /* A pending-terms reader walks an array of hash elements; a NULL
-      ** element marks the end. */
-      Fts3HashElem *pElem = *(pReader->ppNextElem);
-      if( pElem==0 ){
-        pReader->aNode = 0;
-      }else{
-        PendingList *pList = (PendingList *)fts3HashData(pElem);
-        pReader->zTerm = (char *)fts3HashKey(pElem);
-        pReader->nTerm = fts3HashKeysize(pElem);
-        pReader->nNode = pReader->nDoclist = pList->nData + 1;
-        pReader->aNode = pReader->aDoclist = pList->aData;
-        pReader->ppNextElem++;
-        assert( pReader->aNode );
-      }
-      return SQLITE_OK;
-    }
-
-    fts3SegReaderSetEof(pReader);
-
-    /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf
-    ** blocks have already been traversed.  */
-    assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock );
-    if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){
-      return SQLITE_OK;
-    }
-
-    rc = sqlite3Fts3ReadBlock(
-        p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode,
-        (bIncr ? &pReader->nPopulate : 0)
-    );
-    if( rc!=SQLITE_OK ) return rc;
-    assert( pReader->pBlob==0 );
-    if( bIncr && pReader->nPopulate<pReader->nNode ){
-      /* Only part of the node was loaded: take ownership of the blob. */
-      pReader->pBlob = p->pSegments;
-      p->pSegments = 0;
-    }
-    pNext = pReader->aNode;
-  }
-
-  assert( !fts3SegReaderIsPending(pReader) );
-
-  rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2);
-  if( rc!=SQLITE_OK ) return rc;
-
-  /* Because of the FTS3_NODE_PADDING bytes of padding, the following is
-  ** safe (no risk of overread) even if the node data is corrupted. */
-  pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
-  pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);
-
-  /* Reject malformed term headers:
-  **
-  **   * the suffix must be non-empty and fit in what is left of the node.
-  **     The remaining size is computed as a pointer difference so that no
-  **     out-of-range pointer is formed when nSuffix is huge;
-  **   * the prefix cannot be longer than the previous term, otherwise
-  **     uninitialised bytes of zTerm[] would become part of the term and
-  **     nPrefix+nSuffix could overflow.
-  **
-  ** NOTE(review): the nPrefix test assumes pReader->nTerm is 0 for a
-  ** freshly allocated reader -- confirm in sqlite3Fts3SegReaderNew().
-  */
-  if( nPrefix<0 || nSuffix<=0
-   || nPrefix>pReader->nTerm
-   || (&pReader->aNode[pReader->nNode] - pNext)<nSuffix
-  ){
-    return FTS_CORRUPT_VTAB;
-  }
-
-  if( nPrefix+nSuffix>pReader->nTermAlloc ){
-    int nNew = (nPrefix+nSuffix)*2;
-    char *zNew = sqlite3_realloc(pReader->zTerm, nNew);
-    if( !zNew ){
-      return SQLITE_NOMEM;
-    }
-    pReader->zTerm = zNew;
-    pReader->nTermAlloc = nNew;
-  }
-
-  rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX);
-  if( rc!=SQLITE_OK ) return rc;
-
-  memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix);
-  pReader->nTerm = nPrefix+nSuffix;
-  pNext += nSuffix;
-  pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist);
-  pReader->aDoclist = pNext;
-  pReader->pOffsetList = 0;
-
-  /* Check that the doclist is not empty and does not appear to extend
-  ** past the end of the b-tree node, and that the final byte of the
-  ** doclist is 0x00. If any of these is untrue, the data structure is
-  ** corrupt. The size test comes first so that aDoclist[nDoclist-1] is
-  ** only read when it lies inside the node. The final-byte test is skipped
-  ** while the node is still being loaded incrementally (nPopulate!=0).
-  */
-  if( pReader->nDoclist<=0
-   || (&pReader->aNode[pReader->nNode] - pReader->aDoclist)<pReader->nDoclist
-   || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1])
-  ){
-    return FTS_CORRUPT_VTAB;
-  }
-  return SQLITE_OK;
-}
-
-/*
-** Position the SegReader on the first docid of the doclist that belongs
-** to the current term.
-**
-** For a pending-terms reader on a table with bDescIdx set, the doclist is
-** entered through sqlite3Fts3DoclistPrev(). In all other cases the first
-** docid varint is decoded from the start of the doclist, after making
-** sure that part of the node has been loaded.
-*/
-static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){
-  int rc = SQLITE_OK;
-
-  assert( pReader->aDoclist );
-  assert( !pReader->pOffsetList );
-
-  if( !pTab->bDescIdx || !fts3SegReaderIsPending(pReader) ){
-    rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX);
-    if( rc==SQLITE_OK ){
-      int nVarint = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid);
-      pReader->pOffsetList = &pReader->aDoclist[nVarint];
-    }
-  }else{
-    u8 bEof = 0;
-    pReader->iDocid = 0;
-    pReader->nOffsetList = 0;
-    sqlite3Fts3DoclistPrev(0,
-        pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList,
-        &pReader->iDocid, &pReader->nOffsetList, &bEof
-    );
-  }
-
-  return rc;
-}
-
-/*
-** Advance the SegReader to point to the next docid in the doclist
-** associated with the current term. When the doclist is exhausted,
-** pReader->pOffsetList is set to NULL.
-**
-** If argument ppOffsetList is not NULL, then *ppOffsetList is set to
-** point to the first column-offset list of the entry being left behind
-** (i.e. immediately past its docid varint), and *pnOffsetList is set to
-** the length of the set of column-offset lists, not including the
-** nul-terminator byte.
-**
-** Returns SQLITE_OK, or an SQLite error code if loading more of an
-** incrementally read node fails.
-*/
-static int fts3SegReaderNextDocid(
-  Fts3Table *pTab,
-  Fts3SegReader *pReader,         /* Reader to advance to next docid */
-  char **ppOffsetList,            /* OUT: Pointer to current position-list */
-  int *pnOffsetList               /* OUT: Length of *ppOffsetList in bytes */
-){
-  int rc = SQLITE_OK;
-  char *p = pReader->pOffsetList;
-  char c = 0;
-
-  assert( p );
-
-  if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){
-    /* A pending-terms seg-reader for an FTS4 table that uses order=desc.
-    ** Pending-terms doclists are always built up in ascending order, so
-    ** we have to iterate through them backwards here. */
-    u8 bEof = 0;
-    if( ppOffsetList ){
-      *ppOffsetList = pReader->pOffsetList;
-      *pnOffsetList = pReader->nOffsetList - 1;
-    }
-    sqlite3Fts3DoclistPrev(0,
-        pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid,
-        &pReader->nOffsetList, &bEof
-    );
-    if( bEof ){
-      pReader->pOffsetList = 0;
-    }else{
-      pReader->pOffsetList = p;
-    }
-  }else{
-    char *pEnd = &pReader->aDoclist[pReader->nDoclist];
-
-    /* Pointer p currently points at the first byte of an offset list. The
-    ** following block advances it to point one byte past the end of
-    ** the same offset list. */
-    while( 1 ){
-
-      /* The following line of code (and the "p++" below the while() loop) is
-      ** normally all that is required to move pointer p to the desired
-      ** position. The exception is if this node is being loaded from disk
-      ** incrementally and pointer "p" now points to the first byte past
-      ** the populated part of pReader->aNode[].
-      */
-      while( *p | c ) c = *p++ & 0x80;
-      assert( *p==0 );
-
-      if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break;
-      rc = fts3SegReaderIncrRead(pReader);
-      if( rc!=SQLITE_OK ) return rc;
-    }
-    p++;
-
-    /* If required, populate the output variables with a pointer to and the
-    ** size of the previous offset-list.
-    */
-    if( ppOffsetList ){
-      *ppOffsetList = pReader->pOffsetList;
-      *pnOffsetList = (int)(p - pReader->pOffsetList - 1);
-    }
-
-    /* Skip any additional 0x00 bytes before the next entry. */
-    while( p<pEnd && *p==0 ) p++;
-
-    /* If there are no more entries in the doclist, set pOffsetList to
-    ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and
-    ** Fts3SegReader.pOffsetList to point to the next offset list before
-    ** returning.
-    */
-    if( p>=pEnd ){
-      pReader->pOffsetList = 0;
-    }else{
-      rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX);
-      if( rc==SQLITE_OK ){
-        sqlite3_int64 iDelta;
-        pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta);
-        if( pTab->bDescIdx ){
-          pReader->iDocid -= iDelta;
-        }else{
-          pReader->iDocid += iDelta;
-        }
-      }
-    }
-  }
-
-  /* Propagate a failure of fts3SegReaderRequire() above instead of
-  ** reporting success with a reader that was not advanced. */
-  return rc;
-}
-
-
-/*
-** For every segment in pMsr that is neither a pending-terms reader nor a
-** root-only reader, read the size of each block in the range
-** iStartBlock..iLeafEndBlock (inclusive) using sqlite3Fts3ReadBlock().
-** Whenever (size+35) exceeds the table page size p->nPgsz, add
-** (size+34)/pgsz to a running total.
-**
-** The total is written to *pnOvfl in all cases (including on error) and
-** the SQLite error code of the last block read is returned.
-** NOTE(review): judging by the names this is an overflow-page estimate;
-** confirm against the caller.
-*/
-int sqlite3Fts3MsrOvfl(
-  Fts3Cursor *pCsr,
-  Fts3MultiSegReader *pMsr,
-  int *pnOvfl
-){
-  Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab;
-  int nTotal = 0;                 /* Value to store in *pnOvfl */
-  int iSeg = 0;                   /* Index of current segment */
-  int rc = SQLITE_OK;             /* Return code */
-  int szPage = pTab->nPgsz;       /* Page size used for the estimate */
-
-  assert( pTab->bFts4 );
-  assert( szPage>0 );
-
-  while( rc==SQLITE_OK && iSeg<pMsr->nSegment ){
-    Fts3SegReader *pSeg = pMsr->apSegment[iSeg++];
-    sqlite3_int64 iBlock;
-
-    /* Only segments with leaf blocks in %_segments are of interest. */
-    if( fts3SegReaderIsPending(pSeg) ) continue;
-    if( fts3SegReaderIsRootOnly(pSeg) ) continue;
-
-    for(iBlock=pSeg->iStartBlock; iBlock<=pSeg->iLeafEndBlock; iBlock++){
-      int nBlob;
-      rc = sqlite3Fts3ReadBlock(pTab, iBlock, 0, &nBlob, 0);
-      if( rc!=SQLITE_OK ) break;
-      if( (nBlob+35)>szPage ){
-        nTotal += (nBlob + 34)/szPage;
-      }
-    }
-  }
-
-  *pnOvfl = nTotal;
-  return rc;
-}
-
-/*
-** Destroy the segment reader pReader. Passing NULL is allowed.
-**
-** For a reader that is not a pending-terms reader, the zTerm buffer is
-** freed and, unless the reader is root-only, so is the aNode buffer and
-** the pBlob handle is closed. The reader structure itself is always
-** passed to sqlite3_free().
-*/
-void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){
-  if( pReader ){
-    if( !fts3SegReaderIsPending(pReader) ){
-      sqlite3_free(pReader->zTerm);
-      if( !fts3SegReaderIsRootOnly(pReader) ){
-        sqlite3_free(pReader->aNode);
-        sqlite3_blob_close(pReader->pBlob);
-      }
-    }
-  }
-  sqlite3_free(pReader);
-}
-
-/*
-** Allocate a new SegReader object.
-**
-** On success *ppReader is set to point at the new object and SQLITE_OK is
-** returned. If the allocation fails, SQLITE_NOMEM is returned and
-** *ppReader is not written.
-**
-** If iStartLeaf is zero, (nRoot + FTS3_NODE_PADDING) extra bytes are
-** allocated directly after the Fts3SegReader structure. The nRoot bytes
-** at zRoot are copied there, followed by FTS3_NODE_PADDING zero bytes,
-** aNode is pointed at the copy and the reader is flagged as rootOnly.
-** Otherwise no node data is loaded by this function and iCurrentBlock is
-** initialised to (iStartLeaf-1).
-*/
-int sqlite3Fts3SegReaderNew(
- int iAge, /* Segment "age". */
- int bLookup, /* True for a lookup only */
- sqlite3_int64 iStartLeaf, /* First leaf to traverse */
- sqlite3_int64 iEndLeaf, /* Final leaf to traverse */
- sqlite3_int64 iEndBlock, /* Final block of segment */
- const char *zRoot, /* Buffer containing root node */
- int nRoot, /* Size of buffer containing root node */
- Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */
-){
- Fts3SegReader *pReader; /* Newly allocated SegReader object */
- int nExtra = 0; /* Bytes to allocate segment root node */
-
- assert( iStartLeaf<=iEndLeaf );
- if( iStartLeaf==0 ){
- nExtra = nRoot + FTS3_NODE_PADDING;
- }
-
- pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
- if( !pReader ){
- return SQLITE_NOMEM;
- }
- memset(pReader, 0, sizeof(Fts3SegReader)); /* Only the struct; the extra bytes are filled below */
- pReader->iIdx = iAge;
- pReader->bLookup = bLookup!=0; /* Normalise to 0 or 1 */
- pReader->iStartBlock = iStartLeaf;
- pReader->iLeafEndBlock = iEndLeaf;
- pReader->iEndBlock = iEndBlock;
-
- if( nExtra ){
- /* The entire segment is stored in the root node. */
- pReader->aNode = (char *)&pReader[1]; /* Extra space following the struct */
- pReader->rootOnly = 1;
- pReader->nNode = nRoot;
- memcpy(pReader->aNode, zRoot, nRoot);
- memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING);
- }else{
- pReader->iCurrentBlock = iStartLeaf-1;
- }
- *ppReader = pReader;
- return SQLITE_OK;
-}
-
-/*
-** qsort() callback used to order an array of (Fts3HashElem*) pointers by
-** hash key. The keys are compared with memcmp() over the length of the
-** shorter key; if that comparison is a tie, the shorter key sorts first.
-*/
-static int fts3CompareElemByTerm(const void *lhs, const void *rhs){
-  Fts3HashElem *pLeft = *(Fts3HashElem **)lhs;
-  Fts3HashElem *pRight = *(Fts3HashElem **)rhs;
-  char *zLeft = fts3HashKey(pLeft);
-  char *zRight = fts3HashKey(pRight);
-  int nLeft = fts3HashKeysize(pLeft);
-  int nRight = fts3HashKeysize(pRight);
-  int nCommon = (nLeft<nRight ? nLeft : nRight);
-  int res = memcmp(zLeft, zRight, nCommon);
-
-  return res ? res : (nLeft - nRight);
-}
-
-/*
-** This function is used to allocate an Fts3SegReader that iterates through
-** a subset of the terms stored in the pending-terms hash table of index
-** iIndex (p->aIndex[iIndex].hPending).
-**
-** If bPrefix is non-zero, the reader visits every pending term that begins
-** with the nTerm bytes at zTerm (or every pending term if nTerm is zero).
-** The matching entries are sorted with fts3CompareElemByTerm(). For
-** example, if the hash table contains the terms "sqlite", "mysql" and
-** "firebird" and nTerm is zero, the terms are visited in the order:
-**
-** firebird mysql sqlite
-**
-** If bPrefix is zero, a single hash lookup of zTerm/nTerm is performed and
-** the reader visits at most that one term.
-**
-** NOTE(review): an earlier version of this comment named the flag
-** "isPrefixIter" and described iteration over each prefix of each term;
-** nothing in this function does that.
-**
-** *ppReader is set to the new reader, or to NULL if no term matched or an
-** allocation failed. SQLITE_OK or SQLITE_NOMEM is returned.
-*/
-int sqlite3Fts3SegReaderPending(
- Fts3Table *p, /* Virtual table handle */
- int iIndex, /* Index for p->aIndex */
- const char *zTerm, /* Term to search for */
- int nTerm, /* Size of buffer zTerm */
- int bPrefix, /* True for a prefix iterator */
- Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */
-){
- Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */
- Fts3HashElem *pE; /* Iterator variable */
- Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */
- int nElem = 0; /* Size of array at aElem */
- int rc = SQLITE_OK; /* Return Code */
- Fts3Hash *pHash;
-
- pHash = &p->aIndex[iIndex].hPending;
- if( bPrefix ){
- int nAlloc = 0; /* Size of allocated array at aElem */
-
- for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
- char *zKey = (char *)fts3HashKey(pE);
- int nKey = fts3HashKeysize(pE);
- if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){
- if( nElem==nAlloc ){
- Fts3HashElem **aElem2;
- nAlloc += 16; /* Grow the array 16 slots at a time */
- aElem2 = (Fts3HashElem **)sqlite3_realloc(
- aElem, nAlloc*sizeof(Fts3HashElem *)
- );
- if( !aElem2 ){
- rc = SQLITE_NOMEM;
- nElem = 0; /* Skips the sort and the allocation below; aElem is still freed */
- break;
- }
- aElem = aElem2;
- }
-
- aElem[nElem++] = pE;
- }
- }
-
- /* If more than one term matches the prefix, sort the Fts3HashElem
- ** objects in term order using qsort(). This uses the same comparison
- ** callback as is used when flushing terms to disk.
- */
- if( nElem>1 ){
- qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);
- }
-
- }else{
- /* The query is a simple term lookup that matches at most one term in
- ** the index. All that is required is a straight hash-lookup.
- **
- ** Because the stack address of pE may be accessed via the aElem pointer
- ** below, the "Fts3HashElem *pE" must be declared so that it is valid
- ** within this entire function, not just this "else{...}" block.
- */
- pE = fts3HashFindElem(pHash, zTerm, nTerm);
- if( pE ){
- aElem = &pE;
- nElem = 1;
- }
- }
-
- if( nElem>0 ){
- int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); /* One spare slot; memset below leaves it NULL */
- pReader = (Fts3SegReader *)sqlite3_malloc(nByte);
- if( !pReader ){
- rc = SQLITE_NOMEM;
- }else{
- memset(pReader, 0, nByte);
- pReader->iIdx = 0x7FFFFFFF;
- pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; /* Element array follows the struct */
- memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
- }
- }
-
- if( bPrefix ){
- sqlite3_free(aElem); /* Heap array only exists in the prefix case */
- }
- *ppReader = pReader;
- return rc;
-}
-
-/*
-** Order two Fts3SegReader objects. The rules, applied in order, are:
-**
-**   1) A reader with no current node (aNode==0) sorts after one that has
-**      a current node.
-**
-**   2) If both have a current node, the current terms are compared with
-**      memcmp() over the length of the shorter term. On a tie the
-**      shorter term sorts first.
-**
-**   3) If still tied, the reader with the larger iIdx value sorts first.
-**
-** The result is asserted to be non-zero.
-*/
-static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
-  int res;
-
-  if( pLhs->aNode==0 || pRhs->aNode==0 ){
-    res = (pLhs->aNode==0) - (pRhs->aNode==0);
-  }else{
-    int nDiff = pLhs->nTerm - pRhs->nTerm;
-    int nCmp = (nDiff<0) ? pLhs->nTerm : pRhs->nTerm;
-    res = memcmp(pLhs->zTerm, pRhs->zTerm, nCmp);
-    if( res==0 ) res = nDiff;
-  }
-
-  if( res==0 ){
-    res = pRhs->iIdx - pLhs->iIdx;
-  }
-  assert( res!=0 );
-  return res;
-}
-
-/*
-** Comparison function for SegReader objects that are positioned within a
-** doclist. The rules, applied in order, are:
-**
-**   1) A reader whose pOffsetList is NULL sorts after one whose
-**      pOffsetList is not NULL.
-**
-**   2) The reader with the smaller iDocid sorts first.
-**
-**   3) For equal docids, the reader with the larger iIdx sorts first.
-**
-** Both readers are asserted to have a current node.
-*/
-static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
-  int res = (pLhs->pOffsetList==0) - (pRhs->pOffsetList==0);
-
-  if( res==0 ){
-    if( pLhs->iDocid>pRhs->iDocid ){
-      res = 1;
-    }else if( pLhs->iDocid==pRhs->iDocid ){
-      res = pRhs->iIdx - pLhs->iIdx;
-    }else{
-      res = -1;
-    }
-  }
-  assert( pLhs->aNode && pRhs->aNode );
-  return res;
-}
-/*
-** Same as fts3SegReaderDoclistCmp(), except that the docid ordering is
-** reversed: the reader with the larger iDocid sorts first. A NULL
-** pOffsetList still sorts last and ties are still broken in favour of the
-** larger iIdx.
-*/
-static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
-  int res = (pLhs->pOffsetList==0) - (pRhs->pOffsetList==0);
-
-  if( res==0 ){
-    if( pLhs->iDocid<pRhs->iDocid ){
-      res = 1;
-    }else if( pLhs->iDocid==pRhs->iDocid ){
-      res = pRhs->iIdx - pLhs->iIdx;
-    }else{
-      res = -1;
-    }
-  }
-  assert( pLhs->aNode && pRhs->aNode );
-  return res;
-}
-
-/*
-** Compare the current term of segment reader pSeg with the nTerm byte
-** term at zTerm.
-**
-** Returns 0 if pSeg has no current node (aNode==0). Otherwise the two
-** terms are compared with memcmp() over the length of the shorter one and,
-** on a tie, by length. The result is negative, zero or positive if the
-** pSeg term is less than, equal to or greater than zTerm/nTerm.
-*/
-static int fts3SegReaderTermCmp(
-  Fts3SegReader *pSeg,            /* Segment reader object */
-  const char *zTerm,              /* Term to compare to */
-  int nTerm                       /* Size of term zTerm in bytes */
-){
-  int nCmp;                       /* Number of bytes to compare */
-  int res;                        /* Comparison result */
-
-  if( pSeg->aNode==0 ){
-    return 0;
-  }
-  nCmp = (pSeg->nTerm>nTerm) ? nTerm : pSeg->nTerm;
-  res = memcmp(pSeg->zTerm, zTerm, nCmp);
-  return res ? res : (pSeg->nTerm - nTerm);
-}
-
-/*
-** Argument apSegment is an array of nSegment elements. It is known that
-** the final (nSegment-nSuspect) members are already in sorted order
-** (according to the comparison function provided). This function shuffles
-** the array around until all entries are in sorted order.
-**
-** The first nSuspect entries are processed from last to first. Each one is
-** swapped rightwards past its neighbour until xCmp() reports that it is
-** smaller than the entry that follows it, or it reaches the end of the
-** array. If nSuspect==nSegment the final entry is treated as the initial
-** sorted tail.
-**
-** In debug builds the function then asserts that xCmp() returns a
-** strictly negative value for each adjacent pair among the first nSuspect
-** entries.
-*/
-static void fts3SegReaderSort(
- Fts3SegReader **apSegment, /* Array to sort entries of */
- int nSegment, /* Size of apSegment array */
- int nSuspect, /* Unsorted entry count */
- int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */
-){
- int i; /* Iterator variable */
-
- assert( nSuspect<=nSegment );
-
- if( nSuspect==nSegment ) nSuspect--; /* A single trailing entry is trivially sorted */
- for(i=nSuspect-1; i>=0; i--){
- int j;
- for(j=i; j<(nSegment-1); j++){
- Fts3SegReader *pTmp;
- if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; /* Entry j has reached its place */
- pTmp = apSegment[j+1];
- apSegment[j+1] = apSegment[j];
- apSegment[j] = pTmp;
- }
- }
-
-#ifndef NDEBUG
- /* Check that the list really is sorted now. */
- for(i=0; i<(nSuspect-1); i++){
- assert( xCmp(apSegment[i], apSegment[i+1])<0 );
- }
-#endif
-}
-
-/*
-** Run the SQL_INSERT_SEGMENTS statement to add one row to the %_segments
-** table: block id iBlock with the n bytes at z as its blob. The blob is
-** bound with SQLITE_STATIC.
-**
-** Returns the error code from obtaining the statement, or else the value
-** returned by sqlite3_reset() after the statement has been stepped.
-*/
-static int fts3WriteSegment(
-  Fts3Table *p,                   /* Virtual table handle */
-  sqlite3_int64 iBlock,           /* Block id for new block */
-  char *z,                        /* Pointer to buffer containing block data */
-  int n                           /* Size of buffer z in bytes */
-){
-  sqlite3_stmt *pInsert;
-  int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pInsert, 0);
-
-  if( rc!=SQLITE_OK ){
-    return rc;
-  }
-  sqlite3_bind_int64(pInsert, 1, iBlock);
-  sqlite3_bind_blob(pInsert, 2, z, n, SQLITE_STATIC);
-  sqlite3_step(pInsert);
-  return sqlite3_reset(pInsert);
-}
-
-/*
-** Run the SQL_SELECT_MXLEVEL statement and store the integer in column 0
-** of its first row in *pnMax. If the statement cannot be obtained or
-** returns no row, *pnMax is set to zero.
-**
-** Returns the error code from obtaining the statement, or else the value
-** returned by sqlite3_reset().
-*/
-int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){
-  sqlite3_stmt *pSelect = 0;
-  int iMax = 0;
-  int rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pSelect, 0);
-
-  if( rc==SQLITE_OK ){
-    int rcStep = sqlite3_step(pSelect);
-    if( rcStep==SQLITE_ROW ){
-      iMax = sqlite3_column_int(pSelect, 0);
-    }
-    rc = sqlite3_reset(pSelect);
-  }
-
-  *pnMax = iMax;
-  return rc;
-}
-
-/*
-** Run the SQL_INSERT_SEGDIR statement to add one row to the %_segdir
-** table. The six statement parameters are bound, in order, to iLevel,
-** iIdx, iStartBlock, iLeafEndBlock, iEndBlock and the nRoot byte blob at
-** zRoot (bound with SQLITE_STATIC).
-**
-** Returns the error code from obtaining the statement, or else the value
-** returned by sqlite3_reset() after the statement has been stepped.
-*/
-static int fts3WriteSegdir(
-  Fts3Table *p,                   /* Virtual table handle */
-  sqlite3_int64 iLevel,           /* Value for "level" field (absolute level) */
-  int iIdx,                       /* Value for "idx" field */
-  sqlite3_int64 iStartBlock,      /* Value for "start_block" field */
-  sqlite3_int64 iLeafEndBlock,    /* Value for "leaves_end_block" field */
-  sqlite3_int64 iEndBlock,        /* Value for "end_block" field */
-  char *zRoot,                    /* Blob value for "root" field */
-  int nRoot                       /* Number of bytes in buffer zRoot */
-){
-  sqlite3_stmt *pInsert;
-  int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pInsert, 0);
-
-  if( rc!=SQLITE_OK ){
-    return rc;
-  }
-  sqlite3_bind_int64(pInsert, 1, iLevel);
-  sqlite3_bind_int(pInsert, 2, iIdx);
-  sqlite3_bind_int64(pInsert, 3, iStartBlock);
-  sqlite3_bind_int64(pInsert, 4, iLeafEndBlock);
-  sqlite3_bind_int64(pInsert, 5, iEndBlock);
-  sqlite3_bind_blob(pInsert, 6, zRoot, nRoot, SQLITE_STATIC);
-  sqlite3_step(pInsert);
-  return sqlite3_reset(pInsert);
-}
-
-/*
-** Return the size of the common prefix (if any) shared by zPrev and
-** zNext, in bytes. For example,
-**
-**   fts3PrefixCompress("abc", 3, "abcdef", 6)   // returns 3
-**   fts3PrefixCompress("abX", 3, "abcdef", 6)   // returns 2
-**   fts3PrefixCompress("abX", 3, "Xbcdef", 6)   // returns 0
-**
-** The comparison never looks beyond the end of either buffer, so the
-** result is at most the smaller of nPrev and nNext. (Previously nNext was
-** ignored, which allowed reads past the end of zNext and a result larger
-** than nNext when zPrev was the longer buffer.) A zero or negative length
-** yields 0.
-*/
-static int fts3PrefixCompress(
-  const char *zPrev,              /* Buffer containing previous term */
-  int nPrev,                      /* Size of buffer zPrev in bytes */
-  const char *zNext,              /* Buffer containing next term */
-  int nNext                       /* Size of buffer zNext in bytes */
-){
-  int nLimit = (nPrev<nNext) ? nPrev : nNext;  /* Bytes safe to compare */
-  int n = 0;
-
-  while( n<nLimit && zPrev[n]==zNext[n] ){
-    n++;
-  }
-  return n;
-}
-
-/*
-** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger
-** (according to memcmp) than the previous term.
-**
-** *ppTree is the node currently being appended to, or NULL if no node has
-** been created yet. If the term fits in that node (or the node is still
-** empty) it is appended in place and *ppTree is left unchanged. Otherwise
-** a new right-sibling node is allocated and *ppTree is set to it; the term
-** itself is added to the parent level by a recursive call, or to the new
-** node when *ppTree was NULL.
-**
-** Returns SQLITE_OK on success, SQLITE_NOMEM if an allocation fails, or
-** whatever the recursive call returned.
-*/
-static int fts3NodeAddTerm(
- Fts3Table *p, /* Virtual table handle */
- SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */
- int isCopyTerm, /* True if zTerm/nTerm is transient */
- const char *zTerm, /* Pointer to buffer containing term */
- int nTerm /* Size of term in bytes */
-){
- SegmentNode *pTree = *ppTree;
- int rc;
- SegmentNode *pNew;
-
- /* First try to append the term to the current node. Return early if
- ** this is possible.
- */
- if( pTree ){
- int nData = pTree->nData; /* Current size of node in bytes */
- int nReq = nData; /* Required space after adding zTerm */
- int nPrefix; /* Number of bytes of prefix compression */
- int nSuffix; /* Suffix length */
-
- nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm);
- nSuffix = nTerm-nPrefix;
-
- nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix;
- if( nReq<=p->nNodeSize || !pTree->zTerm ){ /* Fits, or the node has no term yet */
-
- if( nReq>p->nNodeSize ){
- /* An unusual case: this is the first term to be added to the node
- ** and the static node buffer (p->nNodeSize bytes) is not large
- ** enough. Use a separately malloced buffer instead. This wastes
- ** p->nNodeSize bytes, but since this scenario only comes about when
- ** the database contain two terms that share a prefix of almost 2KB,
- ** this is not expected to be a serious problem.
- */
- assert( pTree->aData==(char *)&pTree[1] );
- pTree->aData = (char *)sqlite3_malloc(nReq);
- if( !pTree->aData ){
- return SQLITE_NOMEM;
- }
- }
-
- if( pTree->zTerm ){
- /* There is no prefix-length field for first term in a node */
- nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix);
- }
-
- nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix);
- memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix);
- pTree->nData = nData + nSuffix;
- pTree->nEntry++;
-
- if( isCopyTerm ){
- if( pTree->nMalloc<nTerm ){
- char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2); /* Twice the needed size */
- if( !zNew ){
- return SQLITE_NOMEM;
- }
- pTree->nMalloc = nTerm*2;
- pTree->zMalloc = zNew;
- }
- pTree->zTerm = pTree->zMalloc;
- memcpy(pTree->zTerm, zTerm, nTerm);
- pTree->nTerm = nTerm;
- }else{
- pTree->zTerm = (char *)zTerm; /* Caller's buffer is referenced, not copied */
- pTree->nTerm = nTerm;
- }
- return SQLITE_OK;
- }
- }
-
- /* If control flows to here, it was not possible to append zTerm to the
- ** current node. Create a new node (a right-sibling of the current node).
- ** If this is the first node in the tree, the term is added to it.
- **
- ** Otherwise, the term is not added to the new node, it is left empty for
- ** now. Instead, the term is inserted into the parent of pTree. If pTree
- ** has no parent, one is created here.
- */
- pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize);
- if( !pNew ){
- return SQLITE_NOMEM;
- }
- memset(pNew, 0, sizeof(SegmentNode));
- pNew->nData = 1 + FTS3_VARINT_MAX; /* Leading bytes are filled in later by fts3TreeFinishNode() */
- pNew->aData = (char *)&pNew[1]; /* Node buffer follows the struct */
-
- if( pTree ){
- SegmentNode *pParent = pTree->pParent;
- rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm);
- if( pTree->pParent==0 ){
- pTree->pParent = pParent;
- }
- pTree->pRight = pNew;
- pNew->pLeftmost = pTree->pLeftmost;
- pNew->pParent = pParent;
- pNew->zMalloc = pTree->zMalloc; /* Hand the term buffer over to the new node */
- pNew->nMalloc = pTree->nMalloc;
- pTree->zMalloc = 0;
- }else{
- pNew->pLeftmost = pNew;
- rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm);
- }
-
- *ppTree = pNew; /* Set even when rc is an error code */
- return rc;
-}
-
-/*
-** Helper function for fts3NodeWrite().
-**
-** Write the node header into the reserved space at the start of
-** pTree->aData: one byte holding iHeight, immediately followed by
-** iLeftChild encoded as a varint. The header is placed so that the varint
-** ends at offset (1 + FTS3_VARINT_MAX). The offset of the height byte is
-** returned.
-*/
-static int fts3TreeFinishNode(
-  SegmentNode *pTree,
-  int iHeight,
-  sqlite3_int64 iLeftChild
-){
-  int iHdr;                       /* Offset of the first header byte */
-  char *aHdr;                     /* Pointer to the first header byte */
-
-  assert( iHeight>=1 && iHeight<128 );
-  iHdr = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild);
-  aHdr = &pTree->aData[iHdr];
-  aHdr[0] = (char)iHeight;
-  sqlite3Fts3PutVarint(&aHdr[1], iLeftChild);
-  return iHdr;
-}
-
-/*
-** Write the buffer for the segment node pTree and all of its peers to the
-** database. Then call this function recursively to write the parent of
-** pTree and its peers to the database.
-**
-** Except, if pTree is a root node, do not write it to the database. Instead,
-** set output variables *paRoot and *pnRoot to contain the root node.
-** *paRoot points into the node's own aData buffer; no copy is made.
-**
-** Each node at this level is given its header (iHeight and the block id of
-** its left-most child) by fts3TreeFinishNode() before being written.
-** Nodes are written to consecutive block ids starting at iFree.
-**
-** If successful, SQLITE_OK is returned and output variable *piLast is
-** set to the largest blockid written to the database (or zero if no
-** blocks were written to the db). Otherwise, an SQLite error code is
-** returned.
-*/
-static int fts3NodeWrite(
- Fts3Table *p, /* Virtual table handle */
- SegmentNode *pTree, /* SegmentNode handle */
- int iHeight, /* Height of this node in tree */
- sqlite3_int64 iLeaf, /* Block id of first leaf node */
- sqlite3_int64 iFree, /* Block id of next free slot in %_segments */
- sqlite3_int64 *piLast, /* OUT: Block id of last entry written */
- char **paRoot, /* OUT: Data for root node */
- int *pnRoot /* OUT: Size of root node in bytes */
-){
- int rc = SQLITE_OK;
-
- if( !pTree->pParent ){
- /* Root node of the tree. */
- int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf);
- *piLast = iFree-1;
- *pnRoot = pTree->nData - nStart;
- *paRoot = &pTree->aData[nStart];
- }else{
- SegmentNode *pIter;
- sqlite3_int64 iNextFree = iFree;
- sqlite3_int64 iNextLeaf = iLeaf;
- for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){
- int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf);
- int nWrite = pIter->nData - nStart;
-
- rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite);
- iNextFree++;
- iNextLeaf += (pIter->nEntry+1); /* NOTE(review): presumably nEntry terms separate nEntry+1 children */
- }
- if( rc==SQLITE_OK ){
- assert( iNextLeaf==iFree );
- rc = fts3NodeWrite(
- p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot
- ); /* The nodes just written become the children of the next level up */
- }
- }
-
- return rc;
-}
-
-/*
-** Release the tree that pTree belongs to. Passing NULL is a no-op.
-**
-** The parent level is freed first by a recursive call. Then every node at
-** the level of pTree is freed, starting from the left-most node and
-** following the pRight links. For each node this frees the aData buffer
-** (only if it is not the buffer allocated inline after the structure), the
-** zMalloc buffer and the node itself.
-*/
-static void fts3NodeFree(SegmentNode *pTree){
-  SegmentNode *pNode;
-  SegmentNode *pNext;
-
-  if( pTree==0 ){
-    return;
-  }
-  pNode = pTree->pLeftmost;
-  fts3NodeFree(pNode->pParent);
-  for(; pNode; pNode=pNext){
-    pNext = pNode->pRight;
-    if( pNode->aData!=(char *)&pNode[1] ){
-      sqlite3_free(pNode->aData);
-    }
-    assert( pNext==0 || pNode->zMalloc==0 );
-    sqlite3_free(pNode->zMalloc);
-    sqlite3_free(pNode);
-  }
-}
-
-/*
-** Add a term to the segment being constructed by the SegmentWriter object
-** *ppWriter. When adding the first term to a segment, *ppWriter should
-** be passed NULL. This function will allocate a new SegmentWriter object
-** and return it via the input/output variable *ppWriter in this case.
-**
-** *ppWriter is assigned as soon as the SegmentWriter structure itself has
-** been allocated, so it may be non-NULL even if a later initialisation
-** step fails and an error code is returned.
-**
-** Each entry appended to the current leaf buffer consists of: a varint
-** prefix length, a varint suffix length, the term suffix, a varint doclist
-** size and the doclist. If the buffer is non-empty and the new entry would
-** take it past p->nNodeSize bytes, the buffer is first written out with
-** fts3WriteSegment(), a separator term is added to the interior node tree
-** and the new entry starts a fresh leaf with no prefix compression.
-**
-** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
-*/
-static int fts3SegWriterAdd(
- Fts3Table *p, /* Virtual table handle */
- SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */
- int isCopyTerm, /* True if buffer zTerm must be copied */
- const char *zTerm, /* Pointer to buffer containing term */
- int nTerm, /* Size of term in bytes */
- const char *aDoclist, /* Pointer to buffer containing doclist */
- int nDoclist /* Size of doclist in bytes */
-){
- int nPrefix; /* Size of term prefix in bytes */
- int nSuffix; /* Size of term suffix in bytes */
- int nReq; /* Number of bytes required on leaf page */
- int nData;
- SegmentWriter *pWriter = *ppWriter;
-
- if( !pWriter ){
- int rc;
- sqlite3_stmt *pStmt;
-
- /* Allocate the SegmentWriter structure */
- pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter));
- if( !pWriter ) return SQLITE_NOMEM;
- memset(pWriter, 0, sizeof(SegmentWriter));
- *ppWriter = pWriter;
-
- /* Allocate a buffer in which to accumulate data */
- pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize);
- if( !pWriter->aData ) return SQLITE_NOMEM;
- pWriter->nSize = p->nNodeSize;
-
- /* Find the next free blockid in the %_segments table */
- rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0);
- if( rc!=SQLITE_OK ) return rc;
- if( SQLITE_ROW==sqlite3_step(pStmt) ){
- pWriter->iFree = sqlite3_column_int64(pStmt, 0);
- pWriter->iFirst = pWriter->iFree;
- }
- rc = sqlite3_reset(pStmt);
- if( rc!=SQLITE_OK ) return rc;
- }
- nData = pWriter->nData;
-
- nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm);
- nSuffix = nTerm-nPrefix;
-
- /* Figure out how many bytes are required by this new entry */
- nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */
- sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */
- nSuffix + /* Term suffix */
- sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
- nDoclist; /* Doclist data */
-
- if( nData>0 && nData+nReq>p->nNodeSize ){
- int rc;
-
- /* The current leaf node is full. Write it out to the database. */
- rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData);
- if( rc!=SQLITE_OK ) return rc;
- p->nLeafAdd++;
-
- /* Add the current term to the interior node tree. The term added to
- ** the interior tree must:
- **
- ** a) be greater than the largest term on the leaf node just written
- ** to the database (still available in pWriter->zTerm), and
- **
- ** b) be less than or equal to the term about to be added to the new
- ** leaf node (zTerm/nTerm).
- **
- ** In other words, it must be the prefix of zTerm 1 byte longer than
- ** the common prefix (if any) of zTerm and pWriter->zTerm.
- */
- assert( nPrefix<nTerm );
- rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1);
- if( rc!=SQLITE_OK ) return rc;
-
- nData = 0; /* Start again at the beginning of the leaf buffer */
- pWriter->nTerm = 0;
-
- nPrefix = 0; /* First term on a leaf is stored in full */
- nSuffix = nTerm;
- nReq = 1 + /* varint containing prefix size */
- sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */
- nTerm + /* Term suffix */
- sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
- nDoclist; /* Doclist data */
- }
-
- /* If the buffer currently allocated is too small for this entry, realloc
- ** the buffer to make it large enough.
- */
- if( nReq>pWriter->nSize ){
- char *aNew = sqlite3_realloc(pWriter->aData, nReq);
- if( !aNew ) return SQLITE_NOMEM;
- pWriter->aData = aNew;
- pWriter->nSize = nReq;
- }
- assert( nData+nReq<=pWriter->nSize );
-
- /* Append the prefix-compressed term and doclist to the buffer. */
- nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix);
- nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix);
- memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix);
- nData += nSuffix;
- nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist);
- memcpy(&pWriter->aData[nData], aDoclist, nDoclist);
- pWriter->nData = nData + nDoclist;
-
- /* Save the current term so that it can be used to prefix-compress the next.
- ** If the isCopyTerm parameter is true, then the buffer pointed to by
- ** zTerm is transient, so take a copy of the term data. Otherwise, just
- ** store a copy of the pointer.
- */
- if( isCopyTerm ){
- if( nTerm>pWriter->nMalloc ){
- char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); /* Twice the needed size */
- if( !zNew ){
- return SQLITE_NOMEM;
- }
- pWriter->nMalloc = nTerm*2;
- pWriter->zMalloc = zNew;
- pWriter->zTerm = zNew;
- }
- assert( pWriter->zTerm==pWriter->zMalloc );
- memcpy(pWriter->zTerm, zTerm, nTerm);
- }else{
- pWriter->zTerm = (char *)zTerm;
- }
- pWriter->nTerm = nTerm;
-
- return SQLITE_OK;
-}
-
-/*
-** Write everything still held by SegmentWriter pWriter to the database.
-** This is to be called once all terms have been added to the segment
-** using fts3SegWriterAdd().
-**
-** If the writer has no interior node tree, its leaf buffer is stored as
-** the root of a new %_segdir row with all three block ids set to zero.
-** Otherwise the leaf buffer is written as the final leaf block, the
-** interior nodes are written by fts3NodeWrite() and a %_segdir row is
-** added that records the first leaf block, the last leaf block, the last
-** block written and the root node.
-**
-** p->nLeafAdd is incremented whether or not an error occurs. SQLITE_OK is
-** returned on success, or an SQLite error code otherwise.
-*/
-static int fts3SegWriterFlush(
-  Fts3Table *p,                   /* Virtual table handle */
-  SegmentWriter *pWriter,         /* SegmentWriter to flush to the db */
-  sqlite3_int64 iLevel,           /* Value for 'level' column of %_segdir */
-  int iIdx                        /* Value for 'idx' column of %_segdir */
-){
-  int rc;                         /* Return code */
-
-  if( pWriter->pTree==0 ){
-    /* The entire tree fits on the root node. Write it to the segdir table. */
-    rc = fts3WriteSegdir(
-        p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
-  }else{
-    sqlite3_int64 iLast = 0;      /* Largest block id written to database */
-    sqlite3_int64 iLastLeaf;      /* Largest leaf block id written to db */
-    char *zRoot = NULL;           /* Pointer to buffer containing root node */
-    int nRoot = 0;                /* Size of buffer zRoot */
-
-    iLastLeaf = pWriter->iFree;
-    rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData);
-    if( rc==SQLITE_OK ){
-      rc = fts3NodeWrite(p, pWriter->pTree, 1,
-          pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
-    }
-    if( rc==SQLITE_OK ){
-      rc = fts3WriteSegdir(
-          p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot);
-    }
-  }
-
-  p->nLeafAdd++;
-  return rc;
-}
-
-/*
-** Free the SegmentWriter object pWriter together with its leaf buffer
-** (aData), its term buffer (zMalloc) and its interior node tree (pTree).
-** Passing NULL is a no-op.
-*/
-static void fts3SegWriterFree(SegmentWriter *pWriter){
-  if( pWriter==0 ){
-    return;
-  }
-  sqlite3_free(pWriter->aData);
-  sqlite3_free(pWriter->zMalloc);
-  fts3NodeFree(pWriter->pTree);
-  sqlite3_free(pWriter);
-}
-
-/*
-** Test whether deleting the document whose docid is pRowid would leave
-** the FTS3 table empty, i.e. whether there are no documents with a docid
-** different from pRowid.
-**
-** If the table uses the content=xxx option (p->zContentTbl is set), it is
-** assumed never to be empty: *pisEmpty is set to 0 and no query is run.
-** Otherwise the SQL_IS_EMPTY statement is run with pRowid as its
-** parameter and, if it returns a row, *pisEmpty is set to the integer in
-** column 0. If no row is returned *pisEmpty is left unchanged.
-**
-** Returns SQLITE_OK if successful, or an SQLite error code otherwise.
-*/
-static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
-  sqlite3_stmt *pStmt;
-  int rc;
-
-  if( p->zContentTbl ){
-    /* If using the content=xxx option, assume the table is never empty */
-    *pisEmpty = 0;
-    return SQLITE_OK;
-  }
-
-  rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
-  if( rc!=SQLITE_OK ){
-    return rc;
-  }
-  if( sqlite3_step(pStmt)==SQLITE_ROW ){
-    *pisEmpty = sqlite3_column_int(pStmt, 0);
-  }
-  return sqlite3_reset(pStmt);
-}
-
-/*
-** Set *pnMax to the largest segment level in the database for language
-** iLangid and index iIndex.
-**
-** Segment levels are stored in the 'level' column of the %_segdir table.
-** The SQL_SELECT_SEGDIR_MAX_LEVEL statement is run with its two
-** parameters bound to the absolute levels that correspond to relative
-** levels 0 and (FTS3_SEGDIR_MAXLEVEL-1), as computed by
-** getAbsoluteLevel(). If the statement returns no row, *pnMax is left
-** unchanged.
-**
-** Return SQLITE_OK if successful, or an SQLite error code if not.
-*/
-static int fts3SegmentMaxLevel(
-  Fts3Table *p,
-  int iLangid,
-  int iIndex,
-  sqlite3_int64 *pnMax
-){
-  sqlite3_stmt *pSelect;
-  int rc;
-
-  assert( iIndex>=0 && iIndex<p->nIndex );
-
-  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pSelect, 0);
-  if( rc==SQLITE_OK ){
-    sqlite3_bind_int64(pSelect, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
-    sqlite3_bind_int64(pSelect, 2,
-        getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
-    );
-    if( sqlite3_step(pSelect)==SQLITE_ROW ){
-      *pnMax = sqlite3_column_int64(pSelect, 0);
-    }
-    rc = sqlite3_reset(pSelect);
-  }
-  return rc;
-}
-
-/*
-** Remove from the %_segments table the blocks that belong to the segment
-** opened with seg-reader pSeg, by running SQL_DELETE_SEGMENTS_RANGE with
-** pSeg->iStartBlock and pSeg->iEndBlock as its parameters. The %_segdir
-** table is not touched.
-**
-** If pSeg->iStartBlock is zero nothing is done and SQLITE_OK is returned.
-*/
-static int fts3DeleteSegment(
-  Fts3Table *p,                   /* FTS table handle */
-  Fts3SegReader *pSeg             /* Segment to delete */
-){
-  sqlite3_stmt *pDelete;          /* SQL statement to delete rows */
-  int rc;                         /* Return code */
-
-  if( pSeg->iStartBlock==0 ){
-    return SQLITE_OK;
-  }
-  rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
-  if( rc!=SQLITE_OK ){
-    return rc;
-  }
-  sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock);
-  sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock);
-  sqlite3_step(pDelete);
-  return sqlite3_reset(pDelete);
-}
-
-/*
-** This function is used after merging multiple segments into a single large
-** segment to delete the old, now redundant, segment b-trees. Specifically,
-** it:
-**
-** 1) Deletes all %_segments entries for the segments associated with
-** each of the nReader SegReader objects in the array apSegment, and
-**
-** 2) deletes all %_segdir entries with level iLevel, or all %_segdir
-** entries for the given language id and index regardless of level if
-** iLevel is FTS3_SEGCURSOR_ALL.
-**
-** Step 2 is skipped if step 1 fails. SQLITE_OK is returned if successful,
-** otherwise an SQLite error code.
-*/
-static int fts3DeleteSegdir(
- Fts3Table *p, /* Virtual table handle */
- int iLangid, /* Language id */
- int iIndex, /* Index for p->aIndex */
- int iLevel, /* Level of %_segdir entries to delete */
- Fts3SegReader **apSegment, /* Array of SegReader objects */
- int nReader /* Size of array apSegment */
-){
- int rc = SQLITE_OK; /* Return Code */
- int i; /* Iterator variable */
- sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */
-
- for(i=0; rc==SQLITE_OK && i<nReader; i++){
- rc = fts3DeleteSegment(p, apSegment[i]);
- }
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
- if( iLevel==FTS3_SEGCURSOR_ALL ){
- rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); /* First absolute level of the range */
- sqlite3_bind_int64(pDelete, 2,
- getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
- ); /* Last absolute level of the range */
- }
- }else{
- rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(
- pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
- );
- }
- }
-
- if( rc==SQLITE_OK ){
- sqlite3_step(pDelete);
- rc = sqlite3_reset(pDelete); /* The result of the step is reported via reset */
- }
-
- return rc;
-}
-
-/*
-** When this function is called, buffer *ppList (size *pnList bytes) contains
-** a position list that may (or may not) feature multiple columns. This
-** function adjusts the pointer *ppList and the length *pnList so that they
-** identify the subset of the position list that corresponds to column iCol.
-**
-** If there are no entries in the input position list for column iCol, then
-** *pnList is set to zero before returning.
-**
-** The list is scanned one column at a time. Each inner scan stops at a
-** byte whose value is 0x00 or 0x01 that is not the continuation of a
-** varint. The byte after such a marker is decoded as the varint column
-** number of the entries that follow.
-**
-** A malformed list is treated as containing no entries for iCol. This
-** covers a column-number varint that extends past the end of the buffer
-** and a remaining length that is not positive. Previously, in both cases,
-** the scan could continue beyond the end of the buffer.
-*/
-static void fts3ColumnFilter(
-  int iCol,                       /* Column to filter on */
-  char **ppList,                  /* IN/OUT: Pointer to position list */
-  int *pnList                     /* IN/OUT: Size of buffer *ppList in bytes */
-){
-  char *pList = *ppList;
-  int nList = *pnList;
-  char *pEnd = &pList[nList];
-  int iCurrent = 0;               /* Column the scan is currently within */
-  char *p = pList;
-
-  assert( iCol>=0 );
-  while( 1 ){
-    /* Advance p to the end of the entries for column iCurrent. Variable c
-    ** holds the continuation bit of the previous byte so that a 0x00 or
-    ** 0x01 byte that is the tail of a varint is not taken for a marker. */
-    char c = 0;
-    while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
-
-    if( iCol==iCurrent ){
-      nList = (int)(p - pList);
-      break;
-    }
-
-    nList -= (int)(p - pList);
-    pList = p;
-    if( nList<=0 ){
-      /* End of list reached (or the length was never positive). */
-      nList = 0;
-      break;
-    }
-
-    /* Skip the marker byte and read the number of the next column. */
-    p = &pList[1];
-    p += sqlite3Fts3GetVarint32(p, &iCurrent);
-    if( p>pEnd ){
-      /* The column number ran off the end of the buffer. */
-      nList = 0;
-      break;
-    }
-  }
-
-  *ppList = pList;
-  *pnList = nList;
-}
-
-/*
-** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any
-** existing data). Grow the buffer if required.
-**
-** When nList exceeds the current capacity pMsr->nBuffer, the buffer is
-** reallocated to twice nList bytes. pMsr->nBuffer is updated only after
-** the reallocation has succeeded, so that after an OOM error the recorded
-** capacity still matches the buffer actually held. Previously the
-** capacity was enlarged first, which allowed a later call to copy past
-** the end of the old buffer.
-**
-** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered
-** trying to resize the buffer, return SQLITE_NOMEM. In that case the
-** existing buffer and its contents are left untouched.
-*/
-static int fts3MsrBufferData(
-  Fts3MultiSegReader *pMsr,       /* Multi-segment-reader handle */
-  char *pList,                    /* Data to copy into the buffer */
-  int nList                       /* Number of bytes to copy from pList */
-){
-  if( nList>pMsr->nBuffer ){
-    int nNew = nList*2;           /* New buffer capacity in bytes */
-    char *pNew = (char *)sqlite3_realloc(pMsr->aBuffer, nNew);
-    if( !pNew ) return SQLITE_NOMEM;
-    pMsr->aBuffer = pNew;
-    pMsr->nBuffer = nNew;
-  }
-
-  memcpy(pMsr->aBuffer, pList, nList);
-  return SQLITE_OK;
-}
-
-/*
-** Step the multi-segment reader pMsr to the next docid that has a
-** non-empty position list (after the optional column filter recorded in
-** pMsr->iColFilter has been applied).
-**
-** On success SQLITE_OK is returned. If an entry was found, *piDocid,
-** *paPoslist and *pnPoslist are set to describe it. If there are no more
-** entries (or pMsr->nAdvance is zero), *paPoslist is set to 0 and the
-** other two outputs are left unmodified. If a segment reader reports an
-** error, that error code is returned instead.
-*/
-int sqlite3Fts3MsrIncrNext(
- Fts3Table *p, /* Virtual table handle */
- Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */
- sqlite3_int64 *piDocid, /* OUT: Docid value */
- char **paPoslist, /* OUT: Pointer to position list */
- int *pnPoslist /* OUT: Size of position list in bytes */
-){
- int nMerge = pMsr->nAdvance;
- Fts3SegReader **apSegment = pMsr->apSegment;
- /* Comparison function used to keep the readers ordered by docid; the
- ** reversed variant is selected when p->bDescIdx is set. */
- int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
- p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
- );
-
- if( nMerge==0 ){
- *paPoslist = 0;
- return SQLITE_OK;
- }
-
- while( 1 ){
- Fts3SegReader *pSeg;
- pSeg = pMsr->apSegment[0];
-
- if( pSeg->pOffsetList==0 ){
- /* The first reader in sort order has no current entry: end of data. */
- *paPoslist = 0;
- break;
- }else{
- int rc;
- char *pList;
- int nList;
- int j;
- sqlite3_int64 iDocid = apSegment[0]->iDocid;
-
- /* Take the position list from the first reader, then advance every
- ** following reader that is positioned on the same docid, discarding
- ** its list. j ends up as the number of readers that were advanced. */
- rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList);
- j = 1;
- while( rc==SQLITE_OK
- && j<nMerge
- && apSegment[j]->pOffsetList
- && apSegment[j]->iDocid==iDocid
- ){
- rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0);
- j++;
- }
- if( rc!=SQLITE_OK ) return rc;
- fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp);
-
- if( pMsr->iColFilter>=0 ){
- fts3ColumnFilter(pMsr->iColFilter, &pList, &nList);
- }
-
- /* An empty list (for example after column filtering) is skipped and
- ** the loop moves on to the next docid. */
- if( nList>0 ){
- if( fts3SegReaderIsPending(apSegment[0]) ){
- /* Copy the list, plus the byte that follows it, into the reader's
- ** own buffer and hand out that copy instead of pList. */
- rc = fts3MsrBufferData(pMsr, pList, nList+1);
- if( rc!=SQLITE_OK ) return rc;
- *paPoslist = pMsr->aBuffer;
- assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 );
- }else{
- *paPoslist = pList;
- }
- *piDocid = iDocid;
- *pnPoslist = nList;
- break;
- }
- }
- }
-
- return SQLITE_OK;
-}
-
-/*
-** Position every segment reader of pCsr on its first interesting term and
-** then sort the readers by current term.
-**
-** Unless pCsr->bRestart is set, each reader is stepped at least once. If
-** zTerm is not NULL, stepping continues until the reader's term compares
-** greater than or equal to zTerm/nTerm, which avoids many unnecessary
-** merge/sort operations when a single segment b-tree leaf holds more than
-** one term. A reader flagged bLookup that did not land exactly on the
-** term is set to EOF.
-**
-** Returns SQLITE_OK, or the first error reported by fts3SegReaderNext().
-*/
-static int fts3SegReaderStart(
-  Fts3Table *p,                   /* Virtual table handle */
-  Fts3MultiSegReader *pCsr,       /* Cursor object */
-  const char *zTerm,              /* Term searched for (or NULL) */
-  int nTerm                       /* Length of zTerm in bytes */
-){
-  const int nTotal = pCsr->nSegment;  /* Reader count used for the sort */
-  int iSeg = 0;
-
-  while( pCsr->bRestart==0 && iSeg<pCsr->nSegment ){
-    Fts3SegReader *pReader = pCsr->apSegment[iSeg];
-    int cmp = 0;                  /* Last comparison against zTerm */
-
-    for(;;){
-      int rc = fts3SegReaderNext(p, pReader, 0);
-      if( rc!=SQLITE_OK ) return rc;
-      if( zTerm==0 ) break;
-      cmp = fts3SegReaderTermCmp(pReader, zTerm, nTerm);
-      if( cmp>=0 ) break;
-    }
-
-    if( pReader->bLookup && cmp!=0 ){
-      fts3SegReaderSetEof(pReader);
-    }
-    iSeg++;
-  }
-
-  fts3SegReaderSort(pCsr->apSegment, nTotal, nTotal, fts3SegReaderCmp);
-  return SQLITE_OK;
-}
-
-/*
-** Attach filter pFilter to cursor pCsr, then start the cursor using the
-** term (zTerm/nTerm) taken from that filter. Returns whatever
-** fts3SegReaderStart() returns.
-*/
-int sqlite3Fts3SegReaderStart(
-  Fts3Table *p,                   /* Virtual table handle */
-  Fts3MultiSegReader *pCsr,       /* Cursor object */
-  Fts3SegFilter *pFilter          /* Restrictions on range of iteration */
-){
-  int rc;
-
-  pCsr->pFilter = pFilter;
-  rc = fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm);
-  return rc;
-}
-
-/*
-** Prepare cursor pCsr for incremental iteration through the doclist of
-** term zTerm/nTerm, optionally restricted to column iCol (pass a negative
-** iCol for no restriction).
-**
-** The cursor must not have a filter attached. On return pCsr->nAdvance is
-** the number of leading segment readers that point at the term; each of
-** them has been positioned on its first docid and they are sorted in
-** docid order. Returns SQLITE_OK or an error code from a segment reader.
-*/
-int sqlite3Fts3MsrIncrStart(
-  Fts3Table *p,                   /* Virtual table handle */
-  Fts3MultiSegReader *pCsr,       /* Cursor object */
-  int iCol,                       /* Column to match on. */
-  const char *zTerm,              /* Term to iterate through a doclist for */
-  int nTerm                       /* Number of bytes in zTerm */
-){
-  const int nSegment = pCsr->nSegment;
-  int (*xCmp)(Fts3SegReader *, Fts3SegReader *);
-  int nMatch;                     /* Readers positioned exactly on zTerm */
-  int iSeg;
-  int rc;
-
-  if( p->bDescIdx ){
-    xCmp = fts3SegReaderDoclistCmpRev;
-  }else{
-    xCmp = fts3SegReaderDoclistCmp;
-  }
-
-  assert( pCsr->pFilter==0 );
-  assert( zTerm && nTerm>0 );
-
-  /* Move every reader forward until it reaches (or passes) the term. */
-  rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm);
-  if( rc!=SQLITE_OK ) return rc;
-
-  /* Count the leading readers that landed exactly on the term. */
-  nMatch = 0;
-  while( nMatch<nSegment ){
-    Fts3SegReader *pSeg = pCsr->apSegment[nMatch];
-    if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ) break;
-    nMatch++;
-  }
-  pCsr->nAdvance = nMatch;
-
-  /* Position each of those readers on the first docid of its doclist. */
-  for(iSeg=0; iSeg<pCsr->nAdvance; iSeg++){
-    rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[iSeg]);
-    if( rc!=SQLITE_OK ) return rc;
-  }
-  fts3SegReaderSort(pCsr->apSegment, iSeg, iSeg, xCmp);
-
-  assert( iCol<0 || iCol<p->nColumn );
-  pCsr->iColFilter = iCol;
-
-  return SQLITE_OK;
-}
-
-/*
-** Reset a MultiSegReader that was started with sqlite3Fts3MsrIncrStart()
-** (and possibly stepped with MsrIncrNext()) so that, if the next two
-** calls are:
-**
-**   sqlite3Fts3SegReaderStart()
-**   sqlite3Fts3SegReaderStep()
-**
-** then the entire doclist for the term is available in
-** MultiSegReader.aDoclist/nDoclist.
-**
-** The function clears nAdvance, sets the bRestart flag and zeroes the
-** per-segment doclist position (pOffsetList, nOffsetList, iDocid). It
-** always returns SQLITE_OK.
-*/
-int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){
-  int iSeg = 0;                   /* Index of the segment-reader to reset */
-
-  assert( pCsr->zTerm==0 );
-  assert( pCsr->nTerm==0 );
-  assert( pCsr->aDoclist==0 );
-  assert( pCsr->nDoclist==0 );
-
-  pCsr->nAdvance = 0;
-  pCsr->bRestart = 1;
-  while( iSeg<pCsr->nSegment ){
-    Fts3SegReader *pSeg = pCsr->apSegment[iSeg];
-    pSeg->pOffsetList = 0;
-    pSeg->nOffsetList = 0;
-    pSeg->iDocid = 0;
-    iSeg++;
-  }
-
-  return SQLITE_OK;
-}
-
-
-/*
-** Advance cursor pCsr to the next term that satisfies the filter attached
-** by sqlite3Fts3SegReaderStart() and build the merged doclist for it.
-**
-** Return values:
-**
-**   SQLITE_ROW   A term was found. pCsr->zTerm/nTerm identify it and
-**                pCsr->aDoclist/nDoclist hold its doclist.
-**   SQLITE_OK    No further terms (the cursor has no segments, all
-**                segment readers are at EOF, or the current term no longer
-**                matches the filter term).
-**   other        An SQLite error code (SQLITE_NOMEM if a buffer could not
-**                be grown, or an error from a segment reader).
-**
-** The filter flags control the output: IGNORE_EMPTY drops docids whose
-** position list is empty, REQUIRE_POS copies position lists into the
-** doclist, COLUMN_FILTER restricts position lists to pFilter->iCol, PREFIX
-** and SCAN relax the term match, and FIRST routes each entry through
-** sqlite3Fts3FirstFilter().
-*/
-int sqlite3Fts3SegReaderStep(
-  Fts3Table *p,                   /* Virtual table handle */
-  Fts3MultiSegReader *pCsr        /* Cursor object */
-){
-  int rc = SQLITE_OK;
-
-  int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
-  int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
-  int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
-  int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
-  int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);
-  int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST);
-
-  Fts3SegReader **apSegment = pCsr->apSegment;
-  int nSegment = pCsr->nSegment;
-  Fts3SegFilter *pFilter = pCsr->pFilter;
-  int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = (
-    p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp
-  );
-
-  if( pCsr->nSegment==0 ) return SQLITE_OK;
-
-  do {
-    int nMerge;
-    int i;
-
-    /* Advance the first pCsr->nAdvance entries in the apSegment[] array
-    ** forward. Then sort the list in order of current term again.
-    */
-    for(i=0; i<pCsr->nAdvance; i++){
-      Fts3SegReader *pSeg = apSegment[i];
-      if( pSeg->bLookup ){
-        fts3SegReaderSetEof(pSeg);
-      }else{
-        rc = fts3SegReaderNext(p, pSeg, 0);
-      }
-      if( rc!=SQLITE_OK ) return rc;
-    }
-    fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp);
-    pCsr->nAdvance = 0;
-
-    /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */
-    assert( rc==SQLITE_OK );
-    if( apSegment[0]->aNode==0 ) break;
-
-    pCsr->nTerm = apSegment[0]->nTerm;
-    pCsr->zTerm = apSegment[0]->zTerm;
-
-    /* If this is a prefix-search, and if the term that apSegment[0] points
-    ** to does not share a prefix with pFilter->zTerm/nTerm, then all
-    ** required callbacks have been made. In this case exit early.
-    **
-    ** Similarly, if this is a search for an exact match, and the first term
-    ** of segment apSegment[0] is not a match, exit early.
-    */
-    if( pFilter->zTerm && !isScan ){
-      if( pCsr->nTerm<pFilter->nTerm
-       || (!isPrefix && pCsr->nTerm>pFilter->nTerm)
-       || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm)
-      ){
-        break;
-      }
-    }
-
-    /* Count how many of the leading (sorted) readers are positioned on
-    ** the same term as apSegment[0]. */
-    nMerge = 1;
-    while( nMerge<nSegment
-        && apSegment[nMerge]->aNode
-        && apSegment[nMerge]->nTerm==pCsr->nTerm
-        && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm)
-    ){
-      nMerge++;
-    }
-
-    assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
-    if( nMerge==1
-     && !isIgnoreEmpty
-     && !isFirst
-     && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0)
-    ){
-      /* Only one segment holds the term and no per-entry processing is
-      ** required, so its doclist is returned as-is (copied into aBuffer
-      ** first if the reader is the pending-terms reader). */
-      pCsr->nDoclist = apSegment[0]->nDoclist;
-      if( fts3SegReaderIsPending(apSegment[0]) ){
-        rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist);
-        pCsr->aDoclist = pCsr->aBuffer;
-      }else{
-        pCsr->aDoclist = apSegment[0]->aDoclist;
-      }
-      if( rc==SQLITE_OK ) rc = SQLITE_ROW;
-    }else{
-      int nDoclist = 0;           /* Size of doclist */
-      sqlite3_int64 iPrev = 0;    /* Previous docid stored in doclist */
-
-      /* The current term of the first nMerge entries in the array
-      ** of Fts3SegReader objects is the same. The doclists must be merged
-      ** and a single term returned with the merged doclist.
-      */
-      for(i=0; i<nMerge; i++){
-        fts3SegReaderFirstDocid(p, apSegment[i]);
-      }
-      fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp);
-      while( apSegment[0]->pOffsetList ){
-        int j;                    /* Number of segments that share a docid */
-        char *pList;
-        int nList;
-        int nByte;
-        sqlite3_int64 iDocid = apSegment[0]->iDocid;
-        fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList);
-        j = 1;
-        while( j<nMerge
-            && apSegment[j]->pOffsetList
-            && apSegment[j]->iDocid==iDocid
-        ){
-          fts3SegReaderNextDocid(p, apSegment[j], 0, 0);
-          j++;
-        }
-
-        if( isColFilter ){
-          fts3ColumnFilter(pFilter->iCol, &pList, &nList);
-        }
-
-        if( !isIgnoreEmpty || nList>0 ){
-
-          /* Calculate the 'docid' delta value to write into the merged
-          ** doclist. */
-          sqlite3_int64 iDelta;
-          if( p->bDescIdx && nDoclist>0 ){
-            iDelta = iPrev - iDocid;
-          }else{
-            iDelta = iDocid - iPrev;
-          }
-          assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) );
-          assert( nDoclist>0 || iDelta==iDocid );
-
-          nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0);
-          if( nDoclist+nByte>pCsr->nBuffer ){
-            /* Grow the output buffer. nBuffer is updated only once the
-            ** realloc has succeeded, so that after an OOM it still matches
-            ** the size of the allocation aBuffer points to. */
-            int nNew = (nDoclist+nByte)*2;
-            char *aNew = sqlite3_realloc(pCsr->aBuffer, nNew);
-            if( !aNew ){
-              return SQLITE_NOMEM;
-            }
-            pCsr->aBuffer = aNew;
-            pCsr->nBuffer = nNew;
-          }
-
-          if( isFirst ){
-            char *a = &pCsr->aBuffer[nDoclist];
-            int nWrite;
-
-            nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a);
-            if( nWrite ){
-              iPrev = iDocid;
-              nDoclist += nWrite;
-            }
-          }else{
-            nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
-            iPrev = iDocid;
-            if( isRequirePos ){
-              memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
-              nDoclist += nList;
-              pCsr->aBuffer[nDoclist++] = '\0';
-            }
-          }
-        }
-
-        fts3SegReaderSort(apSegment, nMerge, j, xCmp);
-      }
-      /* If every entry was dropped, rc stays SQLITE_OK and the outer loop
-      ** moves on to the next term. */
-      if( nDoclist>0 ){
-        pCsr->aDoclist = pCsr->aBuffer;
-        pCsr->nDoclist = nDoclist;
-        rc = SQLITE_ROW;
-      }
-    }
-    pCsr->nAdvance = nMerge;
-  }while( rc==SQLITE_OK );
-
-  return rc;
-}
-
-
-/*
-** Release everything owned by cursor pCsr: each segment reader, the
-** apSegment[] array itself and the doclist buffer. The corresponding
-** fields are cleared afterwards. A NULL pCsr is a no-op. The
-** Fts3MultiSegReader structure itself is not freed.
-*/
-void sqlite3Fts3SegReaderFinish(
-  Fts3MultiSegReader *pCsr        /* Cursor object */
-){
-  int iSeg;
-
-  if( pCsr==0 ) return;
-
-  for(iSeg=0; iSeg<pCsr->nSegment; iSeg++){
-    sqlite3Fts3SegReaderFree(pCsr->apSegment[iSeg]);
-  }
-  sqlite3_free(pCsr->apSegment);
-  sqlite3_free(pCsr->aBuffer);
-
-  pCsr->apSegment = 0;
-  pCsr->aBuffer = 0;
-  pCsr->nSegment = 0;
-}
-
-/*
-** Merge all level iLevel segments in the database into a single
-** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
-** single segment with a level equal to the numerically largest level
-** currently present in the database.
-**
-** If this function is called with iLevel<0, but there is only one
-** segment in the database, SQLITE_DONE is returned immediately.
-** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
-** an SQLite error code is returned.
-**
-** If the cursor opened for the requested level(s) contains no segments,
-** the function returns without writing anything. The segment writer and
-** the cursor are released on every exit path.
-*/
-static int fts3SegmentMerge(
- Fts3Table *p,
- int iLangid, /* Language id to merge */
- int iIndex, /* Index in p->aIndex[] to merge */
- int iLevel /* Level to merge */
-){
- int rc; /* Return code */
- int iIdx = 0; /* Index of new segment */
- sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */
- SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
- Fts3SegFilter filter; /* Segment term filter condition */
- Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
- int bIgnoreEmpty = 0; /* True to ignore empty segments */
-
- assert( iLevel==FTS3_SEGCURSOR_ALL
- || iLevel==FTS3_SEGCURSOR_PENDING
- || iLevel>=0
- );
- assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
- assert( iIndex>=0 && iIndex<p->nIndex );
-
- /* Open a cursor over the input segments. Nothing to do if it is empty. */
- rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
- if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
-
- if( iLevel==FTS3_SEGCURSOR_ALL ){
- /* This call is to merge all segments in the database to a single
- ** segment. The level of the new segment is equal to the numerically
- ** greatest segment level currently present in the database for this
- ** index. The idx of the new segment is always 0. */
- if( csr.nSegment==1 ){
- rc = SQLITE_DONE;
- goto finished;
- }
- rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
- bIgnoreEmpty = 1;
-
- }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
- /* Flushing pending terms: the output goes to level 0 at the next
- ** free segment index. */
- iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
- rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
- }else{
- /* This call is to merge all segments at level iLevel. find the next
- ** available segment index at level iLevel+1. The call to
- ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
- ** a single iLevel+2 segment if necessary. */
- rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
- iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
- }
- if( rc!=SQLITE_OK ) goto finished;
- assert( csr.nSegment>0 );
- assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
- assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
-
- /* No term restriction: every term is visited, with position lists.
- ** Empty entries are dropped only when merging everything. */
- memset(&filter, 0, sizeof(Fts3SegFilter));
- filter.flags = FTS3_SEGMENT_REQUIRE_POS;
- filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
-
- /* Feed each term and its merged doclist to the segment writer. The
- ** loop ends with rc==SQLITE_OK once the cursor reports no more terms. */
- rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
- while( SQLITE_OK==rc ){
- rc = sqlite3Fts3SegReaderStep(p, &csr);
- if( rc!=SQLITE_ROW ) break;
- rc = fts3SegWriterAdd(p, &pWriter, 1,
- csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
- }
- if( rc!=SQLITE_OK ) goto finished;
- assert( pWriter );
-
- /* Remove the input segments (there are none to remove when the input
- ** was the pending-terms data), then write out the new segment. */
- if( iLevel!=FTS3_SEGCURSOR_PENDING ){
- rc = fts3DeleteSegdir(
- p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
- );
- if( rc!=SQLITE_OK ) goto finished;
- }
- rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
-
- /* Common exit: release the writer and the cursor. */
- finished:
- fts3SegWriterFree(pWriter);
- sqlite3Fts3SegReaderFinish(&csr);
- return rc;
-}
-
-
-/*
-** Flush the contents of pendingTerms to level 0 segments, one merge per
-** index in p->aIndex[], and then clear the pending-terms data.
-**
-** Afterwards, if the table has a %_stat table, the auto-incr-merge
-** setting is still unknown (0xff) and leaf blocks were added, the setting
-** is loaded from the FTS_STAT_AUTOINCRMERGE record (0 if absent).
-**
-** Returns SQLITE_OK or an SQLite error code.
-*/
-int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
-  int rc = SQLITE_OK;
-  int iIndex = 0;
-
-  while( rc==SQLITE_OK && iIndex<p->nIndex ){
-    rc = fts3SegmentMerge(p, p->iPrevLangid, iIndex, FTS3_SEGCURSOR_PENDING);
-    if( rc==SQLITE_DONE ) rc = SQLITE_OK;
-    iIndex++;
-  }
-  sqlite3Fts3PendingTermsClear(p);
-
-  if( rc!=SQLITE_OK ) return rc;
-  if( !p->bHasStat ) return rc;
-  if( p->bAutoincrmerge!=0xff ) return rc;
-  if( !(p->nLeafAdd>0) ) return rc;
-
-  /* Determine the auto-incr-merge setting, which is not yet known. */
-  {
-    sqlite3_stmt *pStmt = 0;
-    rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
-    if( rc==SQLITE_OK ){
-      int bEnabled = 0;
-      sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
-      if( sqlite3_step(pStmt)==SQLITE_ROW ){
-        bEnabled = (sqlite3_column_int(pStmt, 0)!=0);
-      }
-      p->bAutoincrmerge = bEnabled;
-      rc = sqlite3_reset(pStmt);
-    }
-  }
-  return rc;
-}
-
-/*
-** Serialize the N integers in a[] into zBuf[] as consecutive varints and
-** store the total number of bytes written in *pNBuf. The caller must
-** supply a buffer large enough for the encoded values.
-*/
-static void fts3EncodeIntArray(
-  int N,             /* The number of integers to encode */
-  u32 *a,            /* The integer values */
-  char *zBuf,        /* Write the BLOB here */
-  int *pNBuf         /* OUT: number of bytes of zBuf[] used */
-){
-  int nOut = 0;      /* Bytes written so far */
-  int iVal;
-
-  for(iVal=0; iVal<N; iVal++){
-    nOut += sqlite3Fts3PutVarint(&zBuf[nOut], (sqlite3_int64)a[iVal]);
-  }
-  *pNBuf = nOut;
-}
-
-/*
-** Decode a blob of varints into N integers.
-**
-** At most nBuf bytes of zBuf are consumed. Decoding is attempted only if
-** the blob is non-empty and its final byte does not have the 0x80
-** continuation bit set, so that the last varint is known to end inside
-** the buffer. Any of the N output slots for which no varint was available
-** (short, empty or malformed blob) are set to 0. Each decoded value is
-** truncated to its low 32 bits.
-*/
-static void fts3DecodeIntArray(
-  int N,             /* The number of integers to decode */
-  u32 *a,            /* Write the integer values */
-  const char *zBuf,  /* The BLOB containing the varints */
-  int nBuf           /* size of the BLOB */
-){
-  int i = 0;         /* Number of integers decoded so far */
-
-  if( nBuf>0 && (zBuf[nBuf-1]&0x80)==0 ){
-    int j = 0;       /* Read offset within zBuf[] */
-    while( i<N && j<nBuf ){
-      sqlite3_int64 x;
-      j += sqlite3Fts3GetVarint(&zBuf[j], &x);
-      assert( j<=nBuf );
-      a[i++] = (u32)(x & 0xffffffff);
-    }
-  }
-
-  /* Zero-fill whatever could not be read from the blob. */
-  while( i<N ) a[i++] = 0;
-}
-
-/*
-** Store the per-column token counts in aSz[] for the document whose docid
-** is p->iPrevDocid. The counts are encoded as a blob of varints and
-** written with the SQL_REPLACE_DOCSIZE statement.
-**
-** This is a no-op if *pRC is already non-zero on entry. Otherwise *pRC is
-** set to the outcome (SQLITE_NOMEM on allocation failure).
-*/
-static void fts3InsertDocsize(
-  int *pRC,                       /* Result code */
-  Fts3Table *p,                   /* Table into which to insert */
-  u32 *aSz                        /* Sizes of each column, in tokens */
-){
-  sqlite3_stmt *pStmt;            /* Statement used to insert the encoding */
-  char *pBlob;                    /* The BLOB encoding of the document size */
-  int nBlob;                      /* Number of bytes in the BLOB */
-  int rc;                         /* Result code from subfunctions */
-
-  if( *pRC!=SQLITE_OK ) return;
-
-  /* 10 bytes per column is allocated for the encoded values. */
-  pBlob = sqlite3_malloc( 10*p->nColumn );
-  if( pBlob==0 ){
-    *pRC = SQLITE_NOMEM;
-    return;
-  }
-  fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob);
-
-  rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0);
-  if( rc==SQLITE_OK ){
-    sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
-    /* sqlite3_free is passed as the destructor, so the blob is not freed
-    ** here once it has been bound. */
-    sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
-    sqlite3_step(pStmt);
-    rc = sqlite3_reset(pStmt);
-  }else{
-    sqlite3_free(pBlob);
-  }
-  *pRC = rc;
-}
-
-/*
-** Record 0 of the %_stat table contains a blob consisting of N varints,
-** where N is the number of user defined columns in the fts3 table plus
-** two. If nCol is the number of user defined columns, then values of the
-** varints are set as follows:
-**
-**   Varint 0:       Total number of rows in the table.
-**
-**   Varint 1..nCol: For each column, the total number of tokens stored in
-**                   the column for all rows of the table.
-**
-**   Varint 1+nCol:  The total size, in bytes, of all text values in all
-**                   columns of all rows of the table.
-**
-** This function reads that record (treating a missing record as all
-** zeroes), adds nChng to the row count, applies the per-column increases
-** in aSzIns[] and decreases in aSzDel[] (each nCol+1 entries), clamping
-** every counter at zero, and writes the record back.
-**
-** It is a no-op if *pRC is non-zero on entry; otherwise *pRC receives the
-** result.
-*/
-static void fts3UpdateDocTotals(
-  int *pRC,                       /* The result code */
-  Fts3Table *p,                   /* Table being updated */
-  u32 *aSzIns,                    /* Size increases */
-  u32 *aSzDel,                    /* Size decreases */
-  int nChng                       /* Change in the number of documents */
-){
-  char *pBlob;             /* Storage for BLOB written into %_stat */
-  int nBlob;               /* Size of BLOB written into %_stat */
-  u32 *a;                  /* Array of integers that becomes the BLOB */
-  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
-  int i;                   /* Loop counter */
-  int rc;                  /* Result code from subfunctions */
-
-  const int nStat = p->nColumn+2;
-
-  if( *pRC ) return;
-
-  /* One allocation holds both the decoded u32 array (nStat entries) and,
-  ** immediately after it, 10 bytes per entry for the encoded blob. */
-  a = sqlite3_malloc( (sizeof(u32)+10)*nStat );
-  if( a==0 ){
-    *pRC = SQLITE_NOMEM;
-    return;
-  }
-  pBlob = (char*)&a[nStat];
-  rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
-  if( rc ){
-    sqlite3_free(a);
-    *pRC = rc;
-    return;
-  }
-  sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
-  if( sqlite3_step(pStmt)==SQLITE_ROW ){
-    fts3DecodeIntArray(nStat, a,
-         sqlite3_column_blob(pStmt, 0),
-         sqlite3_column_bytes(pStmt, 0));
-  }else{
-    memset(a, 0, sizeof(u32)*(nStat) );
-  }
-  rc = sqlite3_reset(pStmt);
-  if( rc!=SQLITE_OK ){
-    sqlite3_free(a);
-    *pRC = rc;
-    return;
-  }
-
-  /* Row count: never let it go below zero. */
-  if( nChng<0 && a[0]<(u32)(-nChng) ){
-    a[0] = 0;
-  }else{
-    a[0] += nChng;
-  }
-
-  /* Per-column token totals plus the trailing byte total. */
-  for(i=0; i<p->nColumn+1; i++){
-    u32 x = a[i+1];
-    if( x+aSzIns[i] < aSzDel[i] ){
-      x = 0;
-    }else{
-      x = x + aSzIns[i] - aSzDel[i];
-    }
-    a[i+1] = x;
-  }
-  fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
-  rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
-  if( rc ){
-    sqlite3_free(a);
-    *pRC = rc;
-    return;
-  }
-  sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
-  sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC);
-  sqlite3_step(pStmt);
-  *pRC = sqlite3_reset(pStmt);
-
-  /* The blob was bound with SQLITE_STATIC and lives inside a[]. The
-  ** statement is only reset here, not finalized, so drop the binding
-  ** before the memory is released; otherwise the statement would keep a
-  ** pointer to freed memory. */
-  sqlite3_bind_null(pStmt, 2);
-  sqlite3_free(a);
-}
-
-/*
-** Merge the entire database so that there is one segment for each
-** iIndex/iLangid combination.
-**
-** Returns SQLITE_OK on success or an SQLite error code. If bReturnDone is
-** non-zero and at least one of the merges reported SQLITE_DONE, then
-** SQLITE_DONE is returned instead of SQLITE_OK.
-*/
-static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
- int bSeenDone = 0;
- int rc;
- sqlite3_stmt *pAllLangid = 0;
-
- rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
- if( rc==SQLITE_OK ){
- int rc2;
- sqlite3_bind_int(pAllLangid, 1, p->nIndex);
- /* One row per language id. For each, merge every index in turn. Once
- ** rc holds an error the inner loop stops doing work, although the
- ** remaining rows are still stepped through. */
- while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
- int i;
- int iLangid = sqlite3_column_int(pAllLangid, 0);
- for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
- rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
- if( rc==SQLITE_DONE ){
- /* SQLITE_DONE is not an error here; remember it and carry on. */
- bSeenDone = 1;
- rc = SQLITE_OK;
- }
- }
- }
- /* Keep the first error: the reset result is used only if rc is OK. */
- rc2 = sqlite3_reset(pAllLangid);
- if( rc==SQLITE_OK ) rc = rc2;
- }
-
- sqlite3Fts3SegmentsClose(p);
- sqlite3Fts3PendingTermsClear(p);
-
- return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
-}
-
-/*
-** This function is called when the user executes the following statement:
-**
-** INSERT INTO <tbl>(<tbl>) VALUES('rebuild');
-**
-** The entire FTS index is discarded and rebuilt. If the table is one
-** created using the content=xxx option, then the new index is based on
-** the current contents of the xxx table. Otherwise, it is rebuilt based
-** on the contents of the %_content table.
-**
-** Returns SQLITE_OK on success or an SQLite error code.
-*/
-static int fts3DoRebuild(Fts3Table *p){
- int rc; /* Return Code */
-
- rc = fts3DeleteAll(p, 0);
- if( rc==SQLITE_OK ){
- u32 *aSz = 0;
- u32 *aSzIns = 0;
- u32 *aSzDel = 0;
- sqlite3_stmt *pStmt = 0;
- int nEntry = 0;
-
- /* Compose and prepare an SQL statement to loop through the content table */
- char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
- if( !zSql ){
- rc = SQLITE_NOMEM;
- }else{
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
- sqlite3_free(zSql);
- }
-
- if( rc==SQLITE_OK ){
- /* A single allocation holds three arrays of (nColumn+1) counters:
- ** aSz (current row), aSzIns (running totals) and aSzDel. aSzDel is
- ** zeroed here and never written by this function. */
- int nByte = sizeof(u32) * (p->nColumn+1)*3;
- aSz = (u32 *)sqlite3_malloc(nByte);
- if( aSz==0 ){
- rc = SQLITE_NOMEM;
- }else{
- memset(aSz, 0, nByte);
- aSzIns = &aSz[p->nColumn+1];
- aSzDel = &aSzIns[p->nColumn+1];
- }
- }
-
- /* Re-index one content row per iteration. Result column 0 is used as
- ** the docid and columns 1..nColumn as the text of each indexed column. */
- while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
- int iCol;
- int iLangid = langidFromSelect(p, pStmt);
- rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
- memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
- for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
- const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
- rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
- /* The extra slot aSz[nColumn] accumulates the row's size in bytes. */
- aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
- }
- if( p->bHasDocsize ){
- fts3InsertDocsize(&rc, p, aSz);
- }
- if( rc!=SQLITE_OK ){
- /* Finalize now and clear pStmt so it is not finalized again below. */
- sqlite3_finalize(pStmt);
- pStmt = 0;
- }else{
- nEntry++;
- for(iCol=0; iCol<=p->nColumn; iCol++){
- aSzIns[iCol] += aSz[iCol];
- }
- }
- }
- if( p->bFts4 ){
- /* fts3UpdateDocTotals() does nothing if rc is already an error. */
- fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry);
- }
- sqlite3_free(aSz);
-
- if( pStmt ){
- int rc2 = sqlite3_finalize(pStmt);
- if( rc==SQLITE_OK ){
- rc = rc2;
- }
- }
- }
-
- return rc;
-}
-
-
-/*
-** Open a cursor over the input of an incremental merge: the oldest nSeg
-** segments (idx=0 through idx=(nSeg-1)) of absolute level iAbsLevel.
-**
-** *pCsr is zeroed and then populated with one segment reader per segment
-** found, up to nSeg readers. Returns SQLITE_OK on success, SQLITE_NOMEM
-** if the reader array cannot be allocated, or another SQLite error code.
-*/
-static int fts3IncrmergeCsr(
-  Fts3Table *p,                   /* FTS3 table handle */
-  sqlite3_int64 iAbsLevel,        /* Absolute level to open */
-  int nSeg,                       /* Number of segments to merge */
-  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
-){
-  sqlite3_stmt *pStmt = 0;        /* Statement used to read %_segdir entry */
-  int nByte;                      /* Bytes allocated at pCsr->apSegment[] */
-  int iSeg;                       /* Index of the next reader to create */
-  int rc;                         /* Return Code */
-  int rc2;                        /* Result of resetting pStmt */
-
-  /* Allocate and zero the Fts3MultiSegReader.apSegment[] array. */
-  memset(pCsr, 0, sizeof(*pCsr));
-  nByte = sizeof(Fts3SegReader *) * nSeg;
-  pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);
-  if( pCsr->apSegment==0 ) return SQLITE_NOMEM;
-  memset(pCsr->apSegment, 0, nByte);
-
-  rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
-  if( rc!=SQLITE_OK ) return rc;
-
-  sqlite3_bind_int64(pStmt, 1, iAbsLevel);
-  assert( pCsr->nSegment==0 );
-  iSeg = 0;
-  while( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && iSeg<nSeg ){
-    rc = sqlite3Fts3SegReaderNew(iSeg, 0,
-        sqlite3_column_int64(pStmt, 1),        /* segdir.start_block */
-        sqlite3_column_int64(pStmt, 2),        /* segdir.leaves_end_block */
-        sqlite3_column_int64(pStmt, 3),        /* segdir.end_block */
-        sqlite3_column_blob(pStmt, 4),         /* segdir.root */
-        sqlite3_column_bytes(pStmt, 4),        /* segdir.root */
-        &pCsr->apSegment[iSeg]
-    );
-    pCsr->nSegment++;
-    iSeg++;
-  }
-
-  /* Report the first error; the reset result counts only if rc is OK. */
-  rc2 = sqlite3_reset(pStmt);
-  if( rc==SQLITE_OK ) rc = rc2;
-  return rc;
-}
-
-/* Short names for the helper structures defined below. */
-typedef struct IncrmergeWriter IncrmergeWriter;
-typedef struct NodeWriter NodeWriter;
-typedef struct Blob Blob;
-typedef struct NodeReader NodeReader;
-
-/*
-** An instance of the following structure is used as a dynamic buffer
-** to build up nodes or other blobs of data in.
-**
-** The function blobGrowBuffer() is used to extend the allocation. It
-** updates a[] and nAlloc only; n is maintained by the code that writes
-** data into the buffer.
-*/
-struct Blob {
- char *a; /* Pointer to allocation */
- int n; /* Number of valid bytes of data in a[] */
- int nAlloc; /* Allocated size of a[] (nAlloc>=n) */
-};
-
-/*
-** This structure is used to build up buffers containing segment b-tree
-** nodes (blocks). One instance holds the node currently being assembled
-** for a single layer of the tree.
-*/
-struct NodeWriter {
- sqlite3_int64 iBlock; /* Current block id */
- Blob key; /* Last key written to the current block */
- Blob block; /* Current block image */
-};
-
-/*
-** An object of this type contains the state required to create or append
-** to an appendable b-tree segment.
-*/
-struct IncrmergeWriter {
- int nLeafEst; /* Space allocated for leaf blocks */
- int nWork; /* Number of leaf pages flushed */
- sqlite3_int64 iAbsLevel; /* Absolute level of input segments */
- int iIdx; /* Index of *output* segment in iAbsLevel+1 */
- sqlite3_int64 iStart; /* Block number of first allocated block */
- sqlite3_int64 iEnd; /* Block number of last allocated block */
- /* One writer per layer of the tree. fts3IncrmergePush() takes the leaf
- ** block id from entry 0 and writes internal nodes to entries 1 and up. */
- NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
-};
-
-/*
-** An object of the following type is used to read data from a single
-** FTS segment node. See the following functions:
-**
-** nodeReaderInit()
-** nodeReaderNext()
-** nodeReaderRelease()
-*/
-struct NodeReader {
- const char *aNode; /* Node data; set to NULL by nodeReaderNext() at EOF */
- int nNode; /* Size of aNode[] in bytes */
- int iOff; /* Current offset within aNode[] */
-
- /* Output variables. Containing the current node entry. */
- sqlite3_int64 iChild; /* Pointer to child node */
- Blob term; /* Current term */
- const char *aDoclist; /* Pointer to doclist */
- int nDoclist; /* Size of doclist in bytes */
-};
-
-/*
-** Make sure the allocation at pBlob->a is at least nMin bytes in size,
-** reallocating it to exactly nMin bytes if it is currently smaller.
-**
-** This is a no-op if *pRc is not SQLITE_OK on entry. If the reallocation
-** fails, *pRc is set to SQLITE_NOMEM and pBlob is left unmodified. On
-** success pBlob->a and pBlob->nAlloc describe the new buffer.
-*/
-static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){
-  char *aNew;
-
-  if( *pRc!=SQLITE_OK ) return;
-  if( nMin<=pBlob->nAlloc ) return;
-
-  aNew = (char *)sqlite3_realloc(pBlob->a, nMin);
-  if( aNew==0 ){
-    *pRc = SQLITE_NOMEM;
-    return;
-  }
-  pBlob->a = aNew;
-  pBlob->nAlloc = nMin;
-}
-
-/*
-** Attempt to advance the node-reader object passed as the first argument to
-** the next entry on the node.
-**
-** Return an error code if an error occurs (SQLITE_NOMEM is possible).
-** Otherwise return SQLITE_OK. If there is no next entry on the node
-** (e.g. because the current entry is the last) set NodeReader->aNode to
-** NULL to indicate EOF. Otherwise, populate the NodeReader structure output
-** variables for the new entry.
-**
-** The prefix, suffix and doclist lengths are read from the node itself.
-** If any of them is inconsistent with the previous term or would extend
-** past the end of the node, SQLITE_CORRUPT_VTAB is returned instead of
-** reading or copying out of bounds.
-*/
-static int nodeReaderNext(NodeReader *p){
-  int bFirst = (p->term.n==0);    /* True for first term on the node */
-  int nPrefix = 0;                /* Bytes to copy from previous term */
-  int nSuffix = 0;                /* Bytes to append to the prefix */
-  int rc = SQLITE_OK;             /* Return code */
-
-  assert( p->aNode );
-  /* On an internal node each entry after the first refers to the next
-  ** child block id. */
-  if( p->iChild && bFirst==0 ) p->iChild++;
-  if( p->iOff>=p->nNode ){
-    /* EOF */
-    p->aNode = 0;
-  }else{
-    /* The first term on a node is stored without a prefix length. */
-    if( bFirst==0 ){
-      p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nPrefix);
-    }
-    p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nSuffix);
-
-    /* The prefix is taken from the previous term, so it cannot be longer
-    ** than that term; the suffix must be non-empty and lie inside the
-    ** node. */
-    if( nPrefix<0 || nPrefix>p->term.n
-     || nSuffix<=0 || nSuffix>p->nNode-p->iOff
-    ){
-      return SQLITE_CORRUPT_VTAB;
-    }
-
-    blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc);
-    if( rc==SQLITE_OK ){
-      memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix);
-      p->term.n = nPrefix+nSuffix;
-      p->iOff += nSuffix;
-      if( p->iChild==0 ){
-        /* Leaf node: the term is followed by its doclist. */
-        p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist);
-        if( p->nDoclist<0 || p->nDoclist>p->nNode-p->iOff ){
-          return SQLITE_CORRUPT_VTAB;
-        }
-        p->aDoclist = &p->aNode[p->iOff];
-        p->iOff += p->nDoclist;
-      }
-    }
-  }
-
-  assert( p->iOff<=p->nNode );
-
-  return rc;
-}
-
-/*
-** Release all dynamic resources held by node-reader object *p. The only
-** such resource is the buffer holding the current term; the node data
-** (aNode) is not freed here.
-*/
-static void nodeReaderRelease(NodeReader *p){
- sqlite3_free(p->term.a);
-}
-
-/*
-** Initialize a node-reader object to read the node in buffer aNode/nNode.
-**
-** If successful, SQLITE_OK is returned and the NodeReader object set to
-** point to the first entry on the node (if any). Otherwise, an SQLite
-** error code is returned.
-**
-** If aNode is NULL or nNode is not positive there is nothing to read: the
-** reader is left at EOF (p->aNode==0) and SQLITE_OK is returned without
-** touching the buffer.
-*/
-static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){
-  memset(p, 0, sizeof(NodeReader));
-  if( aNode==0 || nNode<=0 ){
-    return SQLITE_OK;
-  }
-  p->aNode = aNode;
-  p->nNode = nNode;
-
-  /* Figure out if this is a leaf or an internal node. */
-  if( p->aNode[0] ){
-    /* An internal node. The first byte is followed by a varint holding
-    ** the block id of the first child. */
-    p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild);
-  }else{
-    p->iOff = 1;
-  }
-
-  return nodeReaderNext(p);
-}
-
-/*
-** This function is called while writing an FTS segment each time a leaf
-** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed
-** to be greater than the largest key on the node just written, but smaller
-** than or equal to the first key that will be written to the next leaf
-** node.
-**
-** The block id of the leaf node just written to disk may be found in
-** (pWriter->aNodeWriter[0].iBlock) when this function is called.
-**
-** Starting at layer 1, the key is added to the current node of the layer
-** if it fits. If it does not fit, that node is written out, a new empty
-** node is started in its place, and the key moves up to the next layer.
-** Returns SQLITE_OK or an SQLite error code.
-*/
-static int fts3IncrmergePush(
- Fts3Table *p, /* Fts3 table handle */
- IncrmergeWriter *pWriter, /* Writer object */
- const char *zTerm, /* Term to write to internal node */
- int nTerm /* Bytes at zTerm */
-){
- sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock;
- int iLayer;
-
- assert( nTerm>0 );
- for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){
- sqlite3_int64 iNextPtr = 0;
- NodeWriter *pNode = &pWriter->aNodeWriter[iLayer];
- int rc = SQLITE_OK;
- int nPrefix;
- int nSuffix;
- int nSpace;
-
- /* Figure out how much space the key will consume if it is written to
- ** the current node of layer iLayer. Due to the prefix compression,
- ** the space required changes depending on which node the key is to
- ** be added to. */
- nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm);
- nSuffix = nTerm - nPrefix;
- nSpace = sqlite3Fts3VarintLen(nPrefix);
- nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
-
- if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){
- /* If the current node of layer iLayer contains zero keys, or if adding
- ** the key to it will not cause it to grow to larger than nNodeSize
- ** bytes in size, write the key here. */
-
- Blob *pBlk = &pNode->block;
- if( pBlk->n==0 ){
- /* Brand new node: write the header, which is the layer number
- ** followed by the block id of the child below (iPtr). */
- blobGrowBuffer(pBlk, p->nNodeSize, &rc);
- if( rc==SQLITE_OK ){
- pBlk->a[0] = (char)iLayer;
- pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr);
- }
- }
- blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc);
- blobGrowBuffer(&pNode->key, nTerm, &rc);
-
- if( rc==SQLITE_OK ){
- /* The first key on a node is written without a prefix length. */
- if( pNode->key.n ){
- pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix);
- }
- pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix);
- memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix);
- pBlk->n += nSuffix;
-
- /* Remember the key for prefix-compressing the next one. */
- memcpy(pNode->key.a, zTerm, nTerm);
- pNode->key.n = nTerm;
- }
- }else{
- /* Otherwise, flush the current node of layer iLayer to disk.
- ** Then allocate a new, empty sibling node. The key will be written
- ** into the parent of this node. */
- rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
-
- assert( pNode->block.nAlloc>=p->nNodeSize );
- pNode->block.a[0] = (char)iLayer;
- pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1);
-
- /* The block just flushed is what the parent layer must point at. */
- iNextPtr = pNode->iBlock;
- pNode->iBlock++;
- pNode->key.n = 0;
- }
-
- /* Finished on error, or once the key has been stored in a node
- ** (iNextPtr is non-zero only when the key must move up a layer). */
- if( rc!=SQLITE_OK || iNextPtr==0 ) return rc;
- iPtr = iNextPtr;
- }
-
- assert( 0 );
- return 0;
-}
-
-/*
-** Append a term and (optionally) doclist to the FTS segment node currently
-** stored in blob *pNode. The node need not contain any terms, but the
-** header must be written before this function is called.
-**
-** A node header is a single 0x00 byte for a leaf node, or a height varint
-** followed by the left-hand-child varint for an internal node.
-**
-** The term to be appended is passed via arguments zTerm/nTerm. For a
-** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal
-** node, both aDoclist and nDoclist must be passed 0.
-**
-** If the size of the value in blob pPrev is zero, then this is the first
-** term written to the node. Otherwise, pPrev contains a copy of the
-** previous term. Before this function returns, it is updated to contain a
-** copy of zTerm/nTerm.
-**
-** It is assumed that the buffer associated with pNode is already large
-** enough to accommodate the new entry. The buffer associated with pPrev
-** is extended by this function if requrired.
-**
-** If an error (i.e. OOM condition) occurs, an SQLite error code is
-** returned. Otherwise, SQLITE_OK.
-*/
-static int fts3AppendToNode(
- Blob *pNode, /* Current node image to append to */
- Blob *pPrev, /* Buffer containing previous term written */
- const char *zTerm, /* New term to write */
- int nTerm, /* Size of zTerm in bytes */
- const char *aDoclist, /* Doclist (or NULL) to write */
- int nDoclist /* Size of aDoclist in bytes */
-){
- int rc = SQLITE_OK; /* Return code */
- int bFirst = (pPrev->n==0); /* True if this is the first term written */
- int nPrefix; /* Size of term prefix in bytes */
- int nSuffix; /* Size of term suffix in bytes */
-
- /* Node must have already been started. There must be a doclist for a
- ** leaf node, and there must not be a doclist for an internal node. */
- assert( pNode->n>0 );
- assert( (pNode->a[0]=='\0')==(aDoclist!=0) );
-
- blobGrowBuffer(pPrev, nTerm, &rc);
- if( rc!=SQLITE_OK ) return rc;
-
- nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm);
- nSuffix = nTerm - nPrefix;
- memcpy(pPrev->a, zTerm, nTerm);
- pPrev->n = nTerm;
-
- if( bFirst==0 ){
- pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix);
- }
- pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix);
- memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix);
- pNode->n += nSuffix;
-
- if( aDoclist ){
- pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist);
- memcpy(&pNode->a[pNode->n], aDoclist, nDoclist);
- pNode->n += nDoclist;
- }
-
- assert( pNode->n<=pNode->nAlloc );
-
- return SQLITE_OK;
-}
-
-/*
-** Append the current term and doclist pointed to by cursor pCsr to the
-** appendable b-tree segment opened for writing by pWriter.
-**
-** Return SQLITE_OK if successful, or an SQLite error code otherwise.
-*/
-static int fts3IncrmergeAppend(
- Fts3Table *p, /* Fts3 table handle */
- IncrmergeWriter *pWriter, /* Writer object */
- Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */
-){
- const char *zTerm = pCsr->zTerm;
- int nTerm = pCsr->nTerm;
- const char *aDoclist = pCsr->aDoclist;
- int nDoclist = pCsr->nDoclist;
- int rc = SQLITE_OK; /* Return code */
- int nSpace; /* Total space in bytes required on leaf */
- int nPrefix; /* Size of prefix shared with previous term */
- int nSuffix; /* Size of suffix (nTerm - nPrefix) */
- NodeWriter *pLeaf; /* Object used to write leaf nodes */
-
- pLeaf = &pWriter->aNodeWriter[0];
- nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm);
- nSuffix = nTerm - nPrefix;
-
- nSpace = sqlite3Fts3VarintLen(nPrefix);
- nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
- nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
-
- /* If the current block is not empty, and if adding this term/doclist
- ** to the current block would make it larger than Fts3Table.nNodeSize
- ** bytes, write this block out to the database. */
- if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){
- rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n);
- pWriter->nWork++;
-
- /* Add the current term to the parent node. The term added to the
- ** parent must:
- **
- ** a) be greater than the largest term on the leaf node just written
- ** to the database (still available in pLeaf->key), and
- **
- ** b) be less than or equal to the term about to be added to the new
- ** leaf node (zTerm/nTerm).
- **
- ** In other words, it must be the prefix of zTerm 1 byte longer than
- ** the common prefix (if any) of zTerm and pWriter->zTerm.
- */
- if( rc==SQLITE_OK ){
- rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1);
- }
-
- /* Advance to the next output block */
- pLeaf->iBlock++;
- pLeaf->key.n = 0;
- pLeaf->block.n = 0;
-
- nSuffix = nTerm;
- nSpace = 1;
- nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
- nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
- }
-
- blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);
-
- if( rc==SQLITE_OK ){
- if( pLeaf->block.n==0 ){
- pLeaf->block.n = 1;
- pLeaf->block.a[0] = '\0';
- }
- rc = fts3AppendToNode(
- &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist
- );
- }
-
- return rc;
-}
-
-/*
-** This function is called to release all dynamic resources held by the
-** merge-writer object pWriter, and if no error has occurred, to flush
-** all outstanding node buffers held by pWriter to disk.
-**
-** If *pRc is not SQLITE_OK when this function is called, then no attempt
-** is made to write any data to disk. Instead, this function serves only
-** to release outstanding resources.
-**
-** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while
-** flushing buffers to disk, *pRc is set to an SQLite error code before
-** returning.
-*/
-static void fts3IncrmergeRelease(
- Fts3Table *p, /* FTS3 table handle */
- IncrmergeWriter *pWriter, /* Merge-writer object */
- int *pRc /* IN/OUT: Error code */
-){
- int i; /* Used to iterate through non-root layers */
- int iRoot; /* Index of root in pWriter->aNodeWriter */
- NodeWriter *pRoot; /* NodeWriter for root node */
- int rc = *pRc; /* Error code */
-
- /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment
- ** root node. If the segment fits entirely on a single leaf node, iRoot
- ** will be set to 0. If the root node is the parent of the leaves, iRoot
- ** will be 1. And so on. */
- for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){
- NodeWriter *pNode = &pWriter->aNodeWriter[iRoot];
- if( pNode->block.n>0 ) break;
- assert( *pRc || pNode->block.nAlloc==0 );
- assert( *pRc || pNode->key.nAlloc==0 );
- sqlite3_free(pNode->block.a);
- sqlite3_free(pNode->key.a);
- }
-
- /* Empty output segment. This is a no-op. */
- if( iRoot<0 ) return;
-
- /* The entire output segment fits on a single node. Normally, this means
- ** the node would be stored as a blob in the "root" column of the %_segdir
- ** table. However, this is not permitted in this case. The problem is that
- ** space has already been reserved in the %_segments table, and so the
- ** start_block and end_block fields of the %_segdir table must be populated.
- ** And, by design or by accident, released versions of FTS cannot handle
- ** segments that fit entirely on the root node with start_block!=0.
- **
- ** Instead, create a synthetic root node that contains nothing but a
- ** pointer to the single content node. So that the segment consists of a
- ** single leaf and a single interior (root) node.
- **
- ** Todo: Better might be to defer allocating space in the %_segments
- ** table until we are sure it is needed.
- */
- if( iRoot==0 ){
- Blob *pBlock = &pWriter->aNodeWriter[1].block;
- blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc);
- if( rc==SQLITE_OK ){
- pBlock->a[0] = 0x01;
- pBlock->n = 1 + sqlite3Fts3PutVarint(
- &pBlock->a[1], pWriter->aNodeWriter[0].iBlock
- );
- }
- iRoot = 1;
- }
- pRoot = &pWriter->aNodeWriter[iRoot];
-
- /* Flush all currently outstanding nodes to disk. */
- for(i=0; i<iRoot; i++){
- NodeWriter *pNode = &pWriter->aNodeWriter[i];
- if( pNode->block.n>0 && rc==SQLITE_OK ){
- rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
- }
- sqlite3_free(pNode->block.a);
- sqlite3_free(pNode->key.a);
- }
-
- /* Write the %_segdir record. */
- if( rc==SQLITE_OK ){
- rc = fts3WriteSegdir(p,
- pWriter->iAbsLevel+1, /* level */
- pWriter->iIdx, /* idx */
- pWriter->iStart, /* start_block */
- pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */
- pWriter->iEnd, /* end_block */
- pRoot->block.a, pRoot->block.n /* root */
- );
- }
- sqlite3_free(pRoot->block.a);
- sqlite3_free(pRoot->key.a);
-
- *pRc = rc;
-}
-
-/*
-** Compare the term in buffer zLhs (size in bytes nLhs) with that in
-** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of
-** the other, it is considered to be smaller than the other.
-**
-** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve
-** if it is greater.
-*/
-static int fts3TermCmp(
- const char *zLhs, int nLhs, /* LHS of comparison */
- const char *zRhs, int nRhs /* RHS of comparison */
-){
- int nCmp = MIN(nLhs, nRhs);
- int res;
-
- res = memcmp(zLhs, zRhs, nCmp);
- if( res==0 ) res = nLhs - nRhs;
-
- return res;
-}
-
-
-/*
-** Query to see if the entry in the %_segments table with blockid iEnd is
-** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before
-** returning. Otherwise, set *pbRes to 0.
-**
-** Or, if an error occurs while querying the database, return an SQLite
-** error code. The final value of *pbRes is undefined in this case.
-**
-** This is used to test if a segment is an "appendable" segment. If it
-** is, then a NULL entry has been inserted into the %_segments table
-** with blockid %_segdir.end_block.
-*/
-static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){
- int bRes = 0; /* Result to set *pbRes to */
- sqlite3_stmt *pCheck = 0; /* Statement to query database with */
- int rc; /* Return code */
-
- rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pCheck, 1, iEnd);
- if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1;
- rc = sqlite3_reset(pCheck);
- }
-
- *pbRes = bRes;
- return rc;
-}
-
-/*
-** This function is called when initializing an incremental-merge operation.
-** It checks if the existing segment with index value iIdx at absolute level
-** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the
-** merge-writer object *pWriter is initialized to write to it.
-**
-** An existing segment can be appended to by an incremental merge if:
-**
-** * It was initially created as an appendable segment (with all required
-** space pre-allocated), and
-**
-** * The first key read from the input (arguments zKey and nKey) is
-** greater than the largest key currently stored in the potential
-** output segment.
-*/
-static int fts3IncrmergeLoad(
- Fts3Table *p, /* Fts3 table handle */
- sqlite3_int64 iAbsLevel, /* Absolute level of input segments */
- int iIdx, /* Index of candidate output segment */
- const char *zKey, /* First key to write */
- int nKey, /* Number of bytes in nKey */
- IncrmergeWriter *pWriter /* Populate this object */
-){
- int rc; /* Return code */
- sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */
-
- rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0);
- if( rc==SQLITE_OK ){
- sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */
- sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */
- sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */
- const char *aRoot = 0; /* Pointer to %_segdir.root buffer */
- int nRoot = 0; /* Size of aRoot[] in bytes */
- int rc2; /* Return code from sqlite3_reset() */
- int bAppendable = 0; /* Set to true if segment is appendable */
-
- /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */
- sqlite3_bind_int64(pSelect, 1, iAbsLevel+1);
- sqlite3_bind_int(pSelect, 2, iIdx);
- if( sqlite3_step(pSelect)==SQLITE_ROW ){
- iStart = sqlite3_column_int64(pSelect, 1);
- iLeafEnd = sqlite3_column_int64(pSelect, 2);
- iEnd = sqlite3_column_int64(pSelect, 3);
- nRoot = sqlite3_column_bytes(pSelect, 4);
- aRoot = sqlite3_column_blob(pSelect, 4);
- }else{
- return sqlite3_reset(pSelect);
- }
-
- /* Check for the zero-length marker in the %_segments table */
- rc = fts3IsAppendable(p, iEnd, &bAppendable);
-
- /* Check that zKey/nKey is larger than the largest key the candidate */
- if( rc==SQLITE_OK && bAppendable ){
- char *aLeaf = 0;
- int nLeaf = 0;
-
- rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0);
- if( rc==SQLITE_OK ){
- NodeReader reader;
- for(rc = nodeReaderInit(&reader, aLeaf, nLeaf);
- rc==SQLITE_OK && reader.aNode;
- rc = nodeReaderNext(&reader)
- ){
- assert( reader.aNode );
- }
- if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){
- bAppendable = 0;
- }
- nodeReaderRelease(&reader);
- }
- sqlite3_free(aLeaf);
- }
-
- if( rc==SQLITE_OK && bAppendable ){
- /* It is possible to append to this segment. Set up the IncrmergeWriter
- ** object to do so. */
- int i;
- int nHeight = (int)aRoot[0];
- NodeWriter *pNode;
-
- pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT;
- pWriter->iStart = iStart;
- pWriter->iEnd = iEnd;
- pWriter->iAbsLevel = iAbsLevel;
- pWriter->iIdx = iIdx;
-
- for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
- pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
- }
-
- pNode = &pWriter->aNodeWriter[nHeight];
- pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight;
- blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc);
- if( rc==SQLITE_OK ){
- memcpy(pNode->block.a, aRoot, nRoot);
- pNode->block.n = nRoot;
- }
-
- for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){
- NodeReader reader;
- pNode = &pWriter->aNodeWriter[i];
-
- rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n);
- while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader);
- blobGrowBuffer(&pNode->key, reader.term.n, &rc);
- if( rc==SQLITE_OK ){
- memcpy(pNode->key.a, reader.term.a, reader.term.n);
- pNode->key.n = reader.term.n;
- if( i>0 ){
- char *aBlock = 0;
- int nBlock = 0;
- pNode = &pWriter->aNodeWriter[i-1];
- pNode->iBlock = reader.iChild;
- rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0);
- blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc);
- if( rc==SQLITE_OK ){
- memcpy(pNode->block.a, aBlock, nBlock);
- pNode->block.n = nBlock;
- }
- sqlite3_free(aBlock);
- }
- }
- nodeReaderRelease(&reader);
- }
- }
-
- rc2 = sqlite3_reset(pSelect);
- if( rc==SQLITE_OK ) rc = rc2;
- }
-
- return rc;
-}
-
-/*
-** Determine the largest segment index value that exists within absolute
-** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus
-** one before returning SQLITE_OK. Or, if there are no segments at all
-** within level iAbsLevel, set *piIdx to zero.
-**
-** If an error occurs, return an SQLite error code. The final value of
-** *piIdx is undefined in this case.
-*/
-static int fts3IncrmergeOutputIdx(
- Fts3Table *p, /* FTS Table handle */
- sqlite3_int64 iAbsLevel, /* Absolute index of input segments */
- int *piIdx /* OUT: Next free index at iAbsLevel+1 */
-){
- int rc;
- sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */
-
- rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1);
- sqlite3_step(pOutputIdx);
- *piIdx = sqlite3_column_int(pOutputIdx, 0);
- rc = sqlite3_reset(pOutputIdx);
- }
-
- return rc;
-}
-
-/*
-** Allocate an appendable output segment on absolute level iAbsLevel+1
-** with idx value iIdx.
-**
-** In the %_segdir table, a segment is defined by the values in three
-** columns:
-**
-** start_block
-** leaves_end_block
-** end_block
-**
-** When an appendable segment is allocated, it is estimated that the
-** maximum number of leaf blocks that may be required is the sum of the
-** number of leaf blocks consumed by the input segments, plus the number
-** of input segments, multiplied by two. This value is stored in stack
-** variable nLeafEst.
-**
-** A total of 16*nLeafEst blocks are allocated when an appendable segment
-** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous
-** array of leaf nodes starts at the first block allocated. The array
-** of interior nodes that are parents of the leaf nodes start at block
-** (start_block + (1 + end_block - start_block) / 16). And so on.
-**
-** In the actual code below, the value "16" is replaced with the
-** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT.
-*/
-static int fts3IncrmergeWriter(
- Fts3Table *p, /* Fts3 table handle */
- sqlite3_int64 iAbsLevel, /* Absolute level of input segments */
- int iIdx, /* Index of new output segment */
- Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */
- IncrmergeWriter *pWriter /* Populate this object */
-){
- int rc; /* Return Code */
- int i; /* Iterator variable */
- int nLeafEst = 0; /* Blocks allocated for leaf nodes */
- sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */
- sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */
-
- /* Calculate nLeafEst. */
- rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pLeafEst, 1, iAbsLevel);
- sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment);
- if( SQLITE_ROW==sqlite3_step(pLeafEst) ){
- nLeafEst = sqlite3_column_int(pLeafEst, 0);
- }
- rc = sqlite3_reset(pLeafEst);
- }
- if( rc!=SQLITE_OK ) return rc;
-
- /* Calculate the first block to use in the output segment */
- rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0);
- if( rc==SQLITE_OK ){
- if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){
- pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0);
- pWriter->iEnd = pWriter->iStart - 1;
- pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT;
- }
- rc = sqlite3_reset(pFirstBlock);
- }
- if( rc!=SQLITE_OK ) return rc;
-
- /* Insert the marker in the %_segments table to make sure nobody tries
- ** to steal the space just allocated. This is also used to identify
- ** appendable segments. */
- rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0);
- if( rc!=SQLITE_OK ) return rc;
-
- pWriter->iAbsLevel = iAbsLevel;
- pWriter->nLeafEst = nLeafEst;
- pWriter->iIdx = iIdx;
-
- /* Set up the array of NodeWriter objects */
- for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
- pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
- }
- return SQLITE_OK;
-}
-
-/*
-** Remove an entry from the %_segdir table. This involves running the
-** following two statements:
-**
-** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx
-** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx
-**
-** The DELETE statement removes the specific %_segdir level. The UPDATE
-** statement ensures that the remaining segments have contiguously allocated
-** idx values.
-*/
-static int fts3RemoveSegdirEntry(
- Fts3Table *p, /* FTS3 table handle */
- sqlite3_int64 iAbsLevel, /* Absolute level to delete from */
- int iIdx /* Index of %_segdir entry to delete */
-){
- int rc; /* Return code */
- sqlite3_stmt *pDelete = 0; /* DELETE statement */
-
- rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pDelete, 1, iAbsLevel);
- sqlite3_bind_int(pDelete, 2, iIdx);
- sqlite3_step(pDelete);
- rc = sqlite3_reset(pDelete);
- }
-
- return rc;
-}
-
-/*
-** One or more segments have just been removed from absolute level iAbsLevel.
-** Update the 'idx' values of the remaining segments in the level so that
-** the idx values are a contiguous sequence starting from 0.
-*/
-static int fts3RepackSegdirLevel(
- Fts3Table *p, /* FTS3 table handle */
- sqlite3_int64 iAbsLevel /* Absolute level to repack */
-){
- int rc; /* Return code */
- int *aIdx = 0; /* Array of remaining idx values */
- int nIdx = 0; /* Valid entries in aIdx[] */
- int nAlloc = 0; /* Allocated size of aIdx[] */
- int i; /* Iterator variable */
- sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */
- sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */
-
- rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0);
- if( rc==SQLITE_OK ){
- int rc2;
- sqlite3_bind_int64(pSelect, 1, iAbsLevel);
- while( SQLITE_ROW==sqlite3_step(pSelect) ){
- if( nIdx>=nAlloc ){
- int *aNew;
- nAlloc += 16;
- aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int));
- if( !aNew ){
- rc = SQLITE_NOMEM;
- break;
- }
- aIdx = aNew;
- }
- aIdx[nIdx++] = sqlite3_column_int(pSelect, 0);
- }
- rc2 = sqlite3_reset(pSelect);
- if( rc==SQLITE_OK ) rc = rc2;
- }
-
- if( rc==SQLITE_OK ){
- rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0);
- }
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pUpdate, 2, iAbsLevel);
- }
-
- assert( p->bIgnoreSavepoint==0 );
- p->bIgnoreSavepoint = 1;
- for(i=0; rc==SQLITE_OK && i<nIdx; i++){
- if( aIdx[i]!=i ){
- sqlite3_bind_int(pUpdate, 3, aIdx[i]);
- sqlite3_bind_int(pUpdate, 1, i);
- sqlite3_step(pUpdate);
- rc = sqlite3_reset(pUpdate);
- }
- }
- p->bIgnoreSavepoint = 0;
-
- sqlite3_free(aIdx);
- return rc;
-}
-
-static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){
- pNode->a[0] = (char)iHeight;
- if( iChild ){
- assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) );
- pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild);
- }else{
- assert( pNode->nAlloc>=1 );
- pNode->n = 1;
- }
-}
-
-/*
-** The first two arguments are a pointer to and the size of a segment b-tree
-** node. The node may be a leaf or an internal node.
-**
-** This function creates a new node image in blob object *pNew by copying
-** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes)
-** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode.
-*/
-static int fts3TruncateNode(
- const char *aNode, /* Current node image */
- int nNode, /* Size of aNode in bytes */
- Blob *pNew, /* OUT: Write new node image here */
- const char *zTerm, /* Omit all terms smaller than this */
- int nTerm, /* Size of zTerm in bytes */
- sqlite3_int64 *piBlock /* OUT: Block number in next layer down */
-){
- NodeReader reader; /* Reader object */
- Blob prev = {0, 0, 0}; /* Previous term written to new node */
- int rc = SQLITE_OK; /* Return code */
- int bLeaf = aNode[0]=='\0'; /* True for a leaf node */
-
- /* Allocate required output space */
- blobGrowBuffer(pNew, nNode, &rc);
- if( rc!=SQLITE_OK ) return rc;
- pNew->n = 0;
-
- /* Populate new node buffer */
- for(rc = nodeReaderInit(&reader, aNode, nNode);
- rc==SQLITE_OK && reader.aNode;
- rc = nodeReaderNext(&reader)
- ){
- if( pNew->n==0 ){
- int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm);
- if( res<0 || (bLeaf==0 && res==0) ) continue;
- fts3StartNode(pNew, (int)aNode[0], reader.iChild);
- *piBlock = reader.iChild;
- }
- rc = fts3AppendToNode(
- pNew, &prev, reader.term.a, reader.term.n,
- reader.aDoclist, reader.nDoclist
- );
- if( rc!=SQLITE_OK ) break;
- }
- if( pNew->n==0 ){
- fts3StartNode(pNew, (int)aNode[0], reader.iChild);
- *piBlock = reader.iChild;
- }
- assert( pNew->n<=pNew->nAlloc );
-
- nodeReaderRelease(&reader);
- sqlite3_free(prev.a);
- return rc;
-}
-
-/*
-** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute
-** level iAbsLevel. This may involve deleting entries from the %_segments
-** table, and modifying existing entries in both the %_segments and %_segdir
-** tables.
-**
-** SQLITE_OK is returned if the segment is updated successfully. Or an
-** SQLite error code otherwise.
-*/
-static int fts3TruncateSegment(
- Fts3Table *p, /* FTS3 table handle */
- sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */
- int iIdx, /* Index within level of segment to modify */
- const char *zTerm, /* Remove terms smaller than this */
- int nTerm /* Number of bytes in buffer zTerm */
-){
- int rc = SQLITE_OK; /* Return code */
- Blob root = {0,0,0}; /* New root page image */
- Blob block = {0,0,0}; /* Buffer used for any other block */
- sqlite3_int64 iBlock = 0; /* Block id */
- sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */
- sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */
- sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */
-
- rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0);
- if( rc==SQLITE_OK ){
- int rc2; /* sqlite3_reset() return code */
- sqlite3_bind_int64(pFetch, 1, iAbsLevel);
- sqlite3_bind_int(pFetch, 2, iIdx);
- if( SQLITE_ROW==sqlite3_step(pFetch) ){
- const char *aRoot = sqlite3_column_blob(pFetch, 4);
- int nRoot = sqlite3_column_bytes(pFetch, 4);
- iOldStart = sqlite3_column_int64(pFetch, 1);
- rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock);
- }
- rc2 = sqlite3_reset(pFetch);
- if( rc==SQLITE_OK ) rc = rc2;
- }
-
- while( rc==SQLITE_OK && iBlock ){
- char *aBlock = 0;
- int nBlock = 0;
- iNewStart = iBlock;
-
- rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0);
- if( rc==SQLITE_OK ){
- rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock);
- }
- if( rc==SQLITE_OK ){
- rc = fts3WriteSegment(p, iNewStart, block.a, block.n);
- }
- sqlite3_free(aBlock);
- }
-
- /* Variable iNewStart now contains the first valid leaf node. */
- if( rc==SQLITE_OK && iNewStart ){
- sqlite3_stmt *pDel = 0;
- rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pDel, 1, iOldStart);
- sqlite3_bind_int64(pDel, 2, iNewStart-1);
- sqlite3_step(pDel);
- rc = sqlite3_reset(pDel);
- }
- }
-
- if( rc==SQLITE_OK ){
- sqlite3_stmt *pChomp = 0;
- rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int64(pChomp, 1, iNewStart);
- sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC);
- sqlite3_bind_int64(pChomp, 3, iAbsLevel);
- sqlite3_bind_int(pChomp, 4, iIdx);
- sqlite3_step(pChomp);
- rc = sqlite3_reset(pChomp);
- }
- }
-
- sqlite3_free(root.a);
- sqlite3_free(block.a);
- return rc;
-}
-
-/*
-** This function is called after an incrmental-merge operation has run to
-** merge (or partially merge) two or more segments from absolute level
-** iAbsLevel.
-**
-** Each input segment is either removed from the db completely (if all of
-** its data was copied to the output segment by the incrmerge operation)
-** or modified in place so that it no longer contains those entries that
-** have been duplicated in the output segment.
-*/
-static int fts3IncrmergeChomp(
- Fts3Table *p, /* FTS table handle */
- sqlite3_int64 iAbsLevel, /* Absolute level containing segments */
- Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */
- int *pnRem /* Number of segments not deleted */
-){
- int i;
- int nRem = 0;
- int rc = SQLITE_OK;
-
- for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){
- Fts3SegReader *pSeg = 0;
- int j;
-
- /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding
- ** somewhere in the pCsr->apSegment[] array. */
- for(j=0; ALWAYS(j<pCsr->nSegment); j++){
- pSeg = pCsr->apSegment[j];
- if( pSeg->iIdx==i ) break;
- }
- assert( j<pCsr->nSegment && pSeg->iIdx==i );
-
- if( pSeg->aNode==0 ){
- /* Seg-reader is at EOF. Remove the entire input segment. */
- rc = fts3DeleteSegment(p, pSeg);
- if( rc==SQLITE_OK ){
- rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx);
- }
- *pnRem = 0;
- }else{
- /* The incremental merge did not copy all the data from this
- ** segment to the upper level. The segment is modified in place
- ** so that it contains no keys smaller than zTerm/nTerm. */
- const char *zTerm = pSeg->zTerm;
- int nTerm = pSeg->nTerm;
- rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm);
- nRem++;
- }
- }
-
- if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){
- rc = fts3RepackSegdirLevel(p, iAbsLevel);
- }
-
- *pnRem = nRem;
- return rc;
-}
-
-/*
-** Store an incr-merge hint in the database.
-*/
-static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){
- sqlite3_stmt *pReplace = 0;
- int rc; /* Return code */
-
- rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT);
- sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC);
- sqlite3_step(pReplace);
- rc = sqlite3_reset(pReplace);
- }
-
- return rc;
-}
-
-/*
-** Load an incr-merge hint from the database. The incr-merge hint, if one
-** exists, is stored in the rowid==1 row of the %_stat table.
-**
-** If successful, populate blob *pHint with the value read from the %_stat
-** table and return SQLITE_OK. Otherwise, if an error occurs, return an
-** SQLite error code.
-*/
-static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){
- sqlite3_stmt *pSelect = 0;
- int rc;
-
- pHint->n = 0;
- rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0);
- if( rc==SQLITE_OK ){
- int rc2;
- sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT);
- if( SQLITE_ROW==sqlite3_step(pSelect) ){
- const char *aHint = sqlite3_column_blob(pSelect, 0);
- int nHint = sqlite3_column_bytes(pSelect, 0);
- if( aHint ){
- blobGrowBuffer(pHint, nHint, &rc);
- if( rc==SQLITE_OK ){
- memcpy(pHint->a, aHint, nHint);
- pHint->n = nHint;
- }
- }
- }
- rc2 = sqlite3_reset(pSelect);
- if( rc==SQLITE_OK ) rc = rc2;
- }
-
- return rc;
-}
-
-/*
-** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
-** Otherwise, append an entry to the hint stored in blob *pHint. Each entry
-** consists of two varints, the absolute level number of the input segments
-** and the number of input segments.
-**
-** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs,
-** set *pRc to an SQLite error code before returning.
-*/
-static void fts3IncrmergeHintPush(
- Blob *pHint, /* Hint blob to append to */
- i64 iAbsLevel, /* First varint to store in hint */
- int nInput, /* Second varint to store in hint */
- int *pRc /* IN/OUT: Error code */
-){
- blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc);
- if( *pRc==SQLITE_OK ){
- pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel);
- pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput);
- }
-}
-
-/*
-** Read the last entry (most recently pushed) from the hint blob *pHint
-** and then remove the entry. Write the two values read to *piAbsLevel and
-** *pnInput before returning.
-**
-** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does
-** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB.
-*/
-static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
- const int nHint = pHint->n;
- int i;
-
- i = pHint->n-2;
- while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
- while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
-
- pHint->n = i;
- i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel);
- i += sqlite3Fts3GetVarint32(&pHint->a[i], pnInput);
- if( i!=nHint ) return SQLITE_CORRUPT_VTAB;
-
- return SQLITE_OK;
-}
-
-
-/*
-** Attempt an incremental merge that writes nMerge leaf blocks.
-**
-** Incremental merges happen nMin segments at a time. The segments to be
-** merged are the nMin oldest segments (the ones with the smallest indexes)
-** in the highest level that contains at least nMin segments. Multiple
-** merges might occur in an attempt to write the quota of nMerge leaf blocks.
-**
-** Parameters:
-**   p       FTS3 table handle.
-**   nMerge  Quota of leaf blocks to write during this call.
-**   nMin    Minimum number of segments a level must hold to be merged.
-**
-** Returns SQLITE_OK on success, SQLITE_NOMEM if the working objects cannot
-** be allocated, or another SQLite error code.
-*/
-int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
- int rc; /* Return code */
- int nRem = nMerge; /* Number of leaf pages yet to be written */
- Fts3MultiSegReader *pCsr; /* Cursor used to read input data */
- Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */
- IncrmergeWriter *pWriter; /* Writer object */
- int nSeg = 0; /* Number of input segments */
- sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */
- Blob hint = {0, 0, 0}; /* Hint read from %_stat table */
- int bDirtyHint = 0; /* True if blob 'hint' has been modified */
-
- /* Allocate space for the cursor, filter and writer objects. A single
- ** allocation holds all three, laid out as writer, filter, cursor, so the
- ** one sqlite3_free(pWriter) at the end of this function releases them all.
- */
- const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter);
- pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc);
- if( !pWriter ) return SQLITE_NOMEM;
- pFilter = (Fts3SegFilter *)&pWriter[1];
- pCsr = (Fts3MultiSegReader *)&pFilter[1];
-
- rc = fts3IncrmergeHintLoad(p, &hint);
- while( rc==SQLITE_OK && nRem>0 ){
- const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;
- sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */
- int bUseHint = 0; /* True if attempting to append */
-
- /* Search the %_segdir table for the absolute level with the smallest
- ** relative level number that contains at least nMin segments, if any.
- ** If one is found, set iAbsLevel to the absolute level number and
- ** nSeg to nMin. If no level with at least nMin segments can be found,
- ** set nSeg to -1.
- **
- ** NOTE(review): the return code of fts3SqlStmt() is not checked here; it
- ** is overwritten by the result of sqlite3_reset() below.
- */
- rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0);
- sqlite3_bind_int(pFindLevel, 1, nMin);
- if( sqlite3_step(pFindLevel)==SQLITE_ROW ){
- iAbsLevel = sqlite3_column_int64(pFindLevel, 0);
- nSeg = nMin;
- }else{
- nSeg = -1;
- }
- rc = sqlite3_reset(pFindLevel);
-
- /* If the hint read from the %_stat table is not empty, check if the
- ** last entry in it specifies a relative level smaller than or equal
- ** to the level identified by the block above (if any). If so, this
- ** iteration of the loop will work on merging at the hinted level.
- */
- if( rc==SQLITE_OK && hint.n ){
- int nHint = hint.n;
- sqlite3_int64 iHintAbsLevel = 0; /* Hint level */
- int nHintSeg = 0; /* Hint number of segments */
-
- /* NOTE(review): rc from the pop is not tested before the popped values
- ** are compared below; it is only consulted further down. */
- rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg);
- if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){
- iAbsLevel = iHintAbsLevel;
- nSeg = nHintSeg;
- bUseHint = 1;
- bDirtyHint = 1;
- }else{
- /* This undoes the effect of the HintPop() above - so that no entry
- ** is removed from the hint blob. */
- hint.n = nHint;
- }
- }
-
- /* If nSeg is less than zero, then there is no level with at least
- ** nMin segments and no hint in the %_stat table. No work to do.
- ** Exit early in this case. */
- if( nSeg<0 ) break;
-
- /* Open a cursor to iterate through the contents of the oldest nSeg
- ** indexes of absolute level iAbsLevel. If this cursor is opened using
- ** the 'hint' parameters, it is possible that there are less than nSeg
- ** segments available in level iAbsLevel. In this case, no work is
- ** done on iAbsLevel - fall through to the next iteration of the loop
- ** to start work on some other level. */
- /* The memset clears the writer, filter and cursor in one go, since all
- ** three live in the nAlloc-byte allocation that starts at pWriter. */
- memset(pWriter, 0, nAlloc);
- pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
- if( rc==SQLITE_OK ){
- rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);
- }
- if( SQLITE_OK==rc && pCsr->nSegment==nSeg
- && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))
- && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))
- ){
- int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
- rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
- if( rc==SQLITE_OK ){
- /* When working from a hint and an output segment already exists, load
- ** that segment (index iIdx-1) for appending; otherwise start a new
- ** output segment at index iIdx. */
- if( bUseHint && iIdx>0 ){
- const char *zKey = pCsr->zTerm;
- int nKey = pCsr->nTerm;
- rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
- }else{
- rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
- }
- }
-
- if( rc==SQLITE_OK && pWriter->nLeafEst ){
- fts3LogMerge(nSeg, iAbsLevel);
- do {
- rc = fts3IncrmergeAppend(p, pWriter, pCsr);
- if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr);
- /* Once the writer has done at least nRem units of work, turn
- ** SQLITE_ROW into SQLITE_OK so that the loop stops even though
- ** the cursor still has input left. */
- if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK;
- }while( rc==SQLITE_ROW );
-
- /* Update or delete the input segments */
- if( rc==SQLITE_OK ){
- nRem -= (1 + pWriter->nWork);
- rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg);
- /* A non-zero nSeg after the chomp is recorded in the hint blob so
- ** that a later call can pick this level up again. */
- if( nSeg!=0 ){
- bDirtyHint = 1;
- fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc);
- }
- }
- }
-
- fts3IncrmergeRelease(p, pWriter, &rc);
- }
-
- sqlite3Fts3SegReaderFinish(pCsr);
- }
-
- /* Write the hint values into the %_stat table for the next incr-merger */
- if( bDirtyHint && rc==SQLITE_OK ){
- rc = fts3IncrmergeHintStore(p, &hint);
- }
-
- sqlite3_free(pWriter);
- sqlite3_free(hint.a);
- return rc;
-}
-
-/*
-** Convert the decimal digits beginning at *pz into a non-negative integer
-** and return its value. Advance *pz to point to the first character past
-** the digits that were consumed. If *pz does not begin with a digit, zero
-** is returned and *pz is left unchanged.
-**
-** Accumulation stops as soon as the value reaches 214748363, so that the
-** next "10*i + digit" step can never overflow a 32-bit signed int (which
-** would be undefined behaviour). In that case *pz is left pointing at the
-** first unconsumed digit, which lets a caller that expects the whole
-** number to have been consumed detect the over-long input.
-*/
-static int fts3Getint(const char **pz){
-  const char *z = *pz;
-  int i = 0;
-  while( (*z)>='0' && (*z)<='9' && i<214748363 ){
-    /* Subtract '0' before adding so the intermediate sum stays in range. */
-    i = 10*i + (*z - '0');
-    z++;
-  }
-  *pz = z;
-  return i;
-}
-
-/*
-** Handle the special command:
-**
-**   INSERT INTO table(table) VALUES('merge=A,B');
-**
-** zParam is the text following "merge=". A is the number of leaf pages to
-** write during the merge. B, which is optional, is the minimum number of
-** segments a level must contain before it is selected for merging; when it
-** is omitted FTS3_MERGE_COUNT/2 is used.
-**
-** Returns SQLITE_ERROR if anything other than "A" or "A,B" is found in
-** zParam, or if B is less than 2. Otherwise the result of the merge (or of
-** creating the stat table, if that fails) is returned.
-*/
-static int fts3DoIncrmerge(
-  Fts3Table *p,                   /* FTS3 table handle */
-  const char *zParam              /* Nul-terminated string containing "A,B" */
-){
-  const char *zCsr = zParam;      /* Parse position within zParam */
-  int nSegMin = (FTS3_MERGE_COUNT / 2);
-  int nLeaf;
-  int rc = SQLITE_OK;
-
-  /* First integer: the leaf-page quota. */
-  nLeaf = fts3Getint(&zCsr);
-
-  /* Optional second integer, introduced by a ',' that is not the last
-  ** character of the string. */
-  if( zCsr[0]==',' && zCsr[1]!='\0' ){
-    zCsr++;
-    nSegMin = fts3Getint(&zCsr);
-  }
-
-  /* Trailing text or a too-small minimum is an error. */
-  if( zCsr[0]!='\0' || nSegMin<2 ){
-    return SQLITE_ERROR;
-  }
-
-  if( !p->bHasStat ){
-    assert( p->bFts4==0 );
-    sqlite3Fts3CreateStatTable(&rc, p);
-  }
-  if( rc==SQLITE_OK ){
-    rc = sqlite3Fts3Incrmerge(p, nLeaf, nSegMin);
-  }
-  sqlite3Fts3SegmentsClose(p);
-  return rc;
-}
-
-/*
-** Handle the special command:
-**
-**   INSERT INTO table(table) VALUES('automerge=X');
-**
-** X is an integer: zero switches automerge off, any other value switches
-** it on. The new setting is stored in p->bAutoincrmerge and also written
-** to the stat table under key FTS_STAT_AUTOINCRMERGE so that it persists.
-** If the table has no stat table yet, one is created first.
-*/
-static int fts3DoAutoincrmerge(
-  Fts3Table *p,                   /* FTS3 table handle */
-  const char *zParam              /* Nul-terminated string containing boolean */
-){
-  sqlite3_stmt *pReplace = 0;     /* SQL_REPLACE_STAT statement */
-  int rc = SQLITE_OK;
-
-  p->bAutoincrmerge = fts3Getint(&zParam)!=0;
-
-  if( !p->bHasStat ){
-    assert( p->bFts4==0 );
-    sqlite3Fts3CreateStatTable(&rc, p);
-    if( rc!=SQLITE_OK ) return rc;
-  }
-
-  rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0);
-  if( rc==SQLITE_OK ){
-    sqlite3_bind_int(pReplace, 1, FTS_STAT_AUTOINCRMERGE);
-    sqlite3_bind_int(pReplace, 2, p->bAutoincrmerge);
-    sqlite3_step(pReplace);
-    /* The outcome of the step is reported by sqlite3_reset(). */
-    rc = sqlite3_reset(pReplace);
-  }
-  return rc;
-}
-
-/*
-** Compute and return the 64-bit checksum of a single FTS index entry.
-**
-** The checksum starts out as the docid. Each further field (language id,
-** index number, column, position, and then every byte of the term) is
-** folded in, in that order, with the step "h = h + (h<<3) + field".
-*/
-static u64 fts3ChecksumEntry(
-  const char *zTerm,              /* Pointer to buffer containing term */
-  int nTerm,                      /* Size of zTerm in bytes */
-  int iLangid,                    /* Language id for current row */
-  int iIndex,                     /* Index (0..Fts3Table.nIndex-1) */
-  i64 iDocid,                     /* Docid for current row. */
-  int iCol,                       /* Column number */
-  int iPos                        /* Position */
-){
-  u64 h = (u64)iDocid;            /* Running checksum */
-  const char *z = zTerm;          /* Next term byte to fold in */
-  int nLeft;                      /* Term bytes still to fold in */
-
-  h += (h<<3) + iLangid;
-  h += (h<<3) + iIndex;
-  h += (h<<3) + iCol;
-  h += (h<<3) + iPos;
-
-  for(nLeft=nTerm; nLeft>0; nLeft--){
-    h += (h<<3) + *z;
-    z++;
-  }
-
-  return h;
-}
-
-/*
-** Return a checksum of all entries in the FTS index that correspond to
-** language id iLangid. The checksum is calculated by XORing the checksums
-** of each individual entry (see fts3ChecksumEntry()) together.
-**
-** If successful, the checksum value is returned and *pRc set to SQLITE_OK.
-** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The
-** return value is undefined in this case.
-**
-** The caller must pass in *pRc==SQLITE_OK; this is asserted on entry.
-*/
-static u64 fts3ChecksumIndex(
- Fts3Table *p, /* FTS3 table handle */
- int iLangid, /* Language id to return cksum for */
- int iIndex, /* Index to cksum (0..p->nIndex-1) */
- int *pRc /* OUT: Return code */
-){
- Fts3SegFilter filter;
- Fts3MultiSegReader csr;
- int rc;
- u64 cksum = 0;
-
- assert( *pRc==SQLITE_OK );
-
- memset(&filter, 0, sizeof(filter));
- memset(&csr, 0, sizeof(csr));
- filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
- filter.flags |= FTS3_SEGMENT_SCAN;
-
- rc = sqlite3Fts3SegReaderCursor(
- p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr
- );
- if( rc==SQLITE_OK ){
- rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
- }
-
- if( rc==SQLITE_OK ){
- /* Each step of the reader yields one term (csr.zTerm/csr.nTerm) and its
- ** doclist (csr.aDoclist/csr.nDoclist), which is decoded below. */
- while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){
- char *pCsr = csr.aDoclist;
- char *pEnd = &pCsr[csr.nDoclist];
-
- i64 iDocid = 0;
- i64 iCol = 0;
- i64 iPos = 0;
-
- /* The doclist begins with the first docid, stored as a varint. */
- pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid);
- while( pCsr<pEnd ){
- i64 iVal = 0;
- pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
- /* A varint that ends exactly at pEnd is read but otherwise ignored. */
- if( pCsr<pEnd ){
- if( iVal==0 || iVal==1 ){
- /* 0 and 1 are control values; both reset the column and position.
- ** 1 is followed by a column number. 0 is followed by a value
- ** that is added to the current docid. */
- iCol = 0;
- iPos = 0;
- if( iVal ){
- pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
- }else{
- pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
- iDocid += iVal;
- }
- }else{
- /* Any other value is a position delta, stored offset by 2. */
- iPos += (iVal - 2);
- cksum = cksum ^ fts3ChecksumEntry(
- csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid,
- (int)iCol, (int)iPos
- );
- }
- }
- }
- }
- }
- sqlite3Fts3SegReaderFinish(&csr);
-
- /* NOTE(review): rc here is whatever ended the loop above, i.e. the last
- ** value returned by sqlite3Fts3SegReaderStep() when the scan ran. */
- *pRc = rc;
- return cksum;
-}
-
-/*
-** Check if the contents of the FTS index match the current contents of the
-** content table. If no error occurs and the contents do match, set *pbOk
-** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk
-** to false before returning.
-**
-** If an error occurs (e.g. an OOM or IO error), return an SQLite error
-** code. The final value of *pbOk is undefined in this case.
-**
-** Two checksums are computed: cksum1 by scanning every index for every
-** language id, and cksum2 by re-tokenizing every row of the content. The
-** second pass runs only if the first one succeeded, so an error from the
-** index scan is reported to the caller rather than being overwritten.
-*/
-static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
-  int rc = SQLITE_OK;             /* Return code */
-  u64 cksum1 = 0;                 /* Checksum based on FTS index contents */
-  u64 cksum2 = 0;                 /* Checksum based on %_content contents */
-  sqlite3_stmt *pAllLangid = 0;   /* Statement to return all language-ids */
-
-  /* This block calculates the checksum according to the FTS index. */
-  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
-  if( rc==SQLITE_OK ){
-    int rc2;
-    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
-    while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
-      int iLangid = sqlite3_column_int(pAllLangid, 0);
-      int i;
-      /* Stop at the first failure: fts3ChecksumIndex() requires that rc
-      ** is SQLITE_OK when it is called. */
-      for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
-        cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc);
-      }
-    }
-    rc2 = sqlite3_reset(pAllLangid);
-    if( rc==SQLITE_OK ) rc = rc2;
-  }
-
-  /* This block calculates the checksum according to the %_content table */
-  if( rc==SQLITE_OK ){
-    sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
-    sqlite3_stmt *pStmt = 0;
-    char *zSql;
-
-    zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
-    if( !zSql ){
-      rc = SQLITE_NOMEM;
-    }else{
-      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
-      sqlite3_free(zSql);
-    }
-
-    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
-      i64 iDocid = sqlite3_column_int64(pStmt, 0);
-      int iLang = langidFromSelect(p, pStmt);
-      int iCol;
-
-      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
-        const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
-        int nText = sqlite3_column_bytes(pStmt, iCol+1);
-        sqlite3_tokenizer_cursor *pT = 0;
-
-        rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);
-        while( rc==SQLITE_OK ){
-          char const *zToken;       /* Buffer containing token */
-          int nToken = 0;           /* Number of bytes in token */
-          int iDum1 = 0, iDum2 = 0; /* Dummy variables */
-          int iPos = 0;             /* Position of token in zText */
-
-          rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
-          if( rc==SQLITE_OK ){
-            int i;
-            /* Entry for the main index (index 0)... */
-            cksum2 = cksum2 ^ fts3ChecksumEntry(
-                zToken, nToken, iLang, 0, iDocid, iCol, iPos
-            );
-            /* ...and one for each prefix index the token is long enough
-            ** to appear in. */
-            for(i=1; i<p->nIndex; i++){
-              if( p->aIndex[i].nPrefix<=nToken ){
-                cksum2 = cksum2 ^ fts3ChecksumEntry(
-                    zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
-                );
-              }
-            }
-          }
-        }
-        if( pT ) pModule->xClose(pT);
-        /* SQLITE_DONE from xNext just means "no more tokens". */
-        if( rc==SQLITE_DONE ) rc = SQLITE_OK;
-      }
-    }
-
-    sqlite3_finalize(pStmt);
-  }
-
-  *pbOk = (cksum1==cksum2);
-  return rc;
-}
-
-/*
-** Run the integrity-check and translate its outcome into a return code:
-**
-**   * SQLITE_OK if no error occurs and the FTS index contents are correct,
-**   * SQLITE_CORRUPT_VTAB if no error occurs but the contents are incorrect,
-**   * otherwise the SQLite error code (e.g. for an OOM or IO error).
-**
-** How the check works: for each token and indexed token prefix in the
-** document set, fts3ChecksumEntry() computes a 64-bit checksum from
-**
-**   + the index number (0 for the main index, 1 for the first prefix
-**     index etc.),
-**   + the token (or token prefix) text itself,
-**   + the language-id of the row it appears in,
-**   + the docid of the row it appears in,
-**   + the column it appears in, and
-**   + the token's position within that column.
-**
-** All entry checksums are XORed together into one checksum for the whole
-** index. That checksum is calculated in two independent ways:
-**
-**   1. By scanning the contents of the FTS index, and
-**   2. By scanning and tokenizing the content table.
-**
-** The integrity-check passes if both results are identical.
-*/
-static int fts3DoIntegrityCheck(
-  Fts3Table *p                    /* FTS3 table handle */
-){
-  int bMatch = 0;                 /* True if the two checksums agree */
-  int rc = fts3IntegrityCheck(p, &bMatch);
-
-  if( rc!=SQLITE_OK ) return rc;
-  return bMatch ? SQLITE_OK : SQLITE_CORRUPT_VTAB;
-}
-
-/*
-** Handle a 'special' INSERT of the form:
-**
-**   "INSERT INTO tbl(tbl) VALUES(<expr>)"
-**
-** Argument pVal contains the result of <expr>. The text is matched
-** case-insensitively against the following commands:
-**
-**   'optimize'          -> fts3DoOptimize()
-**   'rebuild'           -> fts3DoRebuild()
-**   'integrity-check'   -> fts3DoIntegrityCheck()
-**   'merge=A,B'         -> fts3DoIncrmerge()
-**   'automerge=X'       -> fts3DoAutoincrmerge()
-**
-** and, only when compiled with SQLITE_TEST:
-**
-**   'nodesize=N'        -> sets p->nNodeSize
-**   'maxpending=N'      -> sets p->nMaxPendingData
-**
-** Returns SQLITE_NOMEM if the text of pVal cannot be obtained, SQLITE_ERROR
-** if the text is not a recognized command, and otherwise the result of the
-** command that was run.
-*/
-static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
-  int rc;                         /* Return Code */
-  const char *zVal = (const char *)sqlite3_value_text(pVal);
-  int nVal = sqlite3_value_bytes(pVal);
-
-  if( !zVal ){
-    return SQLITE_NOMEM;
-  }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
-    rc = fts3DoOptimize(p, 0);
-  }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){
-    rc = fts3DoRebuild(p);
-  }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){
-    rc = fts3DoIntegrityCheck(p);
-  }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){
-    rc = fts3DoIncrmerge(p, &zVal[6]);
-  }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){
-    rc = fts3DoAutoincrmerge(p, &zVal[10]);
-#ifdef SQLITE_TEST
-  }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
-    p->nNodeSize = atoi(&zVal[9]);
-    rc = SQLITE_OK;
-  }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 11) ){
-    /* Compare all 11 characters of "maxpending=", so that the value read
-    ** from &zVal[11] really does follow the complete keyword. */
-    p->nMaxPendingData = atoi(&zVal[11]);
-    rc = SQLITE_OK;
-#endif
-  }else{
-    rc = SQLITE_ERROR;
-  }
-
-  return rc;
-}
-
-#ifndef SQLITE_DISABLE_FTS4_DEFERRED
-/*
-** Discard the cached doclist of every deferred token attached to pCsr.
-** The tokens themselves stay on the pCsr->pDeferred list; only their
-** pList members are deleted and reset to 0. The doclists are the ones
-** built by sqlite3Fts3CacheDeferredDoclists().
-*/
-void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
-  Fts3DeferredToken *pTok = pCsr->pDeferred;
-  while( pTok ){
-    fts3PendingListDelete(pTok->pList);
-    pTok->pList = 0;
-    pTok = pTok->pNext;
-  }
-}
-
-/*
-** Free every entry in the pCsr->pDeferred list, together with any doclist
-** cached on it, and leave the list empty. Entries are added to this list
-** using sqlite3Fts3DeferToken().
-*/
-void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){
-  Fts3DeferredToken *pTok = pCsr->pDeferred;
-  while( pTok ){
-    /* Remember the successor before the current entry is freed. */
-    Fts3DeferredToken *pFollow = pTok->pNext;
-    fts3PendingListDelete(pTok->pList);
-    sqlite3_free(pTok);
-    pTok = pFollow;
-  }
-  pCsr->pDeferred = 0;
-}
-
-/*
-** Generate deferred-doclists for all tokens in the pCsr->pDeferred list
-** based on the row that pCsr currently points to.
-**
-** A deferred-doclist is like any other doclist with position information
-** included, except that it only contains entries for a single row of the
-** table, not for all rows.
-**
-** Does nothing and returns SQLITE_OK if pCsr has no deferred tokens.
-** Otherwise every column of the current row is tokenized and each token is
-** compared against every deferred token. Returns SQLITE_OK on success or
-** the error code of the first operation that failed.
-*/
-int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
- int rc = SQLITE_OK; /* Return code */
- if( pCsr->pDeferred ){
- int i; /* Used to iterate through table columns */
- sqlite3_int64 iDocid; /* Docid of the row pCsr points to */
- Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */
-
- Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
- sqlite3_tokenizer *pT = p->pTokenizer;
- sqlite3_tokenizer_module const *pModule = pT->pModule;
-
- assert( pCsr->isRequireSeek==0 );
- iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
-
- /* Column 0 of pCsr->pStmt is the docid, so table column i is read from
- ** statement column i+1. */
- for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
- const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
- sqlite3_tokenizer_cursor *pTC = 0;
-
- rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
- while( rc==SQLITE_OK ){
- char const *zToken; /* Buffer containing token */
- int nToken = 0; /* Number of bytes in token */
- int iDum1 = 0, iDum2 = 0; /* Dummy variables */
- int iPos = 0; /* Position of token in zText */
-
- /* When xNext() returns anything other than SQLITE_OK (SQLITE_DONE at
- ** the end of the text) the loop below is skipped and the enclosing
- ** while loop ends. */
- rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
- for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
- Fts3PhraseToken *pPT = pDef->pToken;
- /* The token is recorded for pDef when all of the following hold:
- ** - pDef is not tied to another column (an iCol value of
- ** p->nColumn or more matches every column),
- ** - pDef is not restricted by bFirst, or this token is at
- ** position 0,
- ** - the lengths are equal, or pDef is a prefix token shorter than
- ** this token, and
- ** - the first pPT->n bytes are identical. */
- if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
- && (pPT->bFirst==0 || iPos==0)
- && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
- && (0==memcmp(zToken, pPT->z, pPT->n))
- ){
- fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
- }
- }
- }
- if( pTC ) pModule->xClose(pTC);
- if( rc==SQLITE_DONE ) rc = SQLITE_OK;
- }
-
- /* Append a 0 varint to every list that received at least one entry
- ** (presumably the doclist terminator - confirm against the doclist
- ** format). */
- for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
- if( pDef->pList ){
- rc = fts3PendingListAppendVarint(&pDef->pList, 0);
- }
- }
- }
-
- return rc;
-}
-
-/*
-** Hand the caller a copy of the doclist cached for deferred token p.
-**
-** If p has no cached list, *ppData is set to 0, *pnData to 0 and SQLITE_OK
-** is returned. Otherwise a buffer is obtained from sqlite3_malloc(), the
-** list data is copied into it with its leading varint skipped, *ppData is
-** set to the buffer and *pnData to the number of bytes copied. The buffer
-** comes from sqlite3_malloc() and is not freed here. Returns SQLITE_NOMEM
-** if the allocation fails; both outputs are zero in that case.
-*/
-int sqlite3Fts3DeferredTokenList(
-  Fts3DeferredToken *p,
-  char **ppData,
-  int *pnData
-){
-  sqlite3_int64 iIgnored;         /* Value of the skipped leading varint */
-  int nPrefix;                    /* Size in bytes of the leading varint */
-  int nCopy;                      /* Number of bytes handed to the caller */
-  char *aCopy;                    /* Buffer handed to the caller */
-
-  *ppData = 0;
-  *pnData = 0;
-
-  if( p->pList ){
-    aCopy = (char *)sqlite3_malloc(p->pList->nData);
-    if( aCopy==0 ) return SQLITE_NOMEM;
-
-    nPrefix = sqlite3Fts3GetVarint(p->pList->aData, &iIgnored);
-    nCopy = p->pList->nData - nPrefix;
-    memcpy(aCopy, &p->pList->aData[nPrefix], nCopy);
-
-    *pnData = nCopy;
-    *ppData = aCopy;
-  }
-  return SQLITE_OK;
-}
-
-/*
-** Create a deferred-token entry for pToken and push it onto the front of
-** the pCsr->pDeferred list. pToken->pDeferred is set to the new entry and
-** must be 0 on entry. Returns SQLITE_OK, or SQLITE_NOMEM if the entry
-** cannot be allocated.
-**
-** NOTE(review): the parameter comment says iCol may be -1, while the
-** matching code in sqlite3Fts3CacheDeferredDoclists() treats values of
-** p->nColumn or more as "any column" - confirm which convention callers use.
-*/
-int sqlite3Fts3DeferToken(
-  Fts3Cursor *pCsr,               /* Fts3 table cursor */
-  Fts3PhraseToken *pToken,        /* Token to defer */
-  int iCol                        /* Column that token must appear in (or -1) */
-){
-  Fts3DeferredToken *pNew = sqlite3_malloc(sizeof(*pNew));
-  if( pNew==0 ) return SQLITE_NOMEM;
-
-  memset(pNew, 0, sizeof(*pNew));
-  pNew->pToken = pToken;
-  pNew->iCol = iCol;
-
-  /* Link the new entry in at the head of the cursor's list. */
-  pNew->pNext = pCsr->pDeferred;
-  pCsr->pDeferred = pNew;
-
-  assert( pToken->pDeferred==0 );
-  pToken->pDeferred = pNew;
-
-  return SQLITE_OK;
-}
-#endif
-
-/*
-** SQLite value pRowid contains the rowid of a row that may or may not be
-** present in the FTS3 table. If it is, delete it and adjust the contents
-** of subsidiary data structures accordingly.
-**
-** If the row is not found, or deleting its terms fails, nothing else is
-** done. If removing the row leaves the table empty, fts3DeleteAll() is
-** called, *pnChng is set to 0 and aSzDel is zeroed. Otherwise *pnChng is
-** decremented and the row is removed from the content table (unless an
-** external content table is configured) and from the docsize table (if
-** the table has one).
-*/
-static int fts3DeleteByRowid(
-  Fts3Table *p,
-  sqlite3_value *pRowid,
-  int *pnChng,                    /* IN/OUT: Decrement if row is deleted */
-  u32 *aSzDel
-){
-  int rc = SQLITE_OK;             /* Return code */
-  int bFound = 0;                 /* True if *pRowid really is in the table */
-  int isEmpty = 0;                /* Deleting *pRowid leaves the table empty */
-
-  fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound);
-  if( rc!=SQLITE_OK || !bFound ) return rc;
-
-  rc = fts3IsEmpty(p, pRowid, &isEmpty);
-  if( rc!=SQLITE_OK ) return rc;
-
-  if( isEmpty ){
-    /* Deleting this row means the whole table is empty. In this case
-    ** delete the contents of all three tables and throw away any
-    ** data in the pendingTerms hash table. */
-    rc = fts3DeleteAll(p, 1);
-    *pnChng = 0;
-    memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2);
-    return rc;
-  }
-
-  *pnChng = *pnChng - 1;
-  if( p->zContentTbl==0 ){
-    fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
-  }
-  if( p->bHasDocsize ){
-    fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
-  }
-  return rc;
-}
-
-/*
-** This function does the work for the xUpdate method of FTS3 virtual
-** tables. The schema of the virtual table being:
-**
-** CREATE TABLE <table name>(
-** <user columns>,
-** <table name> HIDDEN,
-** docid HIDDEN,
-** <langid> HIDDEN
-** );
-**
-** nArg is 1 for a DELETE and 2 + p->nColumn + 3 for an INSERT or UPDATE
-** (asserted below). As used in this function: apVal[0] is the rowid of the
-** row being removed (NULL for an INSERT), apVal[1] is the fallback for the
-** new rowid, apVal[p->nColumn+2] is the hidden <table name> column,
-** apVal[p->nColumn+3] is the hidden docid and apVal[p->nColumn+4] is the
-** hidden langid.
-**
-** Returns SQLITE_OK on success or an SQLite error code. The change-size
-** array is freed and the segments blob handle closed on every exit path.
-*/
-int sqlite3Fts3UpdateMethod(
- sqlite3_vtab *pVtab, /* FTS3 vtab object */
- int nArg, /* Size of argument array */
- sqlite3_value **apVal, /* Array of arguments */
- sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
-){
- Fts3Table *p = (Fts3Table *)pVtab;
- int rc = SQLITE_OK; /* Return Code */
- int isRemove = 0; /* True for an UPDATE or DELETE */
- u32 *aSzIns = 0; /* Sizes of inserted documents */
- u32 *aSzDel = 0; /* Sizes of deleted documents */
- int nChng = 0; /* Net change in number of documents */
- int bInsertDone = 0;
-
- assert( p->pSegments==0 );
- assert(
- nArg==1 /* DELETE operations */
- || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */
- );
-
- /* Check for a "special" INSERT operation. One of the form:
- **
- ** INSERT INTO xyz(xyz) VALUES('command');
- **
- ** It is recognized by an INSERT (apVal[0] is NULL) that supplies a
- ** non-NULL value for the hidden <table name> column.
- */
- if( nArg>1
- && sqlite3_value_type(apVal[0])==SQLITE_NULL
- && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL
- ){
- rc = fts3SpecialInsert(p, apVal[p->nColumn+2]);
- goto update_out;
- }
-
- /* A negative language id is rejected. */
- if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
- rc = SQLITE_CONSTRAINT;
- goto update_out;
- }
-
- /* Allocate space to hold the change in document sizes. One allocation
- ** holds two arrays of p->nColumn+1 entries each: aSzDel first, aSzIns
- ** directly after it. */
- aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 );
- if( aSzDel==0 ){
- rc = SQLITE_NOMEM;
- goto update_out;
- }
- aSzIns = &aSzDel[p->nColumn+1];
- memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);
-
- /* If this is an INSERT operation, or an UPDATE that modifies the rowid
- ** value, then this operation requires constraint handling.
- **
- ** If the on-conflict mode is REPLACE, this means that the existing row
- ** should be deleted from the database before inserting the new row. Or,
- ** if the on-conflict mode is other than REPLACE, then this method must
- ** detect the conflict and return SQLITE_CONSTRAINT before beginning to
- ** modify the database file.
- */
- if( nArg>1 && p->zContentTbl==0 ){
- /* Find the value object that holds the new rowid value. */
- sqlite3_value *pNewRowid = apVal[3+p->nColumn];
- if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){
- pNewRowid = apVal[1];
- }
-
- if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && (
- sqlite3_value_type(apVal[0])==SQLITE_NULL
- || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid)
- )){
- /* The new rowid is not NULL (in this case the rowid will be
- ** automatically assigned and there is no chance of a conflict), and
- ** the statement is either an INSERT or an UPDATE that modifies the
- ** rowid column. So if the conflict mode is REPLACE, then delete any
- ** existing row with rowid=pNewRowid.
- **
- ** Or, if the conflict mode is not REPLACE, insert the new record into
- ** the %_content table. If we hit the duplicate rowid constraint (or any
- ** other error) while doing so, return immediately.
- **
- ** This branch may also run if pNewRowid contains a value that cannot
- ** be losslessly converted to an integer. In this case, the eventual
- ** call to fts3InsertData() (either just below or further on in this
- ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is
- ** invoked, it will delete zero rows (since no row will have
- ** docid=$pNewRowid if $pNewRowid is not an integer value).
- */
- if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){
- rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel);
- }else{
- rc = fts3InsertData(p, apVal, pRowid);
- bInsertDone = 1;
- }
- }
- }
- if( rc!=SQLITE_OK ){
- goto update_out;
- }
-
- /* If this is a DELETE or UPDATE operation, remove the old record. */
- if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
- assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
- rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
- isRemove = 1;
- }
-
- /* If this is an INSERT or UPDATE operation, insert the new record. */
- if( nArg>1 && rc==SQLITE_OK ){
- int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
- /* The content row may already have been written by the constraint
- ** handling above; bInsertDone records that. */
- if( bInsertDone==0 ){
- rc = fts3InsertData(p, apVal, pRowid);
- if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
- rc = FTS_CORRUPT_VTAB;
- }
- }
- if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
- rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
- }
- if( rc==SQLITE_OK ){
- assert( p->iPrevDocid==*pRowid );
- rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
- }
- if( p->bHasDocsize ){
- fts3InsertDocsize(&rc, p, aSzIns);
- }
- /* NOTE(review): nChng is incremented even when rc is an error here. */
- nChng++;
- }
-
- if( p->bFts4 ){
- fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
- }
-
- /* Common exit: aSzIns points into the aSzDel allocation, so freeing
- ** aSzDel releases both arrays (aSzDel is still 0 on the early exits). */
- update_out:
- sqlite3_free(aSzDel);
- sqlite3Fts3SegmentsClose(p);
- return rc;
-}
-
-/*
-** Flush any data in the pending-terms hash table to disk. If successful,
-** merge all segments in the database (including the new segment, if
-** there was any data to flush) into a single segment.
-**
-** The work is wrapped in a savepoint named "fts3". If fts3DoOptimize()
-** returns SQLITE_OK or SQLITE_DONE the savepoint is released; a failure to
-** release replaces the return code. Any other result rolls the savepoint
-** back before releasing it, and that result is returned. If the savepoint
-** cannot be opened, its error code is returned and no optimize is attempted.
-*/
-int sqlite3Fts3Optimize(Fts3Table *p){
-  int rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
-
-  if( rc==SQLITE_OK ){
-    int bFailed;                  /* True if the optimize itself failed */
-
-    rc = fts3DoOptimize(p, 1);
-    bFailed = (rc!=SQLITE_OK && rc!=SQLITE_DONE);
-
-    if( bFailed ){
-      sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
-      sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
-    }else{
-      int rcRelease = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
-      if( rcRelease!=SQLITE_OK ) rc = rcRelease;
-    }
-  }
-
-  sqlite3Fts3SegmentsClose(p);
-  return rc;
-}
-
-#endif
diff --git a/src/libtracker-fts/fts5.c b/src/libtracker-fts/fts5.c
new file mode 100644
index 000000000..5e2fb1471
--- /dev/null
+++ b/src/libtracker-fts/fts5.c
@@ -0,0 +1,20402 @@
+
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5)
+
+#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
+# define NDEBUG 1
+#endif
+#if defined(NDEBUG) && defined(SQLITE_DEBUG)
+# undef NDEBUG
+#endif
+
+#line 1 "fts5.h"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Interfaces to extend FTS5. Using the interfaces defined in this file,
+** FTS5 may be extended with:
+**
+** * custom tokenizers, and
+** * custom auxiliary functions.
+*/
+
+
+#ifndef _FTS5_H
+#define _FTS5_H
+
+#include "sqlite3.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*************************************************************************
+** CUSTOM AUXILIARY FUNCTIONS
+**
+** Virtual table implementations may overload SQL functions by implementing
+** the sqlite3_module.xFindFunction() method.
+*/
+
+typedef struct Fts5ExtensionApi Fts5ExtensionApi;
+typedef struct Fts5Context Fts5Context;
+typedef struct Fts5PhraseIter Fts5PhraseIter;
+
+typedef void (*fts5_extension_function)(
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
+ sqlite3_context *pCtx, /* Context for returning result/error */
+ int nVal, /* Number of values in apVal[] array */
+ sqlite3_value **apVal /* Array of trailing arguments */
+);
+
+struct Fts5PhraseIter { /* Iterator state for xPhraseFirst()/xPhraseNext() and the xPhrase*Column() variants */
+ const unsigned char *a; /* Managed by FTS5; applications must not modify it directly */
+ const unsigned char *b; /* Managed by FTS5; applications must not modify it directly */
+};
+
+/*
+** EXTENSION API FUNCTIONS
+**
+** xUserData(pFts):
+** Return a copy of the context pointer the extension function was
+** registered with.
+**
+** xColumnTotalSize(pFts, iCol, pnToken):
+** If parameter iCol is less than zero, set output variable *pnToken
+** to the total number of tokens in the FTS5 table. Or, if iCol is
+** non-negative but less than the number of columns in the table, return
+** the total number of tokens in column iCol, considering all rows in
+** the FTS5 table.
+**
+** If parameter iCol is greater than or equal to the number of columns
+** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
+** an OOM condition or IO error), an appropriate SQLite error code is
+** returned.
+**
+** xColumnCount(pFts):
+** Return the number of columns in the table.
+**
+** xColumnSize(pFts, iCol, pnToken):
+** If parameter iCol is less than zero, set output variable *pnToken
+** to the total number of tokens in the current row. Or, if iCol is
+** non-negative but less than the number of columns in the table, set
+** *pnToken to the number of tokens in column iCol of the current row.
+**
+** If parameter iCol is greater than or equal to the number of columns
+** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
+** an OOM condition or IO error), an appropriate SQLite error code is
+** returned.
+**
+** This function may be quite inefficient if used with an FTS5 table
+** created with the "columnsize=0" option.
+**
+** xColumnText:
+** This function attempts to retrieve the text of column iCol of the
+** current document. If successful, (*pz) is set to point to a buffer
+** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
+** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
+** if an error occurs, an SQLite error code is returned and the final values
+** of (*pz) and (*pn) are undefined.
+**
+** xPhraseCount:
+** Returns the number of phrases in the current query expression.
+**
+** xPhraseSize:
+** Returns the number of tokens in phrase iPhrase of the query. Phrases
+** are numbered starting from zero.
+**
+** xInstCount:
+** Set *pnInst to the total number of occurrences of all phrases within
+** the query within the current row. Return SQLITE_OK if successful, or
+** an error code (i.e. SQLITE_NOMEM) if an error occurs.
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" or "detail=column" option. If the FTS5 table is created
+** with either "detail=none" or "detail=column" and "content=" option
+** (i.e. if it is a contentless table), then this API always returns 0.
+**
+** xInst:
+** Query for the details of phrase match iIdx within the current row.
+** Phrase matches are numbered starting from zero, so the iIdx argument
+** should be greater than or equal to zero and smaller than the value
+** output by xInstCount().
+**
+** Usually, output parameter *piPhrase is set to the phrase number, *piCol
+** to the column in which it occurs and *piOff the token offset of the
+** first token of the phrase. The exception is if the table was created
+** with the offsets=0 option specified. In this case *piOff is always
+** set to -1.
+**
+** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM)
+** if an error occurs.
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" or "detail=column" option.
+**
+** xRowid:
+** Returns the rowid of the current row.
+**
+** xTokenize:
+** Tokenize text using the tokenizer belonging to the FTS5 table.
+**
+** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
+** This API function is used to query the FTS table for phrase iPhrase
+** of the current query. Specifically, a query equivalent to:
+**
+** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
+**
+** with $p set to a phrase equivalent to the phrase iPhrase of the
+** current query is executed. For each row visited, the callback function
+** passed as the fourth argument is invoked. The context and API objects
+** passed to the callback function may be used to access the properties of
+** each matched row. Invoking Api.xUserData() returns a copy of the pointer
+** passed as the third argument to pUserData.
+**
+** If the callback function returns any value other than SQLITE_OK, the
+** query is abandoned and the xQueryPhrase function returns immediately.
+** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
+** Otherwise, the error code is propagated upwards.
+**
+** If the query runs to completion without incident, SQLITE_OK is returned.
+** Or, if some error occurs before the query completes or is aborted by
+** the callback, an SQLite error code is returned.
+**
+**
+** xSetAuxdata(pFts5, pAux, xDelete)
+**
+** Save the pointer passed as the second argument as the extension functions
+** "auxiliary data". The pointer may then be retrieved by the current or any
+** future invocation of the same fts5 extension function made as part of
+** the same MATCH query using the xGetAuxdata() API.
+**
+** Each extension function is allocated a single auxiliary data slot for
+** each FTS query (MATCH expression). If the extension function is invoked
+** more than once for a single FTS query, then all invocations share a
+** single auxiliary data context.
+**
+** If there is already an auxiliary data pointer when this function is
+** invoked, then it is replaced by the new pointer. If an xDelete callback
+** was specified along with the original pointer, it is invoked at this
+** point.
+**
+** The xDelete callback, if one is specified, is also invoked on the
+** auxiliary data pointer after the FTS5 query has finished.
+**
+** If an error (e.g. an OOM condition) occurs within this function,
+** the auxiliary data is set to NULL and an error code returned. If the
+** xDelete parameter was not NULL, it is invoked on the auxiliary data
+** pointer before returning.
+**
+**
+** xGetAuxdata(pFts5, bClear)
+**
+** Returns the current auxiliary data pointer for the fts5 extension
+** function. See the xSetAuxdata() method for details.
+**
+** If the bClear argument is non-zero, then the auxiliary data is cleared
+** (set to NULL) before this function returns. In this case the xDelete,
+** if any, is not invoked.
+**
+**
+** xRowCount(pFts5, pnRow)
+**
+** This function is used to retrieve the total number of rows in the table.
+** In other words, the same value that would be returned by:
+**
+** SELECT count(*) FROM ftstable;
+**
+** xPhraseFirst()
+** This function is used, along with type Fts5PhraseIter and the xPhraseNext
+** method, to iterate through all instances of a single query phrase within
+** the current row. This is the same information as is accessible via the
+** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
+** to use, this API may be faster under some circumstances. To iterate
+** through instances of phrase iPhrase, use the following code:
+**
+** Fts5PhraseIter iter;
+** int iCol, iOff;
+** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
+** iCol>=0;
+** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
+** ){
+** // An instance of phrase iPhrase at offset iOff of column iCol
+** }
+**
+** The Fts5PhraseIter structure is defined above. Applications should not
+** modify this structure directly - it should only be used as shown above
+** with the xPhraseFirst() and xPhraseNext() API methods (and by
+** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" or "detail=column" option. If the FTS5 table is created
+** with either "detail=none" or "detail=column" and "content=" option
+** (i.e. if it is a contentless table), then this API always iterates
+** through an empty set (all calls to xPhraseFirst() set iCol to -1).
+**
+** xPhraseNext()
+** See xPhraseFirst above.
+**
+** xPhraseFirstColumn()
+** This function and xPhraseNextColumn() are similar to the xPhraseFirst()
+** and xPhraseNext() APIs described above. The difference is that instead
+** of iterating through all instances of a phrase in the current row, these
+** APIs are used to iterate through the set of columns in the current row
+** that contain one or more instances of a specified phrase. For example:
+**
+** Fts5PhraseIter iter;
+** int iCol;
+** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
+** iCol>=0;
+** pApi->xPhraseNextColumn(pFts, &iter, &iCol)
+** ){
+** // Column iCol contains at least one instance of phrase iPhrase
+** }
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" option. If the FTS5 table is created with both the
+** "detail=none" and "content=" options (i.e. if it is a contentless table),
+** then this API always iterates through an empty set (all calls to
+** xPhraseFirstColumn() set iCol to -1).
+**
+** The information accessed using this API and its companion
+** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
+** (or xInst/xInstCount). The chief advantage of this API is that it is
+** significantly more efficient than those alternatives when used with
+** "detail=column" tables.
+**
+** xPhraseNextColumn()
+** See xPhraseFirstColumn above.
+*/
+struct Fts5ExtensionApi {
+ int iVersion; /* Currently always set to 3 */
+
+ void *(*xUserData)(Fts5Context*);
+
+ int (*xColumnCount)(Fts5Context*);
+ int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
+ int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
+
+ int (*xTokenize)(Fts5Context*,
+ const char *pText, int nText, /* Text to tokenize */
+ void *pCtx, /* Context passed to xToken() */
+ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
+ );
+
+ int (*xPhraseCount)(Fts5Context*);
+ int (*xPhraseSize)(Fts5Context*, int iPhrase);
+
+ int (*xInstCount)(Fts5Context*, int *pnInst);
+ int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
+
+ sqlite3_int64 (*xRowid)(Fts5Context*);
+ int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
+ int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
+
+ int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
+ int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
+ );
+ int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
+ void *(*xGetAuxdata)(Fts5Context*, int bClear);
+
+ int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
+ void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
+
+ int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
+ void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
+};
+
+/*
+** END OF CUSTOM AUXILIARY FUNCTIONS
+*************************************************************************/
+
+/*************************************************************************
+** CUSTOM TOKENIZERS
+**
+** Applications may also register custom tokenizer types. A tokenizer
+** is registered by providing fts5 with a populated instance of the
+** following structure. All structure methods must be defined, setting
+** any member of the fts5_tokenizer struct to NULL leads to undefined
+** behaviour. The structure methods are expected to function as follows:
+**
+** xCreate:
+** This function is used to allocate and initialize a tokenizer instance.
+** A tokenizer instance is required to actually tokenize text.
+**
+** The first argument passed to this function is a copy of the (void*)
+** pointer provided by the application when the fts5_tokenizer object
+** was registered with FTS5 (the third argument to xCreateTokenizer()).
+** The second and third arguments are an array of nul-terminated strings
+** containing the tokenizer arguments, if any, specified following the
+** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
+** to create the FTS5 table.
+**
+** The final argument is an output variable. If successful, (*ppOut)
+** should be set to point to the new tokenizer handle and SQLITE_OK
+** returned. If an error occurs, some value other than SQLITE_OK should
+** be returned. In this case, fts5 assumes that the final value of *ppOut
+** is undefined.
+**
+** xDelete:
+** This function is invoked to delete a tokenizer handle previously
+** allocated using xCreate(). Fts5 guarantees that this function will
+** be invoked exactly once for each successful call to xCreate().
+**
+** xTokenize:
+** This function is expected to tokenize the nText byte string indicated
+** by argument pText. pText may or may not be nul-terminated. The first
+** argument passed to this function is a pointer to an Fts5Tokenizer object
+** returned by an earlier call to xCreate().
+**
+** The second argument indicates the reason that FTS5 is requesting
+** tokenization of the supplied text. This is always one of the following
+** four values:
+**
+** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
+** or removed from the FTS table. The tokenizer is being invoked to
+** determine the set of tokens to add to (or delete from) the
+** FTS index.
+**
+** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
+** against the FTS index. The tokenizer is being called to tokenize
+** a bareword or quoted string specified as part of the query.
+**
+** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
+** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
+** followed by a "*" character, indicating that the last token
+** returned by the tokenizer will be treated as a token prefix.
+**
+** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
+** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary
+** function. Or an Fts5ExtensionApi.xColumnSize() request made by the
+** same on a columnsize=0 database.
+** </ul>
+**
+** For each token in the input string, the supplied callback xToken() must
+** be invoked. The first argument to it should be a copy of the pointer
+** passed as the second argument to xTokenize(). The third and fourth
+** arguments are a pointer to a buffer containing the token text, and the
+** size of the token in bytes. The 5th and 6th arguments are the byte offsets
+** of the first byte of and first byte immediately following the text from
+** which the token is derived within the input.
+**
+** The second argument passed to the xToken() callback ("tflags") should
+** normally be set to 0. The exception is if the tokenizer supports
+** synonyms. In this case see the discussion below for details.
+**
+** FTS5 assumes the xToken() callback is invoked for each token in the
+** order that they occur within the input text.
+**
+** If an xToken() callback returns any value other than SQLITE_OK, then
+** the tokenization should be abandoned and the xTokenize() method should
+** immediately return a copy of the xToken() return value. Or, if the
+** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
+** if an error occurs with the xTokenize() implementation itself, it
+** may abandon the tokenization and return any error code other than
+** SQLITE_OK or SQLITE_DONE.
+**
+** SYNONYM SUPPORT
+**
+** Custom tokenizers may also support synonyms. Consider a case in which a
+** user wishes to query for a phrase such as "first place". Using the
+** built-in tokenizers, the FTS5 query 'first + place' will match instances
+** of "first place" within the document set, but not alternative forms
+** such as "1st place". In some applications, it would be better to match
+** all instances of "first place" or "1st place" regardless of which form
+** the user specified in the MATCH query text.
+**
+** There are several ways to approach this in FTS5:
+**
+** <ol><li> By mapping all synonyms to a single token. In this case, using
+** the above example, this means that the tokenizer returns the
+** same token for inputs "first" and "1st". Say that token is in
+** fact "first", so that when the user inserts the document "I won
+** 1st place" entries are added to the index for tokens "i", "won",
+** "first" and "place". If the user then queries for '1st + place',
+** the tokenizer substitutes "first" for "1st" and the query works
+** as expected.
+**
+** <li> By querying the index for all synonyms of each query term separately.
+** In this case, when tokenizing query text, the tokenizer may
+** provide multiple synonyms for a single term within the document.
+** FTS5 then queries the index for each synonym individually. For
+** example, faced with the query:
+**
+** <codeblock>
+** ... MATCH 'first place'</codeblock>
+**
+** the tokenizer offers both "1st" and "first" as synonyms for the
+** first token in the MATCH query and FTS5 effectively runs a query
+** similar to:
+**
+** <codeblock>
+** ... MATCH '(first OR 1st) place'</codeblock>
+**
+** except that, for the purposes of auxiliary functions, the query
+** still appears to contain just two phrases - "(first OR 1st)"
+** being treated as a single phrase.
+**
+** <li> By adding multiple synonyms for a single term to the FTS index.
+** Using this method, when tokenizing document text, the tokenizer
+** provides multiple synonyms for each token. So that when a
+** document such as "I won first place" is tokenized, entries are
+** added to the FTS index for "i", "won", "first", "1st" and
+** "place".
+**
+** This way, even if the tokenizer does not provide synonyms
+** when tokenizing query text (it should not - to do so would be
+** inefficient), it doesn't matter if the user queries for
+** 'first + place' or '1st + place', as there are entries in the
+** FTS index corresponding to both forms of the first token.
+** </ol>
+**
+** Whether it is parsing document or query text, any call to xToken that
+** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
+** is considered to supply a synonym for the previous token. For example,
+** when parsing the document "I won first place", a tokenizer that supports
+** synonyms would call xToken() 5 times, as follows:
+**
+** <codeblock>
+** xToken(pCtx, 0, "i", 1, 0, 1);
+** xToken(pCtx, 0, "won", 3, 2, 5);
+** xToken(pCtx, 0, "first", 5, 6, 11);
+** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
+** xToken(pCtx, 0, "place", 5, 12, 17);
+**</codeblock>
+**
+** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
+** xToken() is called. Multiple synonyms may be specified for a single token
+** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
+** There is no limit to the number of synonyms that may be provided for a
+** single token.
+**
+** In many cases, method (1) above is the best approach. It does not add
+** extra data to the FTS index or require FTS5 to query for multiple terms,
+** so it is efficient in terms of disk space and query speed. However, it
+** does not support prefix queries very well. If, as suggested above, the
+** token "first" is substituted for "1st" by the tokenizer, then the query:
+**
+** <codeblock>
+** ... MATCH '1s*'</codeblock>
+**
+** will not match documents that contain the token "1st" (as the tokenizer
+** will probably not map "1s" to any prefix of "first").
+**
+** For full prefix support, method (3) may be preferred. In this case,
+** because the index contains entries for both "first" and "1st", prefix
+** queries such as 'fi*' or '1s*' will match correctly. However, because
+** extra entries are added to the FTS index, this method uses more space
+** within the database.
+**
+** Method (2) offers a midpoint between (1) and (3). Using this method,
+** a query such as '1s*' will match documents that contain the literal
+** token "1st", but not "first" (assuming the tokenizer is not able to
+** provide synonyms for prefixes). However, a non-prefix query like '1st'
+** will match against "1st" and "first". This method does not require
+** extra disk space, as no extra entries are added to the FTS index.
+** On the other hand, it may require more CPU cycles to run MATCH queries,
+** as separate queries of the FTS index are required for each synonym.
+**
+** When using methods (2) or (3), it is important that the tokenizer only
+** provide synonyms when tokenizing document text (method (3)) or query
+** text (method (2)), not both. Doing so will not cause any errors, but is
+** inefficient.
+*/
+typedef struct Fts5Tokenizer Fts5Tokenizer;
+typedef struct fts5_tokenizer fts5_tokenizer;
+struct fts5_tokenizer {
+ int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
+ void (*xDelete)(Fts5Tokenizer*);
+ int (*xTokenize)(Fts5Tokenizer*,
+ void *pCtx,
+ int flags, /* Mask of FTS5_TOKENIZE_* flags */
+ const char *pText, int nText,
+ int (*xToken)(
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Pointer to buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStart, /* Byte offset of token within input text */
+ int iEnd /* Byte offset of end of token within input text */
+ )
+ );
+};
+
+/* Flags that may be passed as the third argument to xTokenize() */
+#define FTS5_TOKENIZE_QUERY 0x0001
+#define FTS5_TOKENIZE_PREFIX 0x0002
+#define FTS5_TOKENIZE_DOCUMENT 0x0004
+#define FTS5_TOKENIZE_AUX 0x0008
+
+/* Flags that may be passed by the tokenizer implementation back to FTS5
+** as the second argument to the supplied xToken callback. */
+#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
+
+/*
+** END OF CUSTOM TOKENIZERS
+*************************************************************************/
+
+/*************************************************************************
+** FTS5 EXTENSION REGISTRATION API
+*/
+typedef struct fts5_api fts5_api;
+struct fts5_api {
+ int iVersion; /* Currently always set to 2 */
+
+ /* Create a new tokenizer */
+ int (*xCreateTokenizer)(
+ fts5_api *pApi,
+ const char *zName,
+ void *pContext,
+ fts5_tokenizer *pTokenizer,
+ void (*xDestroy)(void*)
+ );
+
+ /* Find an existing tokenizer */
+ int (*xFindTokenizer)(
+ fts5_api *pApi,
+ const char *zName,
+ void **ppContext,
+ fts5_tokenizer *pTokenizer
+ );
+
+ /* Create a new auxiliary function */
+ int (*xCreateFunction)(
+ fts5_api *pApi,
+ const char *zName,
+ void *pContext,
+ fts5_extension_function xFunction,
+ void (*xDestroy)(void*)
+ );
+};
+
+/*
+** END OF REGISTRATION API
+*************************************************************************/
+
+#ifdef __cplusplus
+} /* end of the 'extern "C"' block */
+#endif
+
+#endif /* _FTS5_H */
+
+
+#line 1 "fts5Int.h"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+#ifndef _FTS5INT_H
+#define _FTS5INT_H
+
+/* #include "fts5.h" */
+#include "sqlite3ext.h"
+SQLITE_EXTENSION_INIT1
+
+#include <string.h>
+#include <assert.h>
+
+#ifndef SQLITE_AMALGAMATION
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+typedef unsigned short u16;
+typedef short i16;
+typedef sqlite3_int64 i64;
+typedef sqlite3_uint64 u64;
+
+#define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0])))
+
+#define testcase(x)
+#define ALWAYS(x) 1
+#define NEVER(x) 0
+
+#define MIN(x,y) (((x) < (y)) ? (x) : (y))
+#define MAX(x,y) (((x) > (y)) ? (x) : (y))
+
+/*
+** Constants for the largest and smallest possible 64-bit signed integers.
+*/
+# define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
+# define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
+
+#endif
+
+
+/*
+** Maximum number of prefix indexes on a single FTS5 table. This must be
+** less than 32. If it is set to anything larger than that, an #error
+** directive in fts5_index.c will cause the build to fail.
+*/
+#define FTS5_MAX_PREFIX_INDEXES 31
+
+#define FTS5_DEFAULT_NEARDIST 10
+#define FTS5_DEFAULT_RANK "bm25"
+
+/* Name of rank and rowid columns */
+#define FTS5_RANK_NAME "rank"
+#define FTS5_ROWID_NAME "rowid"
+
+#ifdef SQLITE_DEBUG
+# define FTS5_CORRUPT sqlite3Fts5Corrupt()
+static int sqlite3Fts5Corrupt(void);
+#else
+# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
+#endif
+
+/*
+** The assert_nc() macro is similar to the assert() macro, except that it
+** is used for assert() conditions that are true only if it can be
+** guaranteed that the database is not corrupt.
+*/
+#ifdef SQLITE_DEBUG
+extern int sqlite3_fts5_may_be_corrupt;
+# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
+#else
+# define assert_nc(x) assert(x)
+#endif
+
+/* Mark a function parameter as unused, to suppress nuisance compiler
+** warnings. */
+#ifndef UNUSED_PARAM
+# define UNUSED_PARAM(X) (void)(X)
+#endif
+
+#ifndef UNUSED_PARAM2
+# define UNUSED_PARAM2(X, Y) (void)(X), (void)(Y)
+#endif
+
+typedef struct Fts5Global Fts5Global;
+typedef struct Fts5Colset Fts5Colset;
+
+/* If a NEAR() clump or phrase may only match a specific set of columns,
+** then an object of the following type is used to record the set of columns.
+** Each entry in the aiCol[] array is a column that may be matched.
+**
+** This object is used by fts5_expr.c and fts5_index.c.
+*/
+struct Fts5Colset {
+ int nCol; /* presumably the number of valid entries in aiCol[] - TODO confirm */
+ int aiCol[1]; /* Columns that may be matched. NOTE(review): declared with one slot; likely over-allocated by whoever creates the object - confirm */
+};
+
+
+
+/**************************************************************************
+** Interface to code in fts5_config.c. fts5_config.c contains code
+** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
+*/
+
+typedef struct Fts5Config Fts5Config;
+
+/*
+** An instance of the following structure encodes all information that can
+** be gleaned from the CREATE VIRTUAL TABLE statement.
+**
+** And all information loaded from the %_config table.
+**
+** nAutomerge:
+** The minimum number of segments that an auto-merge operation should
+** attempt to merge together. A value of 1 sets the object to use the
+** compile time default. Zero disables auto-merge altogether.
+**
+** zContent:
+**
+** zContentRowid:
+** The value of the content_rowid= option, if one was specified. Or
+** the string "rowid" otherwise. This text is not quoted - if it is
+** used as part of an SQL statement it needs to be quoted appropriately.
+**
+** zContentExprlist:
+**
+** pzErrmsg:
+** This exists in order to allow the fts5_index.c module to return a
+** decent error message if it encounters a file-format version it does
+** not understand.
+**
+** bColumnsize:
+** True if the %_docsize table is created.
+**
+** bPrefixIndex:
+** This is only used for debugging. If set to false, any prefix indexes
+** are ignored. This value is configured using:
+**
+** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
+**
+*/
+struct Fts5Config {
+ sqlite3 *db; /* Database handle */
+ char *zDb; /* Database holding FTS index (e.g. "main") */
+ char *zName; /* Name of FTS index */
+ int nCol; /* Number of columns */
+ char **azCol; /* Column names */
+ u8 *abUnindexed; /* True for unindexed columns */
+ int nPrefix; /* Number of prefix indexes */
+ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
+ int eContent; /* An FTS5_CONTENT value */
+ char *zContent; /* content table */
+ char *zContentRowid; /* "content_rowid=" option value */
+ int bColumnsize; /* "columnsize=" option value (dflt==1) */
+ int eDetail; /* FTS5_DETAIL_XXX value */
+ char *zContentExprlist;
+ Fts5Tokenizer *pTok;
+ fts5_tokenizer *pTokApi;
+
+ /* Values loaded from the %_config table */
+ int iCookie; /* Incremented when %_config is modified */
+ int pgsz; /* Approximate page size used in %_data */
+ int nAutomerge; /* 'automerge' setting */
+ int nCrisisMerge; /* Maximum allowed segments per level */
+ int nHashSize; /* Bytes of memory for in-memory hash */
+ char *zRank; /* Name of rank function */
+ char *zRankArgs; /* Arguments to rank function */
+
+ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
+ char **pzErrmsg;
+
+#ifdef SQLITE_DEBUG
+ int bPrefixIndex; /* True to use prefix-indexes */
+#endif
+};
+
+/* Current expected value of %_config table 'version' field */
+#define FTS5_CURRENT_VERSION 4
+
+#define FTS5_CONTENT_NORMAL 0
+#define FTS5_CONTENT_NONE 1
+#define FTS5_CONTENT_EXTERNAL 2
+
+#define FTS5_DETAIL_FULL 0
+#define FTS5_DETAIL_NONE 1
+#define FTS5_DETAIL_COLUMNS 2
+
+
+
+static int sqlite3Fts5ConfigParse(
+ Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
+);
+static void sqlite3Fts5ConfigFree(Fts5Config*);
+
+static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
+
+static int sqlite3Fts5Tokenize(
+ Fts5Config *pConfig, /* FTS5 Configuration object */
+ int flags, /* FTS5_TOKENIZE_* flags */
+ const char *pText, int nText, /* Text to tokenize */
+ void *pCtx, /* Context passed to xToken() */
+ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
+);
+
+static void sqlite3Fts5Dequote(char *z);
+
+/* Load the contents of the %_config table */
+static int sqlite3Fts5ConfigLoad(Fts5Config*, int);
+
+/* Set the value of a single config attribute */
+static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
+
+static int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
+
+/*
+** End of interface to code in fts5_config.c.
+**************************************************************************/
+
+/**************************************************************************
+** Interface to code in fts5_buffer.c.
+*/
+
+/*
+** Buffer object for the incremental building of string data.
+*/
+typedef struct Fts5Buffer Fts5Buffer;
+struct Fts5Buffer {
+ u8 *p; /* presumably the start of the buffer's data - TODO confirm */
+ int n; /* Bytes in use; fts5BufferGrow() adds the requested size to this */
+ int nSpace; /* Capacity that fts5BufferGrow() checks before calling sqlite3Fts5BufferSize() */
+};
+
+static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
+static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
+static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
+static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
+static void sqlite3Fts5BufferFree(Fts5Buffer*);
+static void sqlite3Fts5BufferZero(Fts5Buffer*);
+static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
+static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
+
+static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
+
+/*
+** Short aliases for the sqlite3Fts5BufferXXX() functions. Each alias passes
+** its arguments through unchanged.
+*/
+#define fts5BufferZero(pBuf) sqlite3Fts5BufferZero(pBuf)
+#define fts5BufferAppendVarint(pRc,pBuf,iVal) \
+  sqlite3Fts5BufferAppendVarint(pRc,pBuf,iVal)
+#define fts5BufferFree(pBuf) sqlite3Fts5BufferFree(pBuf)
+#define fts5BufferAppendBlob(pRc,pBuf,nData,pData) \
+  sqlite3Fts5BufferAppendBlob(pRc,pBuf,nData,pData)
+#define fts5BufferSet(pRc,pBuf,nData,pData) \
+  sqlite3Fts5BufferSet(pRc,pBuf,nData,pData)
+
+/*
+** Evaluates to 0 when (pBuf)->n plus nByte, both converted to u32, does not
+** exceed (pBuf)->nSpace. Otherwise evaluates to whatever
+** sqlite3Fts5BufferSize() returns when asked for the combined size.
+**
+** pBuf and nByte may be evaluated more than once, so neither should have
+** side effects.
+*/
+#define fts5BufferGrow(pRc,pBuf,nByte) (                          \
+  (u32)((pBuf)->nSpace) >= (u32)((pBuf)->n) + (u32)(nByte) ? 0 :  \
+  sqlite3Fts5BufferSize((pRc),(pBuf),(nByte)+(pBuf)->n)           \
+)
+
+/* Write and decode big-endian 32-bit integer values */
+static void sqlite3Fts5Put32(u8*, int);
+static int sqlite3Fts5Get32(const u8*);
+
+/*
+** Split a 64-bit position value of the form (iCol<<32) + iOff into its
+** column number (upper 32 bits) and offset (lower 32 bits).
+**
+** The argument and the whole expansion are parenthesized so that an
+** arbitrary expression can be passed (e.g. "a | b") and the result can be
+** used inside a larger expression. The argument is evaluated exactly once.
+*/
+#define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32))
+#define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0xFFFFFFFF))
+
+typedef struct Fts5PoslistReader Fts5PoslistReader;
+struct Fts5PoslistReader {
+ /* Variables used only by sqlite3Fts5PoslistReaderXXX() functions. */
+ const u8 *a; /* Position list to iterate through */
+ int n; /* Size of buffer at a[] in bytes */
+ int i; /* Current offset in a[] */
+
+ u8 bFlag; /* For client use (any custom purpose) */
+
+ /* Output variables */
+ u8 bEof; /* Set to true at EOF */
+ i64 iPos; /* (iCol<<32) + iOff; split with FTS5_POS2COLUMN()/FTS5_POS2OFFSET() */
+};
+static int sqlite3Fts5PoslistReaderInit(
+ const u8 *a, int n, /* Poslist buffer to iterate through */
+ Fts5PoslistReader *pIter /* Iterator object to initialize */
+);
+static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
+
+typedef struct Fts5PoslistWriter Fts5PoslistWriter;
+struct Fts5PoslistWriter {
+ i64 iPrev;
+};
+static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
+static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);
+
+static int sqlite3Fts5PoslistNext64(
+ const u8 *a, int n, /* Buffer containing poslist */
+ int *pi, /* IN/OUT: Offset within a[] */
+ i64 *piOff /* IN/OUT: Current offset */
+);
+
+/* Malloc utility */
+static void *sqlite3Fts5MallocZero(int *pRc, int nByte);
+static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);
+
+/* Character set tests (like isspace(), isalpha() etc.) */
+static int sqlite3Fts5IsBareword(char t);
+
+
+/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
+typedef struct Fts5Termset Fts5Termset;
+static int sqlite3Fts5TermsetNew(Fts5Termset**);
+static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
+static void sqlite3Fts5TermsetFree(Fts5Termset*);
+
+/*
+** End of interface to code in fts5_buffer.c.
+**************************************************************************/
+
+/**************************************************************************
+** Interface to code in fts5_index.c. fts5_index.c contains code
+** to access the data stored in the %_data table.
+*/
+
+typedef struct Fts5Index Fts5Index;
+typedef struct Fts5IndexIter Fts5IndexIter;
+
+struct Fts5IndexIter {
+ i64 iRowid; /* NOTE(review): presumably the rowid of the current entry -- confirm */
+ const u8 *pData; /* NOTE(review): presumably data for the current entry -- confirm */
+ int nData; /* NOTE(review): presumably the size of pData[] in bytes -- confirm */
+ u8 bEof; /* Value reported by sqlite3Fts5IterEof() */
+};
+
+#define sqlite3Fts5IterEof(x) ((x)->bEof)
+
+/*
+** Values used as part of the flags argument passed to
+** sqlite3Fts5IndexQuery(). Every value in this group and in the internal
+** group that follows is a distinct single bit, so several of them can be
+** OR-ed together into one mask.
+*/
+#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
+#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
+#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
+#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
+
+/* The following are used internally by the fts5_index.c module. They are
+** defined here only to make it easier to avoid clashes with the flags
+** above. */
+#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
+#define FTS5INDEX_QUERY_NOOUTPUT 0x0020
+
+/*
+** Create/destroy an Fts5Index object.
+*/
+static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
+static int sqlite3Fts5IndexClose(Fts5Index *p);
+
+/*
+** Return a simple checksum value based on the arguments.
+*/
+static u64 sqlite3Fts5IndexEntryCksum(
+ i64 iRowid,
+ int iCol,
+ int iPos,
+ int iIdx,
+ const char *pTerm,
+ int nTerm
+);
+
+/*
+** Argument p points to a buffer containing utf-8 text that is nByte bytes in
+** size. Return the number of bytes in the nChar character prefix of the
+** buffer, or 0 if there are fewer than nChar characters in total.
+*/
+static int sqlite3Fts5IndexCharlenToBytelen(
+ const char *p,
+ int nByte,
+ int nChar
+);
+
+/*
+** Open a new iterator to iterate through all rowids that match the
+** specified token or token prefix.
+*/
+static int sqlite3Fts5IndexQuery(
+ Fts5Index *p, /* FTS index to query */
+ const char *pToken, int nToken, /* Token (or prefix) to query for */
+ int flags, /* Mask of FTS5INDEX_QUERY_X flags */
+ Fts5Colset *pColset, /* Match these columns only */
+ Fts5IndexIter **ppIter /* OUT: New iterator object */
+);
+
+/*
+** The various operations on open token or token prefix iterators opened
+** using sqlite3Fts5IndexQuery().
+*/
+static int sqlite3Fts5IterNext(Fts5IndexIter*);
+static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
+
+/*
+** Close an iterator opened by sqlite3Fts5IndexQuery().
+*/
+static void sqlite3Fts5IterClose(Fts5IndexIter*);
+
+/*
+** This interface is used by the fts5vocab module.
+*/
+static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
+static int sqlite3Fts5IterNextScan(Fts5IndexIter*);
+
+
+/*
+** Insert or remove data to or from the index. Each time a document is
+** added to or removed from the index, this function is called one or more
+** times.
+**
+** For an insert, it must be called once for each token in the new document.
+** If the operation is a delete, it must be called (at least) once for each
+** unique token in the document with an iCol value less than zero. The iPos
+** argument is ignored for a delete.
+*/
+static int sqlite3Fts5IndexWrite(
+ Fts5Index *p, /* Index to write to */
+ int iCol, /* Column token appears in (-ve -> delete) */
+ int iPos, /* Position of token within column */
+ const char *pToken, int nToken /* Token to add or remove to or from index */
+);
+
+/*
+** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
+** document iDocid.
+*/
+static int sqlite3Fts5IndexBeginWrite(
+ Fts5Index *p, /* Index to write to */
+ int bDelete, /* True if current operation is a delete */
+ i64 iDocid /* Docid to add or remove data from */
+);
+
+/*
+** Flush any data stored in the in-memory hash tables to the database.
+** If the bCommit flag is true, also close any open blob handles.
+*/
+static int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit);
+
+/*
+** Discard any data stored in the in-memory hash tables. Do not write it
+** to the database. Additionally, assume that the contents of the %_data
+** table may have changed on disk. So any in-memory caches of %_data
+** records must be invalidated.
+*/
+static int sqlite3Fts5IndexRollback(Fts5Index *p);
+
+/*
+** Get or set the "averages" values.
+*/
+static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
+static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
+
+/*
+** Functions called by the storage module as part of integrity-check.
+*/
+static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);
+
+/*
+** Called during virtual module initialization to register UDF
+** fts5_decode() with SQLite
+*/
+static int sqlite3Fts5IndexInit(sqlite3*);
+
+static int sqlite3Fts5IndexSetCookie(Fts5Index*, int);
+
+/*
+** Return the total number of entries read from the %_data table by
+** this connection since it was created.
+*/
+static int sqlite3Fts5IndexReads(Fts5Index *p);
+
+static int sqlite3Fts5IndexReinit(Fts5Index *p);
+static int sqlite3Fts5IndexOptimize(Fts5Index *p);
+static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
+
+static int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
+
+/*
+** End of interface to code in fts5_index.c.
+**************************************************************************/
+
+/**************************************************************************
+** Interface to code in fts5_varint.c.
+*/
+static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
+static int sqlite3Fts5GetVarintLen(u32 iVal);
+static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
+static int sqlite3Fts5PutVarint(unsigned char *p, u64 v);
+
+/*
+** Convenience wrappers around the varint decoders. fts5GetVarint32() takes
+** its destination as an lvalue rather than a pointer: the address of iOut,
+** cast to (u32*), is what gets passed to sqlite3Fts5GetVarint32().
+*/
+#define fts5GetVarint32(aBuf,iOut) sqlite3Fts5GetVarint32(aBuf,(u32*)&iOut)
+#define fts5GetVarint sqlite3Fts5GetVarint
+
+/*
+** Load the value stored at aBuf[iOff] into nVal and advance iOff past it.
+**
+** If the first byte has its 0x80 bit clear, that byte is the value and iOff
+** advances by one. Otherwise the value is decoded by fts5GetVarint32() and
+** iOff advances by that call's return value.
+**
+** iOff and nVal must be lvalues. All three arguments are evaluated more
+** than once, so none of them should have side effects.
+*/
+#define fts5FastGetVarint32(aBuf, iOff, nVal) {        \
+  nVal = (aBuf)[iOff];                                  \
+  if( nVal & 0x80 ){                                    \
+    iOff += fts5GetVarint32(&(aBuf)[iOff], nVal);       \
+  }else{                                                \
+    iOff++;                                             \
+  }                                                     \
+}
+
+
+/*
+** End of interface to code in fts5_varint.c.
+**************************************************************************/
+
+
+/**************************************************************************
+** Interface to code in fts5.c.
+*/
+
+static int sqlite3Fts5GetTokenizer(
+ Fts5Global*,
+ const char **azArg,
+ int nArg,
+ Fts5Tokenizer**,
+ fts5_tokenizer**,
+ char **pzErr
+);
+
+static Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, Fts5Config **);
+
+/*
+** End of interface to code in fts5.c.
+**************************************************************************/
+
+/**************************************************************************
+** Interface to code in fts5_hash.c.
+*/
+typedef struct Fts5Hash Fts5Hash;
+
+/*
+** Create a hash table, free a hash table.
+*/
+static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
+static void sqlite3Fts5HashFree(Fts5Hash*);
+
+static int sqlite3Fts5HashWrite(
+ Fts5Hash*,
+ i64 iRowid, /* Rowid for this entry */
+ int iCol, /* Column token appears in (-ve -> delete) */
+ int iPos, /* Position of token within column */
+ char bByte,
+ const char *pToken, int nToken /* Token to add or remove to or from index */
+);
+
+/*
+** Empty (but do not delete) a hash table.
+*/
+static void sqlite3Fts5HashClear(Fts5Hash*);
+
+static int sqlite3Fts5HashQuery(
+ Fts5Hash*, /* Hash table to query */
+ const char *pTerm, int nTerm, /* Query term */
+ const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
+ int *pnDoclist /* OUT: Size of doclist in bytes */
+);
+
+static int sqlite3Fts5HashScanInit(
+ Fts5Hash*, /* Hash table to query */
+ const char *pTerm, int nTerm /* Query prefix */
+);
+static void sqlite3Fts5HashScanNext(Fts5Hash*);
+static int sqlite3Fts5HashScanEof(Fts5Hash*);
+static void sqlite3Fts5HashScanEntry(Fts5Hash *,
+ const char **pzTerm, /* OUT: term (nul-terminated) */
+ const u8 **ppDoclist, /* OUT: pointer to doclist */
+ int *pnDoclist /* OUT: size of doclist in bytes */
+);
+
+
+/*
+** End of interface to code in fts5_hash.c.
+**************************************************************************/
+
+/**************************************************************************
+** Interface to code in fts5_storage.c. fts5_storage.c contains
+** code to access the data stored in the %_content and %_docsize tables.
+*/
+
+#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
+#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
+#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */
+
+typedef struct Fts5Storage Fts5Storage;
+
+static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
+static int sqlite3Fts5StorageClose(Fts5Storage *p);
+static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);
+
+static int sqlite3Fts5DropAll(Fts5Config*);
+static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);
+
+static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**);
+static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*);
+static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);
+
+static int sqlite3Fts5StorageIntegrity(Fts5Storage *p);
+
+static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
+static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);
+
+static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
+static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
+static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);
+
+static int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit);
+static int sqlite3Fts5StorageRollback(Fts5Storage *p);
+
+static int sqlite3Fts5StorageConfigValue(
+ Fts5Storage *p, const char*, sqlite3_value*, int
+);
+
+static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
+static int sqlite3Fts5StorageRebuild(Fts5Storage *p);
+static int sqlite3Fts5StorageOptimize(Fts5Storage *p);
+static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
+
+/*
+** End of interface to code in fts5_storage.c.
+**************************************************************************/
+
+
+/**************************************************************************
+** Interface to code in fts5_expr.c.
+*/
+typedef struct Fts5Expr Fts5Expr;
+typedef struct Fts5ExprNode Fts5ExprNode;
+typedef struct Fts5Parse Fts5Parse;
+typedef struct Fts5Token Fts5Token;
+typedef struct Fts5ExprPhrase Fts5ExprPhrase;
+typedef struct Fts5ExprNearset Fts5ExprNearset;
+
+struct Fts5Token {
+ const char *p; /* Token text (not NULL terminated) */
+ int n; /* Size of buffer p in bytes */
+};
+
+/* Parse a MATCH expression. */
+static int sqlite3Fts5ExprNew(
+ Fts5Config *pConfig,
+ const char *zExpr,
+ Fts5Expr **ppNew,
+ char **pzErr
+);
+
+/*
+** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
+** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
+** rc = sqlite3Fts5ExprNext(pExpr, iMax)
+** ){
+** // The document with rowid iRowid matches the expression!
+** i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
+** }
+*/
+static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
+static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
+static int sqlite3Fts5ExprEof(Fts5Expr*);
+static i64 sqlite3Fts5ExprRowid(Fts5Expr*);
+
+static void sqlite3Fts5ExprFree(Fts5Expr*);
+
+/* Called during startup to register a UDF with SQLite */
+static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);
+
+static int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
+static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
+static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
+
+typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
+static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
+static int sqlite3Fts5ExprPopulatePoslists(
+ Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
+);
+static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);
+static void sqlite3Fts5ExprClearEof(Fts5Expr*);
+
+static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);
+
+static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
+
+/*******************************************
+** The fts5_expr.c API above this point is used by the other hand-written
+** C code in this module. The interfaces below this point are called by
+** the parser code in fts5parse.y. */
+
+static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
+
+static Fts5ExprNode *sqlite3Fts5ParseNode(
+ Fts5Parse *pParse,
+ int eType,
+ Fts5ExprNode *pLeft,
+ Fts5ExprNode *pRight,
+ Fts5ExprNearset *pNear
+);
+
+static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
+ Fts5Parse *pParse,
+ Fts5ExprPhrase *pPhrase,
+ Fts5Token *pToken,
+ int bPrefix
+);
+
+static Fts5ExprNearset *sqlite3Fts5ParseNearset(
+ Fts5Parse*,
+ Fts5ExprNearset*,
+ Fts5ExprPhrase*
+);
+
+static Fts5Colset *sqlite3Fts5ParseColset(
+ Fts5Parse*,
+ Fts5Colset*,
+ Fts5Token *
+);
+
+static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
+static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
+static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);
+
+static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
+static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5Colset*);
+static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
+static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
+
+/*
+** End of interface to code in fts5_expr.c.
+**************************************************************************/
+
+
+
+/**************************************************************************
+** Interface to code in fts5_aux.c.
+*/
+
+static int sqlite3Fts5AuxInit(fts5_api*);
+/*
+** End of interface to code in fts5_aux.c.
+**************************************************************************/
+
+/**************************************************************************
+** Interface to code in fts5_tokenizer.c.
+*/
+
+static int sqlite3Fts5TokenizerInit(fts5_api*);
+/*
+** End of interface to code in fts5_tokenizer.c.
+**************************************************************************/
+
+/**************************************************************************
+** Interface to code in fts5_vocab.c.
+*/
+
+static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
+
+/*
+** End of interface to code in fts5_vocab.c.
+**************************************************************************/
+
+
+/**************************************************************************
+** Interface to automatically generated code in fts5_unicode2.c.
+*/
+static int sqlite3Fts5UnicodeIsalnum(int c);
+static int sqlite3Fts5UnicodeIsdiacritic(int c);
+static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
+/*
+** End of interface to code in fts5_unicode2.c.
+**************************************************************************/
+
+#endif
+
+#line 1 "fts5parse.h"
+#define FTS5_OR 1 /* Terminal symbol codes; each value is the index of */
+#define FTS5_AND 2 /* the matching name in fts5yyTokenName[] ("OR" is */
+#define FTS5_NOT 3 /* entry 1, "STAR" is entry 13). */
+#define FTS5_TERM 4
+#define FTS5_COLON 5
+#define FTS5_LP 6
+#define FTS5_RP 7
+#define FTS5_LCP 8 /* LCP and RCP enclose a colsetlist (grammar rule 10) */
+#define FTS5_RCP 9
+#define FTS5_STRING 10
+#define FTS5_COMMA 11
+#define FTS5_PLUS 12
+#define FTS5_STAR 13
+
+#line 1 "fts5parse.c"
+/*
+** 2000-05-29
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+** Driver template for the LEMON parser generator.
+**
+** The "lemon" program processes an LALR(1) input grammar file, then uses
+** this template to construct a parser. The "lemon" program inserts text
+** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
+** interstitial "-" characters) contained in this template is changed into
+** the value of the %name directive from the grammar. Otherwise, the content
+** of this template is copied straight through into the generated parser
+** source file.
+**
+** The following is the concatenation of all %include directives from the
+** input grammar file:
+*/
+#include <stdio.h>
+/************ Begin %include sections from the grammar ************************/
+#line 47 "fts5parse.y"
+
+/* #include "fts5Int.h" */
+/* #include "fts5parse.h" */
+
+/*
+** Disable all error recovery processing in the parser push-down
+** automaton.
+*/
+#define fts5YYNOERRORRECOVERY 1
+
+/*
+** Make fts5yytestcase() the same as testcase()
+*/
+#define fts5yytestcase(X) testcase(X)
+
+/*
+** Indicate that sqlite3ParserFree() will never be called with a null
+** pointer.
+*/
+#define fts5YYPARSEFREENOTNULL 1
+
+/*
+** Alternative datatype for the argument to the malloc() routine passed
+** into sqlite3ParserAlloc(). The default is size_t.
+*/
+#define fts5YYMALLOCARGTYPE u64
+
+#line 56 "fts5parse.c"
+/**************** End of %include directives **********************************/
+/* These constants specify the various numeric values for terminal symbols
+** in a format understandable to "makeheaders". This section is blank unless
+** "lemon" is run with the "-m" command-line option.
+***************** Begin makeheaders token definitions *************************/
+/**************** End makeheaders token definitions ***************************/
+
+/* The next section is a series of control #defines that determine
+** various aspects of the generated parser.
+** fts5YYCODETYPE is the data type used to store the integer codes
+** that represent terminal and non-terminal symbols.
+** "unsigned char" is used if there are fewer than
+** 256 symbols. Larger types otherwise.
+** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for
+** any terminal or nonterminal symbol.
+** fts5YYFALLBACK If defined, this indicates that one or more tokens
+** (also known as: "terminal symbols") have fall-back
+** values which should be used if the original symbol
+** would not parse. This permits keywords to sometimes
+** be used as identifiers, for example.
+** fts5YYACTIONTYPE is the data type used for "action codes" - numbers
+** that indicate what to do in response to the next
+** token.
+** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal
+** symbols. Background: A "minor type" is a semantic
+** value associated with a terminal or non-terminal
+** symbols. For example, for an "ID" terminal symbol,
+** the minor type might be the name of the identifier.
+** Each non-terminal can have a different minor type.
+** Terminal symbols all have the same minor type, though.
+** This macros defines the minor type for terminal
+** symbols.
+** fts5YYMINORTYPE is the data type used for all minor types.
+** This is typically a union of many types, one of
+** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union
+** for terminal symbols is called "fts5yy0".
+** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If
+** zero the stack is dynamically sized using realloc()
+** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument
+** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument
+** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser
+** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser
+** fts5YYERRORSYMBOL is the code number of the error symbol. If not
+** defined, then do no error processing.
+** fts5YYNSTATE the combined number of states.
+** fts5YYNRULE the number of rules in the grammar
+** fts5YY_MAX_SHIFT Maximum value for shift actions
+** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
+** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
+** fts5YY_MIN_REDUCE Minimum value for reduce actions
+** fts5YY_MAX_REDUCE Maximum value for reduce actions
+** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error
+** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept
+** fts5YY_NO_ACTION The fts5yy_action[] code for no-op
+*/
+#ifndef INTERFACE
+# define INTERFACE 1
+#endif
+/************* Begin control #defines *****************************************/
+#define fts5YYCODETYPE unsigned char
+#define fts5YYNOCODE 27
+#define fts5YYACTIONTYPE unsigned char
+#define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token
+typedef union {
+ int fts5yyinit;
+ sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0;
+ Fts5Colset* fts5yy3;
+ Fts5ExprPhrase* fts5yy11;
+ Fts5ExprNode* fts5yy18;
+ int fts5yy20;
+ Fts5ExprNearset* fts5yy26;
+} fts5YYMINORTYPE;
+#ifndef fts5YYSTACKDEPTH
+#define fts5YYSTACKDEPTH 100
+#endif
+#define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse;
+#define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse
+#define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse = fts5yypParser->pParse
+#define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse = pParse
+#define fts5YYNSTATE 26
+#define fts5YYNRULE 24
+#define fts5YY_MAX_SHIFT 25
+#define fts5YY_MIN_SHIFTREDUCE 40
+#define fts5YY_MAX_SHIFTREDUCE 63
+#define fts5YY_MIN_REDUCE 64
+#define fts5YY_MAX_REDUCE 87
+#define fts5YY_ERROR_ACTION 88
+#define fts5YY_ACCEPT_ACTION 89
+#define fts5YY_NO_ACTION 90
+/************* End control #defines *******************************************/
+
+/* Define the fts5yytestcase() macro to be a no-op if it is not already
+** defined otherwise.
+**
+** Applications can choose to define fts5yytestcase() in the %include section
+** to a macro that can assist in verifying code coverage. For production
+** code the fts5yytestcase() macro should be turned off. But it is useful
+** for testing.
+*/
+#ifndef fts5yytestcase
+# define fts5yytestcase(X)
+#endif
+
+
+/* Next are the tables used to determine what action to take based on the
+** current state and lookahead token. These tables are used to implement
+** functions that take a state number and lookahead value and return an
+** action integer.
+**
+** Suppose the action integer is N. Then the action is determined as
+** follows
+**
+** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead
+** token onto the stack and goto state N.
+**
+** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
+** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE.
+**
+** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE
+** and fts5YY_MAX_REDUCE
+
+** N == fts5YY_ERROR_ACTION A syntax error has occurred.
+**
+** N == fts5YY_ACCEPT_ACTION The parser accepts its input.
+**
+** N == fts5YY_NO_ACTION No such action. Denotes unused
+** slots in the fts5yy_action[] table.
+**
+** The action table is constructed as a single large table named fts5yy_action[].
+** Given state S and lookahead X, the action is computed as
+**
+** fts5yy_action[ fts5yy_shift_ofst[S] + X ]
+**
+** If the index value fts5yy_shift_ofst[S]+X is out of range or if the value
+** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X or if fts5yy_shift_ofst[S]
+** is equal to fts5YY_SHIFT_USE_DFLT, it means that the action is not in the table
+** and that fts5yy_default[S] should be used instead.
+**
+** The formula above is for computing the action when the lookahead is
+** a terminal symbol. If the lookahead is a non-terminal (as occurs after
+** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of
+** the fts5yy_shift_ofst[] array and fts5YY_REDUCE_USE_DFLT is used in place of
+** fts5YY_SHIFT_USE_DFLT.
+**
+** The following are the tables generated in this section:
+**
+** fts5yy_action[] A single table containing all actions.
+** fts5yy_lookahead[] A table containing the lookahead for each entry in
+** fts5yy_action. Used to detect hash collisions.
+** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for
+** shifting terminals.
+** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for
+** shifting non-terminals after a reduce.
+** fts5yy_default[] Default action for each state.
+**
+*********** Begin parsing tables **********************************************/
+#define fts5YY_ACTTAB_COUNT (78)
+static const fts5YYACTIONTYPE fts5yy_action[] = {
+ /* 0 */ 89, 15, 46, 5, 48, 24, 12, 19, 23, 14,
+ /* 10 */ 46, 5, 48, 24, 20, 21, 23, 43, 46, 5,
+ /* 20 */ 48, 24, 6, 18, 23, 17, 46, 5, 48, 24,
+ /* 30 */ 75, 7, 23, 25, 46, 5, 48, 24, 62, 47,
+ /* 40 */ 23, 48, 24, 7, 11, 23, 9, 3, 4, 2,
+ /* 50 */ 62, 50, 52, 44, 64, 3, 4, 2, 49, 4,
+ /* 60 */ 2, 1, 23, 11, 16, 9, 12, 2, 10, 61,
+ /* 70 */ 53, 59, 62, 60, 22, 13, 55, 8,
+};
+static const fts5YYCODETYPE fts5yy_lookahead[] = {
+ /* 0 */ 15, 16, 17, 18, 19, 20, 10, 11, 23, 16,
+ /* 10 */ 17, 18, 19, 20, 23, 24, 23, 16, 17, 18,
+ /* 20 */ 19, 20, 22, 23, 23, 16, 17, 18, 19, 20,
+ /* 30 */ 5, 6, 23, 16, 17, 18, 19, 20, 13, 17,
+ /* 40 */ 23, 19, 20, 6, 8, 23, 10, 1, 2, 3,
+ /* 50 */ 13, 9, 10, 7, 0, 1, 2, 3, 19, 2,
+ /* 60 */ 3, 6, 23, 8, 21, 10, 10, 3, 10, 25,
+ /* 70 */ 10, 10, 13, 25, 12, 10, 7, 5,
+};
+#define fts5YY_SHIFT_USE_DFLT (-5)
+#define fts5YY_SHIFT_COUNT (25)
+#define fts5YY_SHIFT_MIN (-4)
+#define fts5YY_SHIFT_MAX (72)
+static const signed char fts5yy_shift_ofst[] = {
+ /* 0 */ 55, 55, 55, 55, 55, 36, -4, 56, 58, 25,
+ /* 10 */ 37, 60, 59, 59, 46, 54, 42, 57, 62, 61,
+ /* 20 */ 62, 69, 65, 62, 72, 64,
+};
+#define fts5YY_REDUCE_USE_DFLT (-16)
+#define fts5YY_REDUCE_COUNT (13)
+#define fts5YY_REDUCE_MIN (-15)
+#define fts5YY_REDUCE_MAX (48)
+static const signed char fts5yy_reduce_ofst[] = {
+ /* 0 */ -15, -7, 1, 9, 17, 22, -9, 0, 39, 44,
+ /* 10 */ 44, 43, 44, 48,
+};
+static const fts5YYACTIONTYPE fts5yy_default[] = {
+ /* 0 */ 88, 88, 88, 88, 88, 69, 82, 88, 88, 87,
+ /* 10 */ 87, 88, 87, 87, 88, 88, 88, 66, 80, 88,
+ /* 20 */ 81, 88, 88, 78, 88, 65,
+};
+/********** End of lemon-generated parsing tables *****************************/
+
+/* The next table maps tokens (terminal symbols) into fallback tokens.
+** If a construct like the following:
+**
+** %fallback ID X Y Z.
+**
+** appears in the grammar, then ID becomes a fallback token for X, Y,
+** and Z. Whenever one of the tokens X, Y, or Z is input to the parser
+** but it does not parse, the type of the token is changed to ID and
+** the parse is retried before an error is thrown.
+**
+** This feature can be used, for example, to cause some keywords in a language
+** to revert to identifiers if the keyword does not apply in the context where
+** it appears.
+*/
+#ifdef fts5YYFALLBACK
+static const fts5YYCODETYPE fts5yyFallback[] = {
+};
+#endif /* fts5YYFALLBACK */
+
+/* The following structure represents a single element of the
+** parser's stack. Information stored includes:
+**
+** + The state number for the parser at this level of the stack.
+**
+** + The value of the token stored at this level of the stack.
+** (In other words, the "major" token.)
+**
+** + The semantic value stored at this level of the stack. This is
+** the information used by the action routines in the grammar.
+** It is sometimes called the "minor" token.
+**
+** After the "shift" half of a SHIFTREDUCE action, the stateno field
+** actually contains the reduce action for the second half of the
+** SHIFTREDUCE.
+*/
+struct fts5yyStackEntry {
+ fts5YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */
+ fts5YYCODETYPE major; /* The major token value. This is the code
+ ** number for the token at this stack level */
+ fts5YYMINORTYPE minor; /* The user-supplied minor token value. This
+ ** is the value of the token */
+};
+typedef struct fts5yyStackEntry fts5yyStackEntry;
+
+/* The state of the parser is completely contained in an instance of
+** the following structure.
+**
+** Which members exist depends on the control macros:
+**
+** * fts5yyidxMax is present only if fts5YYTRACKMAXSTACKDEPTH is defined.
+** * fts5yyerrcnt is present only if fts5YYNOERRORRECOVERY is NOT defined.
+** The %include section of the grammar defines that macro, so the
+** member is compiled out here.
+** * If fts5YYSTACKDEPTH is <=0 the stack is a pointer plus a size field;
+** otherwise it is a fixed array of fts5YYSTACKDEPTH entries.
+** fts5YYSTACKDEPTH defaults to 100 unless defined beforehand.
+*/
+struct fts5yyParser {
+ int fts5yyidx; /* Index of top element in stack */
+#ifdef fts5YYTRACKMAXSTACKDEPTH
+ int fts5yyidxMax; /* Maximum value of fts5yyidx */
+#endif
+#ifndef fts5YYNOERRORRECOVERY
+ int fts5yyerrcnt; /* Shifts left before out of the error */
+#endif
+ sqlite3Fts5ParserARG_SDECL /* A place to hold %extra_argument (Fts5Parse *pParse) */
+#if fts5YYSTACKDEPTH<=0
+ int fts5yystksz; /* Current size of the stack, in entries */
+ fts5yyStackEntry *fts5yystack; /* The parser's stack */
+#else
+ fts5yyStackEntry fts5yystack[fts5YYSTACKDEPTH]; /* The parser's stack */
+#endif
+};
+typedef struct fts5yyParser fts5yyParser;
+
+#ifndef NDEBUG
+#include <stdio.h>
+
+/* Trace destination and per-line prefix. Both are zero while tracing is
+** turned off. */
+static FILE *fts5yyTraceFILE = 0;
+static char *fts5yyTracePrompt = 0;
+
+/*
+** Enable or disable parser tracing.
+**
+** TraceFILE     Stream to which trace output is written.
+** zTracePrompt  Prefix string for each line of trace output. The pointer
+**               is stored as-is; the string is not copied.
+**
+** Tracing is on only when both arguments are non-NULL. If either argument
+** is NULL, both saved values are cleared, which turns tracing off.
+**
+** Returns nothing.
+*/
+static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
+  if( TraceFILE==0 || zTracePrompt==0 ){
+    fts5yyTraceFILE = 0;
+    fts5yyTracePrompt = 0;
+  }else{
+    fts5yyTraceFILE = TraceFILE;
+    fts5yyTracePrompt = zTracePrompt;
+  }
+}
+#endif /* NDEBUG */
+
+#ifndef NDEBUG
+/* For tracing shifts, the names of all terminals and nonterminals
+** are required. The following table supplies these names, indexed by
+** symbol code (the "major" value stored in each stack entry). */
+static const char *const fts5yyTokenName[] = {
+ "$", "OR", "AND", "NOT", /* codes 0.. 3 */
+ "TERM", "COLON", "LP", "RP", /* codes 4.. 7 */
+ "LCP", "RCP", "STRING", "COMMA", /* codes 8..11 */
+ "PLUS", "STAR", "error", "input", /* codes 12..15 */
+ "expr", "cnearset", "exprlist", "nearset", /* codes 16..19 */
+ "colset", "colsetlist", "nearphrases", "phrase", /* codes 20..23 */
+ "neardist_opt", "star_opt", /* codes 24..25 */
+};
+#endif /* NDEBUG */
+
+#ifndef NDEBUG
+/* For tracing reduce actions, the names of all rules are required.
+** Entries are indexed by rule number, matching the case labels in
+** fts5yy_reduce() and the rows of fts5yyRuleInfo[].
+*/
+static const char *const fts5yyRuleName[] = {
+ /* 0 */ "input ::= expr",
+ /* 1 */ "expr ::= expr AND expr",
+ /* 2 */ "expr ::= expr OR expr",
+ /* 3 */ "expr ::= expr NOT expr",
+ /* 4 */ "expr ::= LP expr RP",
+ /* 5 */ "expr ::= exprlist",
+ /* 6 */ "exprlist ::= cnearset",
+ /* 7 */ "exprlist ::= exprlist cnearset",
+ /* 8 */ "cnearset ::= nearset",
+ /* 9 */ "cnearset ::= colset COLON nearset",
+ /* 10 */ "colset ::= LCP colsetlist RCP",
+ /* 11 */ "colset ::= STRING",
+ /* 12 */ "colsetlist ::= colsetlist STRING",
+ /* 13 */ "colsetlist ::= STRING",
+ /* 14 */ "nearset ::= phrase",
+ /* 15 */ "nearset ::= STRING LP nearphrases neardist_opt RP",
+ /* 16 */ "nearphrases ::= phrase",
+ /* 17 */ "nearphrases ::= nearphrases phrase",
+ /* 18 */ "neardist_opt ::=",
+ /* 19 */ "neardist_opt ::= COMMA STRING",
+ /* 20 */ "phrase ::= phrase PLUS STRING star_opt",
+ /* 21 */ "phrase ::= STRING star_opt",
+ /* 22 */ "star_opt ::= STAR",
+ /* 23 */ "star_opt ::=",
+};
+#endif /* NDEBUG */
+
+
+#if fts5YYSTACKDEPTH<=0
+/*
+** Try to increase the size of the parser stack.
+*/
+static void fts5yyGrowStack(fts5yyParser *p){
+  /* New capacity: twice the current one plus 100 extra entries. */
+  int nEntry = p->fts5yystksz*2 + 100;
+  fts5yyStackEntry *aGrown = realloc(p->fts5yystack, nEntry*sizeof(aGrown[0]));
+
+  /* On allocation failure the old stack and its size are left untouched;
+  ** callers notice the failure by re-checking fts5yystksz. */
+  if( aGrown==0 ) return;
+
+  p->fts5yystack = aGrown;
+  p->fts5yystksz = nEntry;
+#ifndef NDEBUG
+  if( fts5yyTraceFILE ){
+    fprintf(fts5yyTraceFILE,"%sStack grows to %d entries!\n",
+            fts5yyTracePrompt, p->fts5yystksz);
+  }
+#endif
+}
+#endif
+
+/* Datatype of the argument taken by the memory allocator that is passed
+** to sqlite3Fts5ParserAlloc() below. This can be changed by
+** putting an appropriate #define in the %include section of the input
+** grammar.
+*/
+#ifndef fts5YYMALLOCARGTYPE
+# define fts5YYMALLOCARGTYPE size_t
+#endif
+
+/*
+** This function allocates a new parser.
+** The only argument is a pointer to a function which works like
+** malloc.
+**
+** Inputs:
+** A pointer to the function used to allocate memory.
+**
+** Outputs:
+** A pointer to a parser. This pointer is used in subsequent calls
+** to sqlite3Fts5Parser and sqlite3Fts5ParserFree.
+*/
+static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE)){
+  fts5yyParser *pNew;
+
+  pNew = (fts5yyParser*)mallocProc( (fts5YYMALLOCARGTYPE)sizeof(fts5yyParser) );
+  if( pNew==0 ) return pNew;
+
+  /* A negative stack index marks the parser as not yet started;
+  ** sqlite3Fts5Parser() sets up state 0 when it sees this. */
+  pNew->fts5yyidx = -1;
+#ifdef fts5YYTRACKMAXSTACKDEPTH
+  pNew->fts5yyidxMax = 0;
+#endif
+#if fts5YYSTACKDEPTH<=0
+  /* Dynamic stack: start empty and try to obtain the first chunk now. */
+  pNew->fts5yystack = NULL;
+  pNew->fts5yystksz = 0;
+  fts5yyGrowStack(pNew);
+#endif
+  return pNew;
+}
+
+/* The following function deletes the "minor type" or semantic value
+** associated with a symbol. The symbol can be either a terminal
+** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is
+** a pointer to the value to be deleted. The code used to do the
+** deletions is derived from the %destructor and/or %token_destructor
+** directives of the input grammar.
+*/
+static void fts5yy_destructor(
+ fts5yyParser *fts5yypParser, /* The parser */
+ fts5YYCODETYPE fts5yymajor, /* Type code for object to destroy */
+ fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */
+){
+ /* NOTE(review): presumably declares the local pParse used by the
+ ** "input" case below -- the macro is defined outside this section. */
+ sqlite3Fts5ParserARG_FETCH;
+ switch( fts5yymajor ){
+ /* Here is inserted the actions which take place when a
+ ** terminal or non-terminal is destroyed. This can happen
+ ** when the symbol is popped from the stack during a
+ ** reduce or during error processing or when a parser is
+ ** being destroyed before it is finished parsing.
+ **
+ ** Note: during a reduce, the only symbols destroyed are those
+ ** which appear on the RHS of the rule, but which are *not* used
+ ** inside the C code.
+ */
+/********* Begin destructor definitions ***************************************/
+ /* "input" owns nothing; the body only references pParse. */
+ case 15: /* input */
+{
+#line 83 "fts5parse.y"
+ (void)pParse;
+#line 489 "fts5parse.c"
+}
+ break;
+ /* Expression-node valued symbols (union member fts5yy18). */
+ case 16: /* expr */
+ case 17: /* cnearset */
+ case 18: /* exprlist */
+{
+#line 89 "fts5parse.y"
+ sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy18));
+#line 498 "fts5parse.c"
+}
+ break;
+ /* Nearset valued symbols (union member fts5yy26). */
+ case 19: /* nearset */
+ case 22: /* nearphrases */
+{
+#line 137 "fts5parse.y"
+ sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy26));
+#line 506 "fts5parse.c"
+}
+ break;
+ /* Column-set valued symbols (union member fts5yy3), released with
+ ** sqlite3_free(). */
+ case 20: /* colset */
+ case 21: /* colsetlist */
+{
+#line 119 "fts5parse.y"
+ sqlite3_free((fts5yypminor->fts5yy3));
+#line 514 "fts5parse.c"
+}
+ break;
+ /* Phrase valued symbol (union member fts5yy11). */
+ case 23: /* phrase */
+{
+#line 168 "fts5parse.y"
+ sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy11));
+#line 521 "fts5parse.c"
+}
+ break;
+/********* End destructor definitions *****************************************/
+ default: break; /* If no destructor action specified: do nothing */
+ }
+}
+
+/*
+** Pop the parser's stack once.
+**
+** If there is a destructor routine associated with the token which
+** is popped from the stack, then call it.
+*/
+static void fts5yy_pop_parser_stack(fts5yyParser *pParser){
+  fts5yyStackEntry *pTop;
+
+  assert( pParser->fts5yyidx>=0 );
+  /* Remember the entry being removed, then shrink the stack by one. */
+  pTop = &pParser->fts5yystack[pParser->fts5yyidx];
+  pParser->fts5yyidx--;
+#ifndef NDEBUG
+  if( fts5yyTraceFILE ){
+    fprintf(fts5yyTraceFILE,"%sPopping %s\n",
+            fts5yyTracePrompt,
+            fts5yyTokenName[pTop->major]);
+  }
+#endif
+  /* Release whatever semantic value the removed symbol was carrying. */
+  fts5yy_destructor(pParser, pTop->major, &pTop->minor);
+}
+
+/*
+** Deallocate and destroy a parser. Destructors are called for
+** all stack elements before shutting the parser down.
+**
+** If the fts5YYPARSEFREENEVERNULL macro exists (for example because it
+** is defined in a %include section of the input grammar) then it is
+** assumed that the input pointer is never NULL.
+*/
+static void sqlite3Fts5ParserFree(
+  void *p,                    /* The parser to be deleted */
+  void (*freeProc)(void*)     /* Function used to reclaim memory */
+){
+  fts5yyParser *pEngine = (fts5yyParser*)p;
+#ifndef fts5YYPARSEFREENEVERNULL
+  if( !pEngine ) return;
+#endif
+  /* Unwind the stack so every remaining symbol gets its destructor. */
+  while( pEngine->fts5yyidx>=0 ){
+    fts5yy_pop_parser_stack(pEngine);
+  }
+#if fts5YYSTACKDEPTH<=0
+  /* The dynamic stack came from realloc(), so it goes back via free(). */
+  free(pEngine->fts5yystack);
+#endif
+  freeProc((void*)pEngine);
+}
+
+/*
+** Return the peak depth of the stack for a parser.
+*/
+#ifdef fts5YYTRACKMAXSTACKDEPTH
+static int sqlite3Fts5ParserStackPeak(void *p){
+  /* p is the opaque parser handle; report the recorded maximum index. */
+  return ((fts5yyParser*)p)->fts5yyidxMax;
+}
+#endif
+
+/*
+** Find the appropriate action for a parser given the terminal
+** look-ahead token iLookAhead.
+*/
+static int fts5yy_find_shift_action(
+ fts5yyParser *pParser, /* The parser */
+ fts5YYCODETYPE iLookAhead /* The look-ahead token */
+){
+ int i;
+ int stateno = pParser->fts5yystack[pParser->fts5yyidx].stateno;
+
+ /* The top entry may already hold a reduce action instead of a plain
+ ** state number; in that case it is returned unchanged. */
+ if( stateno>=fts5YY_MIN_REDUCE ) return stateno;
+ assert( stateno <= fts5YY_SHIFT_COUNT );
+ do{
+ i = fts5yy_shift_ofst[stateno];
+ if( i==fts5YY_SHIFT_USE_DFLT ) return fts5yy_default[stateno];
+ assert( iLookAhead!=fts5YYNOCODE );
+ i += iLookAhead;
+ /* Slot i is usable only if it lies inside the action table and is
+ ** tagged with this very look-ahead token. */
+ if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){
+ if( iLookAhead>0 ){
+#ifdef fts5YYFALLBACK
+ fts5YYCODETYPE iFallback; /* Fallback token */
+ if( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])
+ && (iFallback = fts5yyFallback[iLookAhead])!=0 ){
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n",
+ fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]);
+ }
+#endif
+ assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
+ /* Retry the lookup from the top of the loop with the fallback token. */
+ iLookAhead = iFallback;
+ continue;
+ }
+#endif
+#ifdef fts5YYWILDCARD
+ {
+ /* i - iLookAhead is the state's base offset; probe the wildcard slot. */
+ int j = i - iLookAhead + fts5YYWILDCARD;
+ if(
+#if fts5YY_SHIFT_MIN+fts5YYWILDCARD<0
+ j>=0 &&
+#endif
+#if fts5YY_SHIFT_MAX+fts5YYWILDCARD>=fts5YY_ACTTAB_COUNT
+ j<fts5YY_ACTTAB_COUNT &&
+#endif
+ fts5yy_lookahead[j]==fts5YYWILDCARD
+ ){
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n",
+ fts5yyTracePrompt, fts5yyTokenName[iLookAhead],
+ fts5yyTokenName[fts5YYWILDCARD]);
+ }
+#endif /* NDEBUG */
+ return fts5yy_action[j];
+ }
+ }
+#endif /* fts5YYWILDCARD */
+ }
+ /* No specific, fallback or wildcard entry: use the state's default. */
+ return fts5yy_default[stateno];
+ }else{
+ return fts5yy_action[i];
+ }
+ }while(1);
+}
+
+/*
+** Find the appropriate action for a parser given the non-terminal
+** look-ahead token iLookAhead.
+*/
+static int fts5yy_find_reduce_action(
+ int stateno, /* Current state number */
+ fts5YYCODETYPE iLookAhead /* The look-ahead token */
+){
+ int i;
+#ifdef fts5YYERRORSYMBOL
+ /* With an error symbol defined, a state beyond the reduce table is
+ ** answered with its default action instead of tripping an assert. */
+ if( stateno>fts5YY_REDUCE_COUNT ){
+ return fts5yy_default[stateno];
+ }
+#else
+ assert( stateno<=fts5YY_REDUCE_COUNT );
+#endif
+ i = fts5yy_reduce_ofst[stateno];
+ assert( i!=fts5YY_REDUCE_USE_DFLT );
+ assert( iLookAhead!=fts5YYNOCODE );
+ i += iLookAhead;
+#ifdef fts5YYERRORSYMBOL
+ /* A slot that is out of range or tagged for another symbol means
+ ** "use the default"; without an error symbol this is asserted never
+ ** to happen. */
+ if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){
+ return fts5yy_default[stateno];
+ }
+#else
+ assert( i>=0 && i<fts5YY_ACTTAB_COUNT );
+ assert( fts5yy_lookahead[i]==iLookAhead );
+#endif
+ return fts5yy_action[i];
+}
+
+/*
+** The following routine is called if the stack overflows.
+*/
+static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
+ sqlite3Fts5ParserARG_FETCH;
+ /* The index is decremented before unwinding: fts5yy_shift() calls this
+ ** routine after it has already advanced fts5yyidx past the last usable
+ ** slot. */
+ fts5yypParser->fts5yyidx--;
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt);
+ }
+#endif
+ /* Pop (and destroy) every entry from the new top downwards. */
+ while( fts5yypParser->fts5yyidx>=0 ) fts5yy_pop_parser_stack(fts5yypParser);
+ /* Here code is inserted which will execute if the parser
+ ** stack ever overflows */
+/******** Begin %stack_overflow code ******************************************/
+#line 36 "fts5parse.y"
+
+ sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
+#line 697 "fts5parse.c"
+/******** End %stack_overflow code ********************************************/
+ sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument var */
+}
+
+/*
+** Print tracing information for a SHIFT action
+*/
+#ifndef NDEBUG
+static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState){
+  const char *zSym;   /* Name of the symbol now on top of the stack */
+
+  if( fts5yyTraceFILE==0 ) return;
+  zSym = fts5yyTokenName[fts5yypParser->fts5yystack[fts5yypParser->fts5yyidx].major];
+  if( fts5yyNewState<fts5YYNSTATE ){
+    /* A genuine state number: report where the parser is heading. */
+    fprintf(fts5yyTraceFILE,"%sShift '%s', go to state %d\n",
+            fts5yyTracePrompt, zSym, fts5yyNewState);
+  }else{
+    /* Not a state number, so no target state is printed. */
+    fprintf(fts5yyTraceFILE,"%sShift '%s'\n",
+            fts5yyTracePrompt, zSym);
+  }
+}
+#else
+# define fts5yyTraceShift(X,Y)
+#endif
+
+/*
+** Perform a shift action.
+*/
+static void fts5yy_shift(
+ fts5yyParser *fts5yypParser, /* The parser to be shifted */
+ int fts5yyNewState, /* The new state to shift in */
+ int fts5yyMajor, /* The major token to shift in */
+ sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor /* The minor token to shift in */
+){
+ fts5yyStackEntry *fts5yytos;
+ /* Claim the next slot first; the capacity checks below look at the
+ ** already advanced index. */
+ fts5yypParser->fts5yyidx++;
+#ifdef fts5YYTRACKMAXSTACKDEPTH
+ if( fts5yypParser->fts5yyidx>fts5yypParser->fts5yyidxMax ){
+ fts5yypParser->fts5yyidxMax = fts5yypParser->fts5yyidx;
+ }
+#endif
+#if fts5YYSTACKDEPTH>0
+ /* Fixed-size stack: running past the array is an overflow. */
+ if( fts5yypParser->fts5yyidx>=fts5YYSTACKDEPTH ){
+ fts5yyStackOverflow(fts5yypParser);
+ return;
+ }
+#else
+ /* Dynamic stack: try to grow, and report overflow only if the size
+ ** is still insufficient afterwards (i.e. growing failed). */
+ if( fts5yypParser->fts5yyidx>=fts5yypParser->fts5yystksz ){
+ fts5yyGrowStack(fts5yypParser);
+ if( fts5yypParser->fts5yyidx>=fts5yypParser->fts5yystksz ){
+ fts5yyStackOverflow(fts5yypParser);
+ return;
+ }
+ }
+#endif
+ /* Fill in the freshly claimed top-of-stack entry. */
+ fts5yytos = &fts5yypParser->fts5yystack[fts5yypParser->fts5yyidx];
+ fts5yytos->stateno = (fts5YYACTIONTYPE)fts5yyNewState;
+ fts5yytos->major = (fts5YYCODETYPE)fts5yyMajor;
+ fts5yytos->minor.fts5yy0 = fts5yyMinor;
+ fts5yyTraceShift(fts5yypParser, fts5yyNewState);
+}
+
+/* The following table contains information about every rule that
+** is used during the reduce.
+*/
+/* One row per grammar rule, indexed by rule number: the symbol code of
+** the left-hand side and the number of right-hand-side symbols. */
+static const struct {
+ fts5YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */
+ unsigned char nrhs; /* Number of right-hand side symbols in the rule */
+} fts5yyRuleInfo[] = {
+ { 15, 1 }, /* 0: input ::= expr */
+ { 16, 3 }, /* 1: expr ::= expr AND expr */
+ { 16, 3 }, /* 2: expr ::= expr OR expr */
+ { 16, 3 }, /* 3: expr ::= expr NOT expr */
+ { 16, 3 }, /* 4: expr ::= LP expr RP */
+ { 16, 1 }, /* 5: expr ::= exprlist */
+ { 18, 1 }, /* 6: exprlist ::= cnearset */
+ { 18, 2 }, /* 7: exprlist ::= exprlist cnearset */
+ { 17, 1 }, /* 8: cnearset ::= nearset */
+ { 17, 3 }, /* 9: cnearset ::= colset COLON nearset */
+ { 20, 3 }, /* 10: colset ::= LCP colsetlist RCP */
+ { 20, 1 }, /* 11: colset ::= STRING */
+ { 21, 2 }, /* 12: colsetlist ::= colsetlist STRING */
+ { 21, 1 }, /* 13: colsetlist ::= STRING */
+ { 19, 1 }, /* 14: nearset ::= phrase */
+ { 19, 5 }, /* 15: nearset ::= STRING LP nearphrases neardist_opt RP */
+ { 22, 1 }, /* 16: nearphrases ::= phrase */
+ { 22, 2 }, /* 17: nearphrases ::= nearphrases phrase */
+ { 24, 0 }, /* 18: neardist_opt ::= */
+ { 24, 2 }, /* 19: neardist_opt ::= COMMA STRING */
+ { 23, 4 }, /* 20: phrase ::= phrase PLUS STRING star_opt */
+ { 23, 2 }, /* 21: phrase ::= STRING star_opt */
+ { 25, 1 }, /* 22: star_opt ::= STAR */
+ { 25, 0 }, /* 23: star_opt ::= */
+};
+
+static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */
+
+/*
+** Perform a reduce action and the shift that must immediately
+** follow the reduce.
+*/
+static void fts5yy_reduce(
+ fts5yyParser *fts5yypParser, /* The parser */
+ int fts5yyruleno /* Number of the rule by which to reduce */
+){
+ int fts5yygoto; /* The next state */
+ int fts5yyact; /* The next action */
+ fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */
+ int fts5yysize; /* Amount to pop the stack */
+ sqlite3Fts5ParserARG_FETCH;
+ /* fts5yymsp addresses the current top entry; the rule's right-hand-side
+ ** symbols are reached with non-positive offsets from it. */
+ fts5yymsp = &fts5yypParser->fts5yystack[fts5yypParser->fts5yyidx];
+#ifndef NDEBUG
+ if( fts5yyTraceFILE && fts5yyruleno>=0
+ && fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) ){
+ fts5yysize = fts5yyRuleInfo[fts5yyruleno].nrhs;
+ fprintf(fts5yyTraceFILE, "%sReduce [%s], go to state %d.\n", fts5yyTracePrompt,
+ fts5yyRuleName[fts5yyruleno], fts5yymsp[-fts5yysize].stateno);
+ }
+#endif /* NDEBUG */
+
+ /* Check that the stack is large enough to grow by a single entry
+ ** if the RHS of the rule is empty. This ensures that there is room
+ ** enough on the stack to push the LHS value */
+ if( fts5yyRuleInfo[fts5yyruleno].nrhs==0 ){
+#ifdef fts5YYTRACKMAXSTACKDEPTH
+ if( fts5yypParser->fts5yyidx>fts5yypParser->fts5yyidxMax ){
+ fts5yypParser->fts5yyidxMax = fts5yypParser->fts5yyidx;
+ }
+#endif
+#if fts5YYSTACKDEPTH>0
+ if( fts5yypParser->fts5yyidx>=fts5YYSTACKDEPTH-1 ){
+ /* fts5yyStackOverflow() begins by decrementing fts5yyidx, because its
+ ** other caller (fts5yy_shift) has already advanced the index by then.
+ ** Here the index still names a live entry, so advance it first;
+ ** otherwise that entry would be dropped without its destructor
+ ** running and its value leaked. */
+ fts5yypParser->fts5yyidx++;
+ fts5yyStackOverflow(fts5yypParser);
+ return;
+ }
+#else
+ if( fts5yypParser->fts5yyidx>=fts5yypParser->fts5yystksz-1 ){
+ fts5yyGrowStack(fts5yypParser);
+ if( fts5yypParser->fts5yyidx>=fts5yypParser->fts5yystksz-1 ){
+ /* Same compensation as in the fixed-size case above: keep the
+ ** current top entry inside the range that gets unwound. */
+ fts5yypParser->fts5yyidx++;
+ fts5yyStackOverflow(fts5yypParser);
+ return;
+ }
+ }
+#endif
+ }
+
+ switch( fts5yyruleno ){
+ /* Beginning here are the reduction cases. A typical example
+ ** follows:
+ ** case 0:
+ ** #line <lineno> <grammarfile>
+ ** { ... } // User supplied code
+ ** #line <lineno> <thisfile>
+ ** break;
+ */
+/********** Begin reduce actions **********************************************/
+ fts5YYMINORTYPE fts5yylhsminor;
+ case 0: /* input ::= expr */
+#line 82 "fts5parse.y"
+{ sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy18); }
+#line 856 "fts5parse.c"
+ break;
+ case 1: /* expr ::= expr AND expr */
+#line 92 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy18 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-2].minor.fts5yy18, fts5yymsp[0].minor.fts5yy18, 0);
+}
+#line 863 "fts5parse.c"
+ fts5yymsp[-2].minor.fts5yy18 = fts5yylhsminor.fts5yy18;
+ break;
+ case 2: /* expr ::= expr OR expr */
+#line 95 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy18 = sqlite3Fts5ParseNode(pParse, FTS5_OR, fts5yymsp[-2].minor.fts5yy18, fts5yymsp[0].minor.fts5yy18, 0);
+}
+#line 871 "fts5parse.c"
+ fts5yymsp[-2].minor.fts5yy18 = fts5yylhsminor.fts5yy18;
+ break;
+ case 3: /* expr ::= expr NOT expr */
+#line 98 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy18 = sqlite3Fts5ParseNode(pParse, FTS5_NOT, fts5yymsp[-2].minor.fts5yy18, fts5yymsp[0].minor.fts5yy18, 0);
+}
+#line 879 "fts5parse.c"
+ fts5yymsp[-2].minor.fts5yy18 = fts5yylhsminor.fts5yy18;
+ break;
+ case 4: /* expr ::= LP expr RP */
+#line 102 "fts5parse.y"
+{fts5yymsp[-2].minor.fts5yy18 = fts5yymsp[-1].minor.fts5yy18;}
+#line 885 "fts5parse.c"
+ break;
+ case 5: /* expr ::= exprlist */
+ case 6: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==6);
+#line 103 "fts5parse.y"
+{fts5yylhsminor.fts5yy18 = fts5yymsp[0].minor.fts5yy18;}
+#line 891 "fts5parse.c"
+ fts5yymsp[0].minor.fts5yy18 = fts5yylhsminor.fts5yy18;
+ break;
+ case 7: /* exprlist ::= exprlist cnearset */
+#line 106 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy18 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-1].minor.fts5yy18, fts5yymsp[0].minor.fts5yy18, 0);
+}
+#line 899 "fts5parse.c"
+ fts5yymsp[-1].minor.fts5yy18 = fts5yylhsminor.fts5yy18;
+ break;
+ case 8: /* cnearset ::= nearset */
+#line 110 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy18 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy26);
+}
+#line 907 "fts5parse.c"
+ fts5yymsp[0].minor.fts5yy18 = fts5yylhsminor.fts5yy18;
+ break;
+ case 9: /* cnearset ::= colset COLON nearset */
+#line 113 "fts5parse.y"
+{
+ sqlite3Fts5ParseSetColset(pParse, fts5yymsp[0].minor.fts5yy26, fts5yymsp[-2].minor.fts5yy3);
+ fts5yylhsminor.fts5yy18 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy26);
+}
+#line 916 "fts5parse.c"
+ fts5yymsp[-2].minor.fts5yy18 = fts5yylhsminor.fts5yy18;
+ break;
+ case 10: /* colset ::= LCP colsetlist RCP */
+#line 123 "fts5parse.y"
+{ fts5yymsp[-2].minor.fts5yy3 = fts5yymsp[-1].minor.fts5yy3; }
+#line 922 "fts5parse.c"
+ break;
+ case 11: /* colset ::= STRING */
+#line 124 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy3 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
+}
+#line 929 "fts5parse.c"
+ fts5yymsp[0].minor.fts5yy3 = fts5yylhsminor.fts5yy3;
+ break;
+ case 12: /* colsetlist ::= colsetlist STRING */
+#line 128 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy3 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy3, &fts5yymsp[0].minor.fts5yy0); }
+#line 936 "fts5parse.c"
+ fts5yymsp[-1].minor.fts5yy3 = fts5yylhsminor.fts5yy3;
+ break;
+ case 13: /* colsetlist ::= STRING */
+#line 130 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy3 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
+}
+#line 944 "fts5parse.c"
+ fts5yymsp[0].minor.fts5yy3 = fts5yylhsminor.fts5yy3;
+ break;
+ case 14: /* nearset ::= phrase */
+#line 140 "fts5parse.y"
+{ fts5yylhsminor.fts5yy26 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy11); }
+#line 950 "fts5parse.c"
+ fts5yymsp[0].minor.fts5yy26 = fts5yylhsminor.fts5yy26;
+ break;
+ case 15: /* nearset ::= STRING LP nearphrases neardist_opt RP */
+#line 141 "fts5parse.y"
+{
+ sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0);
+ sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy26, &fts5yymsp[-1].minor.fts5yy0);
+ fts5yylhsminor.fts5yy26 = fts5yymsp[-2].minor.fts5yy26;
+}
+#line 960 "fts5parse.c"
+ fts5yymsp[-4].minor.fts5yy26 = fts5yylhsminor.fts5yy26;
+ break;
+ case 16: /* nearphrases ::= phrase */
+#line 147 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy26 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy11);
+}
+#line 968 "fts5parse.c"
+ fts5yymsp[0].minor.fts5yy26 = fts5yylhsminor.fts5yy26;
+ break;
+ case 17: /* nearphrases ::= nearphrases phrase */
+#line 150 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy26 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy26, fts5yymsp[0].minor.fts5yy11);
+}
+#line 976 "fts5parse.c"
+ fts5yymsp[-1].minor.fts5yy26 = fts5yylhsminor.fts5yy26;
+ break;
+ /* Empty right-hand side: the value is written one slot above the
+ ** current top, which the capacity check before the switch reserved. */
+ case 18: /* neardist_opt ::= */
+#line 157 "fts5parse.y"
+{ fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; }
+#line 982 "fts5parse.c"
+ break;
+ case 19: /* neardist_opt ::= COMMA STRING */
+#line 158 "fts5parse.y"
+{ fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; }
+#line 987 "fts5parse.c"
+ break;
+ case 20: /* phrase ::= phrase PLUS STRING star_opt */
+#line 170 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy11, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy20);
+}
+#line 994 "fts5parse.c"
+ fts5yymsp[-3].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
+ break;
+ case 21: /* phrase ::= STRING star_opt */
+#line 173 "fts5parse.y"
+{
+ fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy20);
+}
+#line 1002 "fts5parse.c"
+ fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
+ break;
+ case 22: /* star_opt ::= STAR */
+#line 182 "fts5parse.y"
+{ fts5yymsp[0].minor.fts5yy20 = 1; }
+#line 1008 "fts5parse.c"
+ break;
+ /* Empty right-hand side: see the note on rule 18. */
+ case 23: /* star_opt ::= */
+#line 183 "fts5parse.y"
+{ fts5yymsp[1].minor.fts5yy20 = 0; }
+#line 1013 "fts5parse.c"
+ break;
+ default:
+ break;
+/********** End reduce actions ************************************************/
+ };
+ assert( fts5yyruleno>=0 && fts5yyruleno<sizeof(fts5yyRuleInfo)/sizeof(fts5yyRuleInfo[0]) );
+ /* Determine what to do once the nrhs right-hand-side entries have been
+ ** replaced by the left-hand-side symbol: the lookup uses the state of
+ ** the entry just below the RHS. */
+ fts5yygoto = fts5yyRuleInfo[fts5yyruleno].lhs;
+ fts5yysize = fts5yyRuleInfo[fts5yyruleno].nrhs;
+ fts5yyact = fts5yy_find_reduce_action(fts5yymsp[-fts5yysize].stateno,(fts5YYCODETYPE)fts5yygoto);
+ if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
+ /* A combined shift-reduce action is stored as the matching reduce
+ ** action code in the stateno field. */
+ if( fts5yyact>fts5YY_MAX_SHIFT ) fts5yyact += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE;
+ /* The LHS reuses the slot of the first RHS symbol (or the slot above
+ ** the old top when the RHS is empty). */
+ fts5yypParser->fts5yyidx -= fts5yysize - 1;
+ fts5yymsp -= fts5yysize-1;
+ fts5yymsp->stateno = (fts5YYACTIONTYPE)fts5yyact;
+ fts5yymsp->major = (fts5YYCODETYPE)fts5yygoto;
+ fts5yyTraceShift(fts5yypParser, fts5yyact);
+ }else{
+ assert( fts5yyact == fts5YY_ACCEPT_ACTION );
+ fts5yypParser->fts5yyidx -= fts5yysize;
+ fts5yy_accept(fts5yypParser);
+ }
+}
+
+/*
+** The following code executes when the parse fails
+*/
+#ifndef fts5YYNOERRORRECOVERY
+static void fts5yy_parse_failed(
+ fts5yyParser *fts5yypParser /* The parser */
+){
+ sqlite3Fts5ParserARG_FETCH;
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt);
+ }
+#endif
+ /* Empty the stack, running the destructor of every remaining symbol. */
+ while( fts5yypParser->fts5yyidx>=0 ) fts5yy_pop_parser_stack(fts5yypParser);
+ /* Here code is inserted which will be executed whenever the
+ ** parser fails */
+/************ Begin %parse_failure code ***************************************/
+/************ End %parse_failure code *****************************************/
+ sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */
+}
+#endif /* fts5YYNOERRORRECOVERY */
+
+/*
+** The following code executes when a syntax error first occurs.
+*/
+static void fts5yy_syntax_error(
+ fts5yyParser *fts5yypParser, /* The parser */
+ int fts5yymajor, /* The major type of the error token */
+ sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The minor type of the error token */
+){
+ sqlite3Fts5ParserARG_FETCH;
+/* FTS5TOKEN is the name under which the %syntax_error code below refers
+** to the offending token value. */
+#define FTS5TOKEN fts5yyminor
+/************ Begin %syntax_error code ****************************************/
+#line 30 "fts5parse.y"
+
+ UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */
+ sqlite3Fts5ParseError(
+ pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKEN.n,FTS5TOKEN.p
+ );
+#line 1076 "fts5parse.c"
+/************ End %syntax_error code ******************************************/
+ sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */
+}
+
+/*
+** The following is executed when the parser accepts
+*/
+static void fts5yy_accept(
+ fts5yyParser *fts5yypParser /* The parser */
+){
+ sqlite3Fts5ParserARG_FETCH;
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt);
+ }
+#endif
+ /* Empty the stack; this leaves fts5yyidx negative, which is the
+ ** condition sqlite3Fts5Parser() uses to (re)initialize the parser. */
+ while( fts5yypParser->fts5yyidx>=0 ) fts5yy_pop_parser_stack(fts5yypParser);
+ /* Here code is inserted which will be executed whenever the
+ ** parser accepts */
+/*********** Begin %parse_accept code *****************************************/
+/*********** End %parse_accept code *******************************************/
+ sqlite3Fts5ParserARG_STORE; /* Suppress warning about unused %extra_argument variable */
+}
+
+/* The main parser program.
+** The first argument is a pointer to a structure obtained from
+** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
+** The second argument is the major token number. The third is
+** the minor token. The fourth optional argument is whatever the
+** user wants (and specified in the grammar) and is available for
+** use by the action routines.
+**
+** Inputs:
+** <ul>
+** <li> A pointer to the parser (an opaque structure.)
+** <li> The major token number.
+** <li> The minor token number.
+** <li> An optional argument of a grammar-specified type.
+** </ul>
+**
+** Outputs:
+** None.
+*/
+static void sqlite3Fts5Parser(
+ void *fts5yyp, /* The parser */
+ int fts5yymajor, /* The major token code number */
+ sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The value for the token */
+ sqlite3Fts5ParserARG_PDECL /* Optional %extra_argument parameter */
+){
+ fts5YYMINORTYPE fts5yyminorunion;
+ int fts5yyact; /* The parser action. */
+#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
+ int fts5yyendofinput; /* True if we are at the end of input */
+#endif
+#ifdef fts5YYERRORSYMBOL
+ int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */
+#endif
+ fts5yyParser *fts5yypParser; /* The parser */
+
+ fts5yypParser = (fts5yyParser*)fts5yyp;
+
+ /* Record the %extra_argument before anything else. The overflow path
+ ** just below ends up in fts5yyStackOverflow(), which fetches the
+ ** argument back out of the parser object and hands it to
+ ** sqlite3Fts5ParseError(); sqlite3Fts5ParserAlloc() never sets that
+ ** slot, so storing it only after the check would leave the fetch
+ ** reading an unset value.
+ ** NOTE(review): relies on ARG_STORE/ARG_FETCH copying the argument
+ ** into/out of the parser object -- the macros are defined outside this
+ ** section; confirm. */
+ sqlite3Fts5ParserARG_STORE;
+
+ /* (re)initialize the parser, if necessary */
+ if( fts5yypParser->fts5yyidx<0 ){
+#if fts5YYSTACKDEPTH<=0
+ /* A dynamic stack with no capacity means the allocation attempted by
+ ** sqlite3Fts5ParserAlloc() did not succeed. */
+ if( fts5yypParser->fts5yystksz <=0 ){
+ fts5yyStackOverflow(fts5yypParser);
+ return;
+ }
+#endif
+ fts5yypParser->fts5yyidx = 0;
+#ifndef fts5YYNOERRORRECOVERY
+ fts5yypParser->fts5yyerrcnt = -1;
+#endif
+ fts5yypParser->fts5yystack[0].stateno = 0;
+ fts5yypParser->fts5yystack[0].major = 0;
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE,"%sInitialize. Empty stack. State 0\n",
+ fts5yyTracePrompt);
+ }
+#endif
+ }
+#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
+ fts5yyendofinput = (fts5yymajor==0);
+#endif
+
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE,"%sInput '%s'\n",fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
+ }
+#endif
+
+ /* Keep acting on the same token until it has been shifted or discarded
+ ** (fts5yymajor becomes fts5YYNOCODE) or the stack has been emptied. */
+ do{
+ fts5yyact = fts5yy_find_shift_action(fts5yypParser,(fts5YYCODETYPE)fts5yymajor);
+ if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
+ if( fts5yyact > fts5YY_MAX_SHIFT ) fts5yyact += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE;
+ fts5yy_shift(fts5yypParser,fts5yyact,fts5yymajor,fts5yyminor);
+#ifndef fts5YYNOERRORRECOVERY
+ fts5yypParser->fts5yyerrcnt--;
+#endif
+ fts5yymajor = fts5YYNOCODE;
+ }else if( fts5yyact <= fts5YY_MAX_REDUCE ){
+ fts5yy_reduce(fts5yypParser,fts5yyact-fts5YY_MIN_REDUCE);
+ }else{
+ assert( fts5yyact == fts5YY_ERROR_ACTION );
+ fts5yyminorunion.fts5yy0 = fts5yyminor;
+#ifdef fts5YYERRORSYMBOL
+ int fts5yymx;
+#endif
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt);
+ }
+#endif
+#ifdef fts5YYERRORSYMBOL
+ /* A syntax error has occurred.
+ ** The response to an error depends upon whether or not the
+ ** grammar defines an error token "ERROR".
+ **
+ ** This is what we do if the grammar does define ERROR:
+ **
+ ** * Call the %syntax_error function.
+ **
+ ** * Begin popping the stack until we enter a state where
+ ** it is legal to shift the error symbol, then shift
+ ** the error symbol.
+ **
+ ** * Set the error count to three.
+ **
+ ** * Begin accepting and shifting new tokens. No new error
+ ** processing will occur until three tokens have been
+ ** shifted successfully.
+ **
+ */
+ if( fts5yypParser->fts5yyerrcnt<0 ){
+ fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
+ }
+ fts5yymx = fts5yypParser->fts5yystack[fts5yypParser->fts5yyidx].major;
+ if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n",
+ fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
+ }
+#endif
+ fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yyminorunion);
+ fts5yymajor = fts5YYNOCODE;
+ }else{
+ while(
+ fts5yypParser->fts5yyidx >= 0 &&
+ fts5yymx != fts5YYERRORSYMBOL &&
+ (fts5yyact = fts5yy_find_reduce_action(
+ fts5yypParser->fts5yystack[fts5yypParser->fts5yyidx].stateno,
+ fts5YYERRORSYMBOL)) >= fts5YY_MIN_REDUCE
+ ){
+ fts5yy_pop_parser_stack(fts5yypParser);
+ }
+ if( fts5yypParser->fts5yyidx < 0 || fts5yymajor==0 ){
+ fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
+ fts5yy_parse_failed(fts5yypParser);
+ fts5yymajor = fts5YYNOCODE;
+ }else if( fts5yymx!=fts5YYERRORSYMBOL ){
+ fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
+ }
+ }
+ fts5yypParser->fts5yyerrcnt = 3;
+ fts5yyerrorhit = 1;
+#elif defined(fts5YYNOERRORRECOVERY)
+ /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
+ ** do any kind of error recovery. Instead, simply invoke the syntax
+ ** error routine and continue going as if nothing had happened.
+ **
+ ** Applications can set this macro (for example inside %include) if
+ ** they intend to abandon the parse upon the first syntax error seen.
+ */
+ fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
+ fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
+ fts5yymajor = fts5YYNOCODE;
+
+#else /* fts5YYERRORSYMBOL is not defined */
+ /* This is what we do if the grammar does not define ERROR:
+ **
+ ** * Report an error message, and throw away the input token.
+ **
+ ** * If the input token is $, then fail the parse.
+ **
+ ** As before, subsequent error messages are suppressed until
+ ** three input tokens have been successfully shifted.
+ */
+ if( fts5yypParser->fts5yyerrcnt<=0 ){
+ fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
+ }
+ fts5yypParser->fts5yyerrcnt = 3;
+ fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
+ if( fts5yyendofinput ){
+ fts5yy_parse_failed(fts5yypParser);
+ }
+ fts5yymajor = fts5YYNOCODE;
+#endif
+ }
+ }while( fts5yymajor!=fts5YYNOCODE && fts5yypParser->fts5yyidx>=0 );
+#ifndef NDEBUG
+ if( fts5yyTraceFILE ){
+ int i;
+ fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt);
+ for(i=1; i<=fts5yypParser->fts5yyidx; i++)
+ fprintf(fts5yyTraceFILE,"%c%s", i==1 ? '[' : ' ',
+ fts5yyTokenName[fts5yypParser->fts5yystack[i].major]);
+ fprintf(fts5yyTraceFILE,"]\n");
+ }
+#endif
+ return;
+}
+
+#line 1 "fts5_aux.c"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+
+/* #include "fts5Int.h" */
+#include <math.h> /* amalgamator: keep */
+
+/*
+** Object used to iterate through all "coalesced phrase instances" in
+** a single column of the current row. If the phrase instances in the
+** column being considered do not overlap, this object simply iterates
+** through them. Or, if they do overlap (share one or more tokens in
+** common), each set of overlapping instances is treated as a single
+** match. See documentation for the highlight() auxiliary function for
+** details.
+**
+** Usage is:
+**
+** for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter);
+** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter));
+** rc = fts5CInstIterNext(&iter)
+** ){
+** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
+** }
+**
+*/
+typedef struct CInstIter CInstIter;
+/* Iterator state. fts5CInstIterInit() fills in the first five fields;
+** each call to fts5CInstIterNext() advances iInst and publishes its
+** result in iStart/iEnd (both -1 when nothing was found). */
+struct CInstIter {
+ const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
+ Fts5Context *pFts; /* First arg to pass to pApi functions */
+ int iCol; /* Column to search */
+ int iInst; /* Next phrase instance index */
+ int nInst; /* Total number of phrase instances */
+
+ /* Output variables */
+ int iStart; /* First token in coalesced phrase instance */
+ int iEnd; /* Last token in coalesced phrase instance */
+};
+
+/*
+** Advance the iterator to the next coalesced phrase instance. Return
+** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
+*/
+static int fts5CInstIterNext(CInstIter *pIter){
+  int rc = SQLITE_OK;
+
+  /* A negative iStart means "nothing collected yet" for this call. */
+  pIter->iStart = -1;
+  pIter->iEnd = -1;
+
+  /* The instance index advances only after an instance has been consumed;
+  ** leaving the loop through "break" keeps iInst where it is. */
+  for(; rc==SQLITE_OK && pIter->iInst<pIter->nInst; pIter->iInst++){
+    int iPhrase;    /* Phrase number of this instance */
+    int iColumn;    /* Column holding this instance */
+    int iOffset;    /* Token offset of the instance's first token */
+    int iLast;      /* Token offset of the instance's last token */
+
+    rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &iPhrase, &iColumn, &iOffset);
+    if( rc!=SQLITE_OK ) break;
+    if( iColumn!=pIter->iCol ) continue;    /* instance in another column */
+
+    iLast = iOffset - 1 + pIter->pApi->xPhraseSize(pIter->pFts, iPhrase);
+    if( pIter->iStart<0 ){
+      /* First instance found in the column: open a new range. */
+      pIter->iStart = iOffset;
+      pIter->iEnd = iLast;
+    }else if( iOffset<=pIter->iEnd ){
+      /* Overlaps the range collected so far: extend it if needed. */
+      if( iLast>pIter->iEnd ) pIter->iEnd = iLast;
+    }else{
+      /* Starts past the current range: leave it for the next call. */
+      break;
+    }
+  }
+
+  return rc;
+}
+
+/*
+** Prepare iterator *pIter to visit the coalesced phrase instances found
+** in column iCol of the current row, and position it on the first such
+** instance (if any). Returns SQLITE_OK or an SQLite error code.
+*/
+static int fts5CInstIterInit(
+  const Fts5ExtensionApi *pApi,
+  Fts5Context *pFts,
+  int iCol,
+  CInstIter *pIter
+){
+  int rc;
+
+  /* Start from an all-zero iterator, then fill in the fixed fields */
+  memset(pIter, 0, sizeof(*pIter));
+  pIter->iCol = iCol;
+  pIter->pFts = pFts;
+  pIter->pApi = pApi;
+
+  rc = pApi->xInstCount(pFts, &pIter->nInst);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  /* Load the first coalesced instance */
+  return fts5CInstIterNext(pIter);
+}
+
+
+
+/*************************************************************************
+** Start of highlight() implementation.
+*/
+/*
+** State shared between the highlight()/snippet() implementations and the
+** tokenizer callback fts5HighlightCb(). The output string zOut is built
+** up incrementally by fts5HighlightAppend().
+*/
+typedef struct HighlightContext HighlightContext;
+struct HighlightContext {
+ CInstIter iter; /* Coalesced Instance Iterator */
+ int iPos; /* Current token offset in zIn[] */
+ int iRangeStart; /* First token to include */
+ int iRangeEnd; /* If non-zero, last token to include */
+ const char *zOpen; /* Opening highlight */
+ const char *zClose; /* Closing highlight */
+ const char *zIn; /* Input text */
+ int nIn; /* Size of input text in bytes */
+ int iOff; /* Current offset within zIn[] */
+ char *zOut; /* Output value */
+};
+
+/*
+** Append text to the HighlightContext output string - p->zOut. Argument
+** z points to a buffer containing n bytes of text to append. If n is
+** negative, everything up until the first '\0' is appended to the output.
+**
+** If z is NULL the call is a no-op. This happens when an SQL NULL is
+** passed as the open/close/ellipsis text, since sqlite3_value_text()
+** returns NULL for such values.
+**
+** If *pRc is set to any value other than SQLITE_OK when this function is
+** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
+** *pRc is set to an error code before returning.
+*/
+static void fts5HighlightAppend(
+  int *pRc,
+  HighlightContext *p,
+  const char *z, int n
+){
+  if( *pRc==SQLITE_OK && z!=0 ){
+    if( n<0 ) n = (int)strlen(z);
+    /* %z consumes (and frees) the previous value of p->zOut */
+    p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
+    if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
+  }
+}
+
+/*
+** Tokenizer callback used by implementation of highlight() function.
+**
+** Invoked once per token of the text being highlighted. Copies input
+** text to the output (p->zOut), inserting p->zOpen before the first token
+** and p->zClose after the last token of each coalesced phrase instance.
+** When p->iRangeEnd is greater than zero (as set by snippet()), only
+** tokens within [iRangeStart, iRangeEnd] contribute to the output.
+*/
+static int fts5HighlightCb(
+ void *pContext, /* Pointer to HighlightContext object */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStartOff, /* Start offset of token */
+ int iEndOff /* End offset of token */
+){
+ HighlightContext *p = (HighlightContext*)pContext;
+ int rc = SQLITE_OK;
+ int iPos;
+
+ UNUSED_PARAM2(pToken, nToken);
+
+ /* Colocated tokens do not advance the token position counter */
+ if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
+ iPos = p->iPos++;
+
+ if( p->iRangeEnd>0 ){
+ /* Ignore tokens outside the requested range. At the first token of a
+ ** range that does not start at token 0, begin copying from here. */
+ if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
+ if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
+ }
+
+ /* First token of a phrase instance: flush preceding text, then open */
+ if( iPos==p->iter.iStart ){
+ fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
+ fts5HighlightAppend(&rc, p, p->zOpen, -1);
+ p->iOff = iStartOff;
+ }
+
+ /* Last token of a phrase instance: flush the match text, then close */
+ if( iPos==p->iter.iEnd ){
+ /* The instance began before the range, so no opening marker has been
+ ** written for it yet. */
+ if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
+ fts5HighlightAppend(&rc, p, p->zOpen, -1);
+ }
+ fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
+ fts5HighlightAppend(&rc, p, p->zClose, -1);
+ p->iOff = iEndOff;
+ if( rc==SQLITE_OK ){
+ rc = fts5CInstIterNext(&p->iter);
+ }
+ }
+
+ /* Final token of the range: flush remaining text and, if an instance is
+ ** still open at this point, close it. */
+ if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
+ fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
+ p->iOff = iEndOff;
+ if( iPos<p->iter.iEnd ){
+ fts5HighlightAppend(&rc, p, p->zClose, -1);
+ }
+ }
+
+ return rc;
+}
+
+/*
+** Implementation of highlight() function.
+**
+** Expects exactly three trailing arguments: the column index, the text
+** inserted before each match and the text inserted after each match.
+** The result is the column text with the markers inserted. If an error
+** occurs, the error code is returned to the caller via pCtx instead.
+*/
+static void fts5HighlightFunction(
+  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
+  Fts5Context *pFts,              /* First arg to pass to pApi functions */
+  sqlite3_context *pCtx,          /* Context for returning result/error */
+  int nVal,                       /* Number of values in apVal[] array */
+  sqlite3_value **apVal           /* Array of trailing arguments */
+){
+  HighlightContext hl;            /* Highlighting state */
+  int iCol;                       /* Column to highlight */
+  int rc;                         /* Return code */
+
+  if( nVal!=3 ){
+    sqlite3_result_error(
+        pCtx, "wrong number of arguments to function highlight()", -1
+    );
+    return;
+  }
+
+  memset(&hl, 0, sizeof(hl));
+  iCol = sqlite3_value_int(apVal[0]);
+  hl.zOpen = (const char*)sqlite3_value_text(apVal[1]);
+  hl.zClose = (const char*)sqlite3_value_text(apVal[2]);
+  rc = pApi->xColumnText(pFts, iCol, &hl.zIn, &hl.nIn);
+
+  if( hl.zIn!=0 ){
+    if( rc==SQLITE_OK ){
+      rc = fts5CInstIterInit(pApi, pFts, iCol, &hl.iter);
+    }
+    if( rc==SQLITE_OK ){
+      rc = pApi->xTokenize(pFts, hl.zIn, hl.nIn, (void*)&hl, fts5HighlightCb);
+    }
+
+    /* Copy whatever text follows the final match */
+    fts5HighlightAppend(&rc, &hl, &hl.zIn[hl.iOff], hl.nIn - hl.iOff);
+
+    if( rc==SQLITE_OK ){
+      sqlite3_result_text(pCtx, (const char*)hl.zOut, -1, SQLITE_TRANSIENT);
+    }
+    sqlite3_free(hl.zOut);
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3_result_error_code(pCtx, rc);
+  }
+}
+/*
+** End of highlight() implementation.
+**************************************************************************/
+
+/*
+** Implementation of snippet() function.
+**
+** Expects exactly five trailing arguments: the column index (negative to
+** consider all columns), the opening and closing match markers, the
+** ellipsis text and the snippet size in tokens. The phrase instances of
+** the current row are scored to select the best window of nToken tokens,
+** which is then rendered using the highlight() machinery.
+**
+** Any error code is reported through pCtx, including errors that occur
+** before the column text has been loaded.
+*/
+static void fts5SnippetFunction(
+  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
+  Fts5Context *pFts,              /* First arg to pass to pApi functions */
+  sqlite3_context *pCtx,          /* Context for returning result/error */
+  int nVal,                       /* Number of values in apVal[] array */
+  sqlite3_value **apVal           /* Array of trailing arguments */
+){
+  HighlightContext ctx;
+  int rc = SQLITE_OK;             /* Return code */
+  int iCol;                       /* 1st argument to snippet() */
+  const char *zEllips;            /* 4th argument to snippet() */
+  int nToken;                     /* 5th argument to snippet() */
+  int nInst = 0;                  /* Number of instance matches this row */
+  int i;                          /* Used to iterate through instances */
+  int nPhrase;                    /* Number of phrases in query */
+  unsigned char *aSeen;           /* Array of "seen instance" flags */
+  int iBestCol;                   /* Column containing best snippet */
+  int iBestStart = 0;             /* First token of best snippet */
+  int iBestLast;                  /* Last token of best snippet */
+  int nBestScore = 0;             /* Score of best snippet */
+  int nColSize = 0;               /* Total size of iBestCol in tokens */
+
+  if( nVal!=5 ){
+    const char *zErr = "wrong number of arguments to function snippet()";
+    sqlite3_result_error(pCtx, zErr, -1);
+    return;
+  }
+
+  memset(&ctx, 0, sizeof(HighlightContext));
+  iCol = sqlite3_value_int(apVal[0]);
+  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
+  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
+  zEllips = (const char*)sqlite3_value_text(apVal[3]);
+  nToken = sqlite3_value_int(apVal[4]);
+  iBestLast = nToken-1;
+
+  iBestCol = (iCol>=0 ? iCol : 0);
+  nPhrase = pApi->xPhraseCount(pFts);
+  aSeen = sqlite3_malloc(nPhrase);
+  if( aSeen==0 ){
+    rc = SQLITE_NOMEM;
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = pApi->xInstCount(pFts, &nInst);
+  }
+
+  /* Score the window that starts at each phrase instance in turn */
+  for(i=0; rc==SQLITE_OK && i<nInst; i++){
+    int ip, iSnippetCol, iStart;
+    memset(aSeen, 0, nPhrase);
+    rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
+    if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
+      int nScore = 1000;
+      int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
+      int j;
+      aSeen[ip] = 1;
+
+      for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
+        int ic; int io; int iFinal;
+        rc = pApi->xInst(pFts, j, &ip, &ic, &io);
+        /* Do not touch ip/io if xInst() failed: they may be unset */
+        if( rc!=SQLITE_OK ) break;
+        iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
+        if( ic==iSnippetCol && iLast<iStart+nToken ){
+          nScore += aSeen[ip] ? 1000 : 1;
+          aSeen[ip] = 1;
+          if( iFinal>iLast ) iLast = iFinal;
+        }
+      }
+
+      if( rc==SQLITE_OK && nScore>nBestScore ){
+        iBestCol = iSnippetCol;
+        iBestStart = iStart;
+        iBestLast = iLast;
+        nBestScore = nScore;
+      }
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
+  }
+  if( rc==SQLITE_OK ){
+    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
+  }
+  if( ctx.zIn ){
+    if( rc==SQLITE_OK ){
+      rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
+    }
+
+    /* Centre the matches within the window, then clamp the window so
+    ** that it lies within the column. */
+    if( (iBestStart+nToken-1)>iBestLast ){
+      iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
+    }
+    if( iBestStart+nToken>nColSize ){
+      iBestStart = nColSize - nToken;
+    }
+    if( iBestStart<0 ) iBestStart = 0;
+
+    ctx.iRangeStart = iBestStart;
+    ctx.iRangeEnd = iBestStart + nToken - 1;
+
+    if( iBestStart>0 ){
+      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
+    }
+    if( rc==SQLITE_OK ){
+      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
+    }
+    if( ctx.iRangeEnd>=(nColSize-1) ){
+      fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
+    }else{
+      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
+    }
+
+    if( rc==SQLITE_OK ){
+      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
+    }
+    sqlite3_free(ctx.zOut);
+  }
+
+  /* Report errors even if no column text was loaded (e.g. the aSeen
+  ** allocation failed), so that they are never silently discarded. */
+  if( rc!=SQLITE_OK ){
+    sqlite3_result_error_code(pCtx, rc);
+  }
+  sqlite3_free(aSeen);
+}
+
+/************************************************************************/
+
+/*
+** The first time the bm25() function is called for a query, an instance
+** of the following structure is allocated and populated.
+**
+** fts5Bm25GetData() allocates the aIDF[] and aFreq[] arrays (nPhrase
+** entries each) in the same allocation as the structure itself, so a
+** single sqlite3_free() releases everything.
+*/
+typedef struct Fts5Bm25Data Fts5Bm25Data;
+struct Fts5Bm25Data {
+ int nPhrase; /* Number of phrases in query */
+ double avgdl; /* Average number of tokens in each row */
+ double *aIDF; /* IDF for each phrase */
+ double *aFreq; /* Array used to calculate phrase freq. */
+};
+
+/*
+** xQueryPhrase() callback used by fts5Bm25GetData(). Each invocation adds
+** one to the sqlite3_int64 counter that pUserData points to, so that the
+** caller ends up with the number of times the callback was invoked for
+** the phrase being queried.
+*/
+static int fts5CountCb(
+  const Fts5ExtensionApi *pApi,
+  Fts5Context *pFts,
+  void *pUserData                 /* Pointer to sqlite3_int64 variable */
+){
+  sqlite3_int64 *pnHit = (sqlite3_int64*)pUserData;
+  UNUSED_PARAM2(pApi, pFts);
+  *pnHit += 1;
+  return SQLITE_OK;
+}
+
+/*
+** Set *ppData to point to the Fts5Bm25Data object for the current query.
+** If the object has not already been allocated, allocate and populate it
+** now.
+**
+** The object is stored using xSetAuxdata() with sqlite3_free() as its
+** destructor and retrieved on later calls with xGetAuxdata(). On error,
+** an SQLite error code is returned and *ppData is set to NULL.
+*/
+static int fts5Bm25GetData(
+ const Fts5ExtensionApi *pApi,
+ Fts5Context *pFts,
+ Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */
+){
+ int rc = SQLITE_OK; /* Return code */
+ Fts5Bm25Data *p; /* Object to return */
+
+ p = pApi->xGetAuxdata(pFts, 0);
+ if( p==0 ){
+ int nPhrase; /* Number of phrases in query */
+ sqlite3_int64 nRow = 0; /* Number of rows in table */
+ sqlite3_int64 nToken = 0; /* Number of tokens in table */
+ int nByte; /* Bytes of space to allocate */
+ int i;
+
+ /* Allocate the Fts5Bm25Data object. The aIDF[] and aFreq[] arrays are
+ ** carved out of the space immediately following the structure. */
+ nPhrase = pApi->xPhraseCount(pFts);
+ nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
+ p = (Fts5Bm25Data*)sqlite3_malloc(nByte);
+ if( p==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(p, 0, nByte);
+ p->nPhrase = nPhrase;
+ p->aIDF = (double*)&p[1];
+ p->aFreq = &p->aIDF[nPhrase];
+ }
+
+ /* Calculate the average document length for this FTS5 table */
+ /* NOTE(review): nRow is used as a divisor without a zero check -
+ ** presumably the table always has at least one row here; confirm. */
+ if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
+ if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
+ if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow;
+
+ /* Calculate an IDF for each phrase in the query */
+ for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
+ sqlite3_int64 nHit = 0;
+ rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
+ if( rc==SQLITE_OK ){
+ /* Calculate the IDF (Inverse Document Frequency) for phrase i.
+ ** This is done using the standard BM25 formula as found on wikipedia:
+ **
+ ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
+ **
+ ** where "N" is the total number of documents in the set and nHit
+ ** is the number that contain at least one instance of the phrase
+ ** under consideration.
+ **
+ ** The problem with this is that if (N < 2*nHit), the IDF is
+ ** negative. Which is undesirable. So the minimum allowable IDF is
+ ** (1e-6) - roughly the same as a term that appears in just over
+ ** half of set of 5,000,000 documents. */
+ double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
+ if( idf<=0.0 ) idf = 1e-6;
+ p->aIDF[i] = idf;
+ }
+ }
+
+ /* On success, hand the object over to the FTS5 core. Otherwise free it
+ ** here. Either way, p is zeroed below if rc is not SQLITE_OK. */
+ if( rc!=SQLITE_OK ){
+ sqlite3_free(p);
+ }else{
+ rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
+ }
+ if( rc!=SQLITE_OK ) p = 0;
+ }
+ *ppData = p;
+ return rc;
+}
+
+/*
+** Implementation of bm25() function.
+**
+** Trailing arguments, if any, are per-column weights: an instance found
+** in column ic contributes apVal[ic] to its phrase frequency when that
+** argument exists, or 1.0 otherwise. The value returned to SQL is the
+** BM25 score multiplied by -1.0. On error the error code is returned via
+** pCtx instead.
+*/
+static void fts5Bm25Function(
+  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
+  Fts5Context *pFts,              /* First arg to pass to pApi functions */
+  sqlite3_context *pCtx,          /* Context for returning result/error */
+  int nVal,                       /* Number of values in apVal[] array */
+  sqlite3_value **apVal           /* Array of trailing arguments */
+){
+  const double k1 = 1.2;          /* Constant "k1" from BM25 formula */
+  const double b = 0.75;          /* Constant "b" from BM25 formula */
+  int rc = SQLITE_OK;             /* Error code */
+  double score = 0.0;             /* SQL function return value */
+  Fts5Bm25Data *pData;            /* Values allocated/calculated once only */
+  int i;                          /* Iterator variable */
+  int nInst = 0;                  /* Value returned by xInstCount() */
+  double D = 0.0;                 /* Total number of tokens in row */
+  double *aFreq = 0;              /* Array of phrase freq. for current row */
+
+  /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
+  ** for each phrase in the query for the current row. */
+  rc = fts5Bm25GetData(pApi, pFts, &pData);
+  if( rc==SQLITE_OK ){
+    aFreq = pData->aFreq;
+    memset(aFreq, 0, sizeof(double) * pData->nPhrase);
+    rc = pApi->xInstCount(pFts, &nInst);
+  }
+  for(i=0; rc==SQLITE_OK && i<nInst; i++){
+    int ip; int ic; int io;
+    rc = pApi->xInst(pFts, i, &ip, &ic, &io);
+    if( rc==SQLITE_OK ){
+      double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
+      aFreq[ip] += w;
+    }
+  }
+
+  /* Figure out the total size of the current row in tokens. nTok is
+  ** initialized so that D is well-defined even if xColumnSize() fails
+  ** without writing to it. */
+  if( rc==SQLITE_OK ){
+    int nTok = 0;
+    rc = pApi->xColumnSize(pFts, -1, &nTok);
+    D = (double)nTok;
+  }
+
+  /* Determine the BM25 score for the current row. */
+  for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){
+    score += pData->aIDF[i] * (
+        ( aFreq[i] * (k1 + 1.0) ) /
+        ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
+    );
+  }
+
+  /* If no error has occurred, return the calculated score. Otherwise,
+  ** throw an SQL exception. */
+  if( rc==SQLITE_OK ){
+    sqlite3_result_double(pCtx, -1.0 * score);
+  }else{
+    sqlite3_result_error_code(pCtx, rc);
+  }
+}
+
+/*
+** Register the built-in auxiliary functions snippet(), highlight() and
+** bm25() with the fts5_api object passed as the only argument. Stops at
+** the first failure and returns its error code, or returns SQLITE_OK if
+** every registration succeeds.
+*/
+static int sqlite3Fts5AuxInit(fts5_api *pApi){
+  struct Builtin {
+    const char *zFunc;            /* Function name (nul-terminated) */
+    void *pUserData;              /* User-data pointer */
+    fts5_extension_function xFunc;/* Callback function */
+    void (*xDestroy)(void*);      /* Destructor function */
+  } aBuiltin [] = {
+    { "snippet",   0, fts5SnippetFunction, 0 },
+    { "highlight", 0, fts5HighlightFunction, 0 },
+    { "bm25",      0, fts5Bm25Function, 0 },
+  };
+  int rc = SQLITE_OK;             /* Return code */
+  int i = 0;                      /* Index of next builtin to register */
+
+  while( rc==SQLITE_OK && i<ArraySize(aBuiltin) ){
+    struct Builtin *pB = &aBuiltin[i++];
+    rc = pApi->xCreateFunction(
+        pApi, pB->zFunc, pB->pUserData, pB->xFunc, pB->xDestroy
+    );
+  }
+
+  return rc;
+}
+
+
+
+#line 1 "fts5_buffer.c"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+
+
+/* #include "fts5Int.h" */
+
+/*
+** Ensure that buffer pBuf has space for at least nByte bytes in total,
+** growing the allocation (by repeated doubling, starting from 64 bytes)
+** if required. Returns 0 on success. If the buffer cannot be grown,
+** *pRc is set to SQLITE_NOMEM, 1 is returned and pBuf is left unchanged.
+**
+** The new size is computed in 64 bits so that the doubling loop cannot
+** wrap around to zero. A size that does not fit in the int accepted by
+** sqlite3_realloc() is treated as an OOM rather than being passed on as a
+** non-positive value, which would free the existing buffer.
+*/
+static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
+  if( (u32)pBuf->nSpace<nByte ){
+    sqlite3_uint64 nNew = pBuf->nSpace ? (u32)pBuf->nSpace : 64;
+    u8 *pNew = 0;
+    while( nNew<nByte ){
+      nNew = nNew * 2;
+    }
+    if( nNew<=0x7FFFFFFF ){
+      pNew = sqlite3_realloc(pBuf->p, (int)nNew);
+    }
+    if( pNew==0 ){
+      *pRc = SQLITE_NOMEM;
+      return 1;
+    }else{
+      pBuf->nSpace = (int)nNew;
+      pBuf->p = pNew;
+    }
+  }
+  return 0;
+}
+
+
+/*
+** Encode value iVal as an SQLite varint and append it to the buffer object
+** pBuf. Space for 9 bytes is reserved first; if fts5BufferGrow() reports
+** failure, nothing is appended.
+*/
+static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
+  if( fts5BufferGrow(pRc, pBuf, 9) ){
+    /* Unable to reserve space - leave the buffer as it is */
+  }else{
+    u8 *aOut = &pBuf->p[pBuf->n];
+    pBuf->n += sqlite3Fts5PutVarint(aOut, iVal);
+  }
+}
+
+/*
+** Write the 32-bit value iVal to aBuf[0..3], most significant byte first.
+*/
+static void sqlite3Fts5Put32(u8 *aBuf, int iVal){
+  int iByte;
+  for(iByte=0; iByte<4; iByte++){
+    aBuf[iByte] = (iVal >> (24 - 8*iByte)) & 0x00FF;
+  }
+}
+
+/*
+** Read and return the 32-bit value stored most significant byte first in
+** aBuf[0..3] (the inverse of sqlite3Fts5Put32()).
+**
+** The bytes are combined as unsigned values. Shifting a promoted (signed
+** int) byte value of 0x80 or greater left by 24 bits would overflow, which
+** is undefined behaviour.
+*/
+static int sqlite3Fts5Get32(const u8 *aBuf){
+  u32 v = ((u32)aBuf[0] << 24)
+        | ((u32)aBuf[1] << 16)
+        | ((u32)aBuf[2] << 8)
+        |  (u32)aBuf[3];
+  return (int)v;
+}
+
+/*
+** Append the nData bytes at pData to buffer pBuf. If the buffer cannot
+** be grown to hold them (fts5BufferGrow() returns non-zero, with any
+** error code left in *pRc), nothing is appended.
+*/
+static void sqlite3Fts5BufferAppendBlob(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  u32 nData,
+  const u8 *pData
+){
+  assert_nc( *pRc || nData>=0 );
+  if( fts5BufferGrow(pRc, pBuf, nData) ){
+    /* Could not make room for the new data */
+  }else{
+    u8 *pDest = &pBuf->p[pBuf->n];
+    memcpy(pDest, pData, nData);
+    pBuf->n += nData;
+  }
+}
+
+/*
+** Append the nul-terminated string zStr to the buffer pBuf. This function
+** ensures that the byte following the buffer data is set to 0x00, even
+** though this byte is not included in the pBuf->n count.
+**
+** If the append does not take place (for example because the buffer could
+** not be grown), pBuf->n is left untouched. It is only decremented to
+** exclude the nul-terminator when the string was actually copied.
+*/
+static void sqlite3Fts5BufferAppendString(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  const char *zStr
+){
+  int nStr = (int)strlen(zStr);
+  int nBefore = pBuf->n;
+  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
+  /* At least one byte (the terminator) is appended on success, so a
+  ** changed count reliably indicates that the copy happened. */
+  if( pBuf->n!=nBefore ){
+    pBuf->n--;
+  }
+}
+
+/*
+** Format zFmt and the arguments that follow it printf()-style, and append
+** the resulting text to buffer pBuf using sqlite3Fts5BufferAppendString()
+** (so the data is followed by a 0x00 byte not counted in pBuf->n).
+**
+** This is a no-op if *pRc is not SQLITE_OK on entry. If the formatted
+** string cannot be allocated, *pRc is set to SQLITE_NOMEM.
+*/
+static void sqlite3Fts5BufferAppendPrintf(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  char *zFmt, ...
+){
+  char *zText;
+  va_list ap;
+
+  if( *pRc!=SQLITE_OK ){
+    return;
+  }
+
+  va_start(ap, zFmt);
+  zText = sqlite3_vmprintf(zFmt, ap);
+  va_end(ap);
+
+  if( zText ){
+    sqlite3Fts5BufferAppendString(pRc, pBuf, zText);
+    sqlite3_free(zText);
+  }else{
+    *pRc = SQLITE_NOMEM;
+  }
+}
+
+/*
+** Wrapper around sqlite3_vmprintf() that follows the *pRc convention. If
+** *pRc is not SQLITE_OK on entry, NULL is returned and nothing else is
+** done. Otherwise the formatted string is returned, or, if it cannot be
+** allocated, *pRc is set to SQLITE_NOMEM and NULL is returned.
+*/
+static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
+  char *zOut;
+  va_list ap;
+
+  if( *pRc!=SQLITE_OK ){
+    return 0;
+  }
+
+  va_start(ap, zFmt);
+  zOut = sqlite3_vmprintf(zFmt, ap);
+  va_end(ap);
+
+  if( zOut==0 ){
+    *pRc = SQLITE_NOMEM;
+  }
+  return zOut;
+}
+
+
+/*
+** Release the memory held by pBuf->p, then reset every field of *pBuf
+** to zero.
+*/
+static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
+  u8 *pOld = pBuf->p;
+  sqlite3_free(pOld);
+  memset(pBuf, 0, sizeof(*pBuf));
+}
+
+/*
+** Zero the contents of the buffer object. But do not free the associated
+** memory allocation.
+**
+** Only the byte count pBuf->n is reset. pBuf->p and pBuf->nSpace are left
+** untouched.
+*/
+static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
+ pBuf->n = 0;
+}
+
+/*
+** Replace the contents of buffer pBuf with the nData bytes at pData. The
+** existing content is discarded first (the allocation is kept), then the
+** new data is added using sqlite3Fts5BufferAppendBlob(), which leaves any
+** error code in *pRc.
+*/
+static void sqlite3Fts5BufferSet(
+  int *pRc,
+  Fts5Buffer *pBuf,
+  int nData,
+  const u8 *pData
+){
+  sqlite3Fts5BufferZero(pBuf);
+  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
+}
+
+/*
+** Read the next position from the position list a[0..n-1].
+**
+** *pi is the current byte offset within a[] and *piOff the previously
+** returned position. If *pi is at or past the end of the buffer, *piOff
+** is set to -1 and 1 (EOF) is returned. Otherwise *piOff and *pi are
+** updated and 0 is returned.
+*/
+static int sqlite3Fts5PoslistNext64(
+ const u8 *a, int n, /* Buffer containing poslist */
+ int *pi, /* IN/OUT: Offset within a[] */
+ i64 *piOff /* IN/OUT: Current offset */
+){
+ int i = *pi;
+ if( i>=n ){
+ /* EOF */
+ *piOff = -1;
+ return 1;
+ }else{
+ i64 iOff = *piOff;
+ int iVal;
+ fts5FastGetVarint32(a, i, iVal);
+ if( iVal==1 ){
+ /* A value of 1 introduces a new upper 32 bits for the position. The
+ ** next varint supplies them and the one after that the delta. This
+ ** mirrors what sqlite3Fts5PoslistSafeAppend() writes. */
+ fts5FastGetVarint32(a, i, iVal);
+ iOff = ((i64)iVal) << 32;
+ fts5FastGetVarint32(a, i, iVal);
+ }
+ /* Deltas are stored with 2 added to them */
+ *piOff = iOff + (iVal-2);
+ *pi = i;
+ return 0;
+ }
+}
+
+
+/*
+** Move the position-list reader passed as the only argument on to its
+** next entry. The return value is pIter->bEof: non-zero once the reader
+** has run out of entries.
+*/
+static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
+  int bDone = sqlite3Fts5PoslistNext64(
+      pIter->a, pIter->n, &pIter->i, &pIter->iPos
+  );
+  if( bDone ){
+    pIter->bEof = 1;
+  }
+  return pIter->bEof;
+}
+
+/*
+** Initialize reader *pIter to iterate through the position list stored
+** in a[0..n-1] and load its first entry. Returns pIter->bEof, which is
+** non-zero if the list contains no entries.
+*/
+static int sqlite3Fts5PoslistReaderInit(
+  const u8 *a, int n,             /* Poslist buffer to iterate through */
+  Fts5PoslistReader *pIter        /* Iterator object to initialize */
+){
+  memset(pIter, 0, sizeof(Fts5PoslistReader));
+  pIter->n = n;
+  pIter->a = a;
+  return sqlite3Fts5PoslistReaderNext(pIter);
+}
+
+/*
+** Append position iPos to the position list being accumulated in buffer
+** pBuf, which must already be large enough to hold the new data.
+** The previous position written to this list is *piPrev. *piPrev is set
+** to iPos before returning.
+*/
+static void sqlite3Fts5PoslistSafeAppend(
+ Fts5Buffer *pBuf,
+ i64 *piPrev,
+ i64 iPos
+){
+ static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
+ /* If the upper 32 bits differ from those of the previous position, emit
+ ** a 0x01 byte followed by the new upper-32-bit value as a varint. */
+ if( (iPos & colmask) != (*piPrev & colmask) ){
+ pBuf->p[pBuf->n++] = 1;
+ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
+ *piPrev = (iPos & colmask);
+ }
+ /* The delta is stored with 2 added to it */
+ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
+ *piPrev = iPos;
+}
+
+/*
+** Append position iPos to the position list in pBuf, first reserving the
+** 15 bytes (5+5+5) that a single entry may need. Returns SQLITE_OK on
+** success, or the error code set by fts5BufferGrow() if the buffer cannot
+** be grown.
+*/
+static int sqlite3Fts5PoslistWriterAppend(
+  Fts5Buffer *pBuf,
+  Fts5PoslistWriter *pWriter,
+  i64 iPos
+){
+  int rc = 0;   /* Initialized only to suppress erroneous warning from Clang */
+  if( fts5BufferGrow(&rc, pBuf, 5+5+5) ){
+    /* rc now holds the reason for the failure */
+  }else{
+    sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
+    rc = SQLITE_OK;
+  }
+  return rc;
+}
+
+/*
+** Allocate nByte bytes using sqlite3_malloc() and zero them. Returns the
+** new allocation, or NULL if *pRc is not SQLITE_OK on entry or if the
+** allocation fails. A failed allocation sets *pRc to SQLITE_NOMEM only
+** when nByte is greater than zero.
+**
+** memset() is only invoked on a non-NULL pointer, so a request for zero
+** (or fewer) bytes never passes NULL to memset().
+*/
+static void *sqlite3Fts5MallocZero(int *pRc, int nByte){
+  void *pRet = 0;
+  if( *pRc==SQLITE_OK ){
+    pRet = sqlite3_malloc(nByte);
+    if( pRet ){
+      memset(pRet, 0, nByte);
+    }else if( nByte>0 ){
+      *pRc = SQLITE_NOMEM;
+    }
+  }
+  return pRet;
+}
+
+/*
+** Return a nul-terminated copy of the first nIn bytes of the string pIn.
+** A negative nIn means "use strlen(pIn)".
+**
+** The copy is obtained from sqlite3_malloc(), so the caller must release
+** it with sqlite3_free(). NULL is returned if *pRc is not SQLITE_OK on
+** entry, or if the allocation fails (in which case *pRc is also set to
+** SQLITE_NOMEM).
+*/
+static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
+  char *zCopy;
+
+  if( *pRc!=SQLITE_OK ){
+    return 0;
+  }
+  if( nIn<0 ){
+    nIn = (int)strlen(pIn);
+  }
+
+  zCopy = (char*)sqlite3_malloc(nIn+1);
+  if( zCopy==0 ){
+    *pRc = SQLITE_NOMEM;
+    return 0;
+  }
+
+  memcpy(zCopy, pIn, nIn);
+  zCopy[nIn] = '\0';
+  return zCopy;
+}
+
+
+/*
+** Return true if character 't' may be part of an FTS5 bareword, or false
+** otherwise. Characters that may be part of barewords:
+**
+** * All non-ASCII characters,
+** * The 52 upper and lower case ASCII characters, and
+** * The 10 integer ASCII characters.
+** * The underscore character "_" (0x5F).
+** * The unicode "substitute" character (0x1A).
+**
+** The lookup table is static const, so it is not rebuilt on the stack on
+** every call.
+*/
+static int sqlite3Fts5IsBareword(char t){
+  static const unsigned char aBareword[128] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  /* 0x70 .. 0x7F */
+  };
+
+  /* The high-bit test comes first, so the table is only ever indexed
+  ** with values in the range 0..127. */
+  return (t & 0x80) || aBareword[(int)t];
+}
+
+
+/*************************************************************************
+*/
+/*
+** A Fts5Termset is a set of (iIdx, term) pairs, implemented as a hash
+** table with 512 buckets. Each bucket is a singly linked list of
+** Fts5TermsetEntry objects. sqlite3Fts5TermsetAdd() stores the term bytes
+** in the same allocation as the entry, immediately after the structure.
+*/
+typedef struct Fts5TermsetEntry Fts5TermsetEntry;
+struct Fts5TermsetEntry {
+ char *pTerm;
+ int nTerm;
+ int iIdx; /* Index (main or aPrefix[] entry) */
+ Fts5TermsetEntry *pNext;
+};
+
+struct Fts5Termset {
+ Fts5TermsetEntry *apHash[512];
+};
+
+/*
+** Allocate a new, empty (all-zero) Fts5Termset and store it in *pp.
+** Returns SQLITE_OK on success. If the allocation fails, *pp is set to
+** NULL and the error code from sqlite3Fts5MallocZero() is returned.
+*/
+static int sqlite3Fts5TermsetNew(Fts5Termset **pp){
+  int rc = SQLITE_OK;
+  Fts5Termset *pNew = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
+  *pp = pNew;
+  return rc;
+}
+
+/*
+** Add the (iIdx, pTerm/nTerm) pair to term-set p.
+**
+** *pbPresent is set to 1 if the pair was already part of the set, or to 0
+** if it was not (in which case it is added now). If p is NULL, the call
+** only clears *pbPresent. Returns SQLITE_OK, or an error code if the new
+** entry cannot be allocated.
+*/
+static int sqlite3Fts5TermsetAdd(
+  Fts5Termset *p,
+  int iIdx,
+  const char *pTerm, int nTerm,
+  int *pbPresent
+){
+  int rc = SQLITE_OK;
+  int k;                          /* Counts down through pTerm[] */
+  u32 h = 13;                     /* Hash value, then bucket index */
+  Fts5TermsetEntry *pE;           /* Used to walk the bucket list */
+
+  *pbPresent = 0;
+  if( p==0 ){
+    return rc;
+  }
+
+  /* Calculate a hash value for this term. This is the same hash checksum
+  ** used by the fts5_hash.c module. This is not important for correct
+  ** operation of the module, but is necessary to ensure that some tests
+  ** designed to produce hash table collisions really do work. */
+  for(k=nTerm; k>0; k--){
+    h = (h << 3) ^ h ^ pTerm[k-1];
+  }
+  h = (h << 3) ^ h ^ iIdx;
+  h = h % ArraySize(p->apHash);
+
+  /* Search the bucket for an identical entry */
+  pE = p->apHash[h];
+  while( pE ){
+    if( pE->iIdx==iIdx
+     && pE->nTerm==nTerm
+     && memcmp(pE->pTerm, pTerm, nTerm)==0
+    ){
+      *pbPresent = 1;
+      return rc;
+    }
+    pE = pE->pNext;
+  }
+
+  /* Not found. Create a new entry, with the term text stored directly
+  ** after the structure, and link it in at the head of the bucket. */
+  pE = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
+  if( pE ){
+    pE->pTerm = (char*)&pE[1];
+    pE->nTerm = nTerm;
+    pE->iIdx = iIdx;
+    memcpy(pE->pTerm, pTerm, nTerm);
+    pE->pNext = p->apHash[h];
+    p->apHash[h] = pE;
+  }
+
+  return rc;
+}
+
+/*
+** Free term-set p along with every entry it contains. Passing NULL is a
+** harmless no-op.
+*/
+static void sqlite3Fts5TermsetFree(Fts5Termset *p){
+  u32 iSlot;
+
+  if( p==0 ){
+    return;
+  }
+
+  for(iSlot=0; iSlot<ArraySize(p->apHash); iSlot++){
+    Fts5TermsetEntry *pE;
+    Fts5TermsetEntry *pNext;
+    for(pE=p->apHash[iSlot]; pE; pE=pNext){
+      /* Save the link before the entry is freed */
+      pNext = pE->pNext;
+      sqlite3_free(pE);
+    }
+  }
+  sqlite3_free(p);
+}
+
+#line 1 "fts5_config.c"
+/*
+** 2014 Jun 09
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This is an SQLite module implementing full-text search.
+*/
+
+
+/* #include "fts5Int.h" */
+
+/* Default values for FTS5 configuration settings.
+** NOTE(review): meanings are inferred from the macro names only - the
+** code that consumes these values is not part of this section. */
+#define FTS5_DEFAULT_PAGE_SIZE 4050
+#define FTS5_DEFAULT_AUTOMERGE 4
+#define FTS5_DEFAULT_CRISISMERGE 16
+#define FTS5_DEFAULT_HASHSIZE (1024*1024)
+
+/* Maximum allowed page size */
+#define FTS5_MAX_PAGE_SIZE (128*1024)
+
+/*
+** Return 1 if x is a space character (0x20), or 0 otherwise. No other
+** character (tab, newline etc.) counts as white-space here.
+*/
+static int fts5_iswhitespace(char x){
+  return (x==' ') ? 1 : 0;
+}
+
+/*
+** Return 1 if x is one of the four characters that may open a quoted
+** string - double-quote, single-quote, '[' or back-tick - or 0 otherwise.
+*/
+static int fts5_isopenquote(char x){
+  switch( x ){
+    case '"':
+    case '\'':
+    case '[':
+    case '`':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/*
+** Argument pIn is either NULL or points into a nul-terminated string.
+** Return a pointer to the first character at or after pIn that is not a
+** white-space character (see fts5_iswhitespace()). If pIn is NULL, NULL
+** is returned.
+*/
+static const char *fts5ConfigSkipWhitespace(const char *pIn){
+  if( pIn==0 ){
+    return pIn;
+  }
+  while( fts5_iswhitespace(pIn[0]) ){
+    pIn++;
+  }
+  return pIn;
+}
+
+/*
+** Argument pIn points to a character that is part of a nul-terminated
+** string. Return a pointer to the first character at or after pIn that
+** is not a "bareword" character (see sqlite3Fts5IsBareword()). If pIn
+** itself does not point at a bareword character, NULL is returned.
+*/
+static const char *fts5ConfigSkipBareword(const char *pIn){
+  const char *pEnd = pIn;
+  while( sqlite3Fts5IsBareword(*pEnd) ){
+    pEnd++;
+  }
+  return (pEnd==pIn) ? 0 : pEnd;
+}
+
+/*
+** Return 1 if a is one of the ASCII digits '0' to '9', or 0 otherwise.
+*/
+static int fts5_isdigit(char a){
+  if( a<'0' || a>'9' ){
+    return 0;
+  }
+  return 1;
+}
+
+
+
+/*
+** Argument pIn points to the first character of what may be an SQL
+** literal within a nul-terminated string. Recognized forms are:
+**
+**   * NULL (any case),
+**   * a blob literal - x'...' or X'...' - with an even number of hex digits,
+**   * a single-quoted string, in which '' is an escaped quote, and
+**   * a number: optional sign, digits, optional ".digits" fraction.
+**
+** Return a pointer to the character immediately following the literal,
+** or NULL if pIn does not point to a well-formed literal.
+**
+** When scanning a quoted string the nul-terminator is tested for before
+** the cursor is advanced, so an unterminated string (including a lone
+** quote character) never causes a read past the end of the input.
+*/
+static const char *fts5ConfigSkipLiteral(const char *pIn){
+  const char *p = pIn;
+  switch( *p ){
+    case 'n': case 'N':
+      if( sqlite3_strnicmp("null", p, 4)==0 ){
+        p = &p[4];
+      }else{
+        p = 0;
+      }
+      break;
+
+    case 'x': case 'X':
+      p++;
+      if( *p=='\'' ){
+        p++;
+        while( (*p>='a' && *p<='f')
+            || (*p>='A' && *p<='F')
+            || (*p>='0' && *p<='9')
+        ){
+          p++;
+        }
+        /* (p-pIn) is 2 plus the number of hex digits, so this requires
+        ** an even number of digits. */
+        if( *p=='\'' && 0==((p-pIn)%2) ){
+          p++;
+        }else{
+          p = 0;
+        }
+      }else{
+        p = 0;
+      }
+      break;
+
+    case '\'':
+      p++;
+      for(;;){
+        if( *p==0 ){
+          /* End of input reached before the closing quote */
+          p = 0;
+          break;
+        }
+        if( *p=='\'' ){
+          p++;
+          if( *p!='\'' ) break;   /* That was the closing quote */
+          /* Otherwise p points at the 2nd half of an escaped quote */
+        }
+        p++;
+      }
+      break;
+
+    default:
+      /* maybe a number */
+      if( *p=='+' || *p=='-' ) p++;
+      while( fts5_isdigit(*p) ) p++;
+
+      /* At this point, if the literal was an integer, the parse is
+      ** finished. Or, if it is a floating point value, it may continue
+      ** with a decimal point followed by at least one digit. */
+      if( *p=='.' && fts5_isdigit(p[1]) ){
+        p += 2;
+        while( fts5_isdigit(*p) ) p++;
+      }
+      if( p==pIn ) p = 0;
+
+      break;
+  }
+
+  return p;
+}
+
+/*
+** The first character of the string pointed to by argument z is guaranteed
+** to be an open-quote character (see function fts5_isopenquote()).
+**
+** This function searches for the corresponding close-quote character within
+** the string, dequotes the string in place and adds a new nul-terminator
+** byte. A doubled close-quote character inside the string is an escape
+** for a single instance of that character.
+**
+** The value returned is the byte offset of the character immediately
+** following the close-quote. If the end of the string is reached without
+** finding a close-quote, the loop simply stops there: the text copied so
+** far is nul-terminated and the offset of the original terminator is
+** returned. This function never returns a negative value.
+*/
+static int fts5Dequote(char *z){
+ char q;
+ int iIn = 1;
+ int iOut = 0;
+ q = z[0];
+
+ /* Set stack variable q to the close-quote character */
+ assert( q=='[' || q=='\'' || q=='"' || q=='`' );
+ if( q=='[' ) q = ']';
+
+ /* NOTE(review): ALWAYS() is defined elsewhere - presumably it marks a
+ ** condition that is expected to be true in practice; confirm. */
+ while( ALWAYS(z[iIn]) ){
+ if( z[iIn]==q ){
+ if( z[iIn+1]!=q ){
+ /* Character iIn was the close quote. */
+ iIn++;
+ break;
+ }else{
+ /* Character iIn and iIn+1 form an escaped quote character. Skip
+ ** the input cursor past both and copy a single quote character
+ ** to the output buffer. */
+ iIn += 2;
+ z[iOut++] = q;
+ }
+ }else{
+ z[iOut++] = z[iIn++];
+ }
+ }
+
+ z[iOut] = '\0';
+ return iIn;
+}
+
+/*
+** If the string z begins with one of the four open-quote characters
+** (see fts5_isopenquote()), strip the quotes in place using
+** fts5Dequote(). Otherwise, leave the string as it is.
+**
+** Examples:
+**
+** "abc" becomes abc
+** 'xyz' becomes xyz
+** [pqr] becomes pqr
+** `mno` becomes mno
+*/
+static void sqlite3Fts5Dequote(char *z){
+  assert( 0==fts5_iswhitespace(z[0]) );
+  if( fts5_isopenquote(z[0]) ){
+    fts5Dequote(z);
+  }
+}
+
+
+/*
+** One entry of a name-to-value mapping table, as searched by
+** fts5ConfigSetEnum(). Such a table is terminated by an entry with zName
+** set to NULL.
+*/
+struct Fts5Enum {
+ const char *zName;
+ int eVal;
+};
+typedef struct Fts5Enum Fts5Enum;
+
+/*
+** Search the table aEnum[] (terminated by an entry with a NULL zName) for
+** the entry whose name begins with zEnum, ignoring case.
+**
+** If exactly one entry matches, *peVal is set to its eVal and SQLITE_OK
+** is returned. If no entry matches, *peVal is set to -1 and SQLITE_ERROR
+** is returned. If zEnum is ambiguous (a prefix of more than one name),
+** SQLITE_ERROR is returned and *peVal is not written.
+*/
+static int fts5ConfigSetEnum(
+  const Fts5Enum *aEnum,
+  const char *zEnum,
+  int *peVal
+){
+  const Fts5Enum *pE;             /* Used to iterate through aEnum[] */
+  int nEnum = (int)strlen(zEnum); /* Number of bytes to compare */
+  int eFound = -1;                /* Value of the matching entry, if any */
+
+  for(pE=aEnum; pE->zName; pE++){
+    if( sqlite3_strnicmp(pE->zName, zEnum, nEnum)!=0 ){
+      continue;
+    }
+    if( eFound>=0 ){
+      /* A second match - zEnum is ambiguous */
+      return SQLITE_ERROR;
+    }
+    eFound = pE->eVal;
+  }
+
+  *peVal = eFound;
+  return (eFound>=0) ? SQLITE_OK : SQLITE_ERROR;
+}
+
+/*
+** Parse a "special" CREATE VIRTUAL TABLE directive and update
+** configuration object pConfig as appropriate.
+**
+** If successful, object pConfig is updated and SQLITE_OK returned. If
+** an error occurs, an SQLite error code is returned and an error message
+** may be left in *pzErr. It is the responsibility of the caller to
+** eventually free any such error message using sqlite3_free().
+*/
+static int fts5ConfigParseSpecial(
+ Fts5Global *pGlobal, /* Passed through to sqlite3Fts5GetTokenizer() */
+ Fts5Config *pConfig, /* Configuration object to update */
+ const char *zCmd, /* Special command to parse */
+ const char *zArg, /* Argument to parse */
+ char **pzErr /* OUT: Error message */
+){
+ int rc = SQLITE_OK;
+ int nCmd = (int)strlen(zCmd);
+ /* Every directive name below is compared against only the first nCmd
+ ** bytes of zCmd, ignoring case. zCmd therefore matches a directive when
+ ** it is a prefix of that directive's name, and the first branch that
+ ** matches wins (so "content" is tested before "content_rowid"). */
+
+ /* prefix=N[,N...] : append each length to pConfig->aPrefix[]. */
+ if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
+ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
+ const char *p;
+ int bFirst = 1;
+ /* The array is allocated once and reused if several prefix=
+ ** directives are supplied. */
+ if( pConfig->aPrefix==0 ){
+ pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
+ if( rc ) return rc;
+ }
+
+ p = zArg;
+ while( 1 ){
+ int nPre = 0;
+
+ /* Skip spaces and, after the first entry, an optional single comma.
+ ** An empty remainder ends the list. */
+ while( p[0]==' ' ) p++;
+ if( bFirst==0 && p[0]==',' ){
+ p++;
+ while( p[0]==' ' ) p++;
+ }else if( p[0]=='\0' ){
+ break;
+ }
+ if( p[0]<'0' || p[0]>'9' ){
+ *pzErr = sqlite3_mprintf("malformed prefix=... directive");
+ rc = SQLITE_ERROR;
+ break;
+ }
+
+ if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
+ *pzErr = sqlite3_mprintf(
+ "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
+ );
+ rc = SQLITE_ERROR;
+ break;
+ }
+
+ /* Accumulate decimal digits. The nPre<1000 test stops the loop before
+ ** the value can overflow; the range check below then rejects it. */
+ while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
+ nPre = nPre*10 + (p[0] - '0');
+ p++;
+ }
+
+ if( nPre<=0 || nPre>=1000 ){
+ *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
+ rc = SQLITE_ERROR;
+ break;
+ }
+
+ pConfig->aPrefix[pConfig->nPrefix] = nPre;
+ pConfig->nPrefix++;
+ bFirst = 0;
+ }
+ assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
+ return rc;
+ }
+
+ /* tokenize=... : split zArg into words and hand them to
+ ** sqlite3Fts5GetTokenizer(). */
+ if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
+ const char *p = (const char*)zArg;
+ int nArg = (int)strlen(zArg) + 1;
+ /* azArg[] has one slot per input byte (plus one) and pDel has two bytes
+ ** per input byte (plus two). Each word copied below consumes its own
+ ** length plus one byte of pDel. */
+ char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
+ char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
+ char *pSpace = pDel;
+
+ if( azArg && pSpace ){
+ if( pConfig->pTok ){
+ *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
+ rc = SQLITE_ERROR;
+ }else{
+ /* nArg is reused here as the count of words found so far. */
+ for(nArg=0; p && *p; nArg++){
+ const char *p2 = fts5ConfigSkipWhitespace(p);
+ if( *p2=='\'' ){
+ p = fts5ConfigSkipLiteral(p2);
+ }else{
+ p = fts5ConfigSkipBareword(p2);
+ }
+ /* p is NULL if the word could not be skipped; the loop then ends
+ ** and the parse error is reported below. */
+ if( p ){
+ memcpy(pSpace, p2, p-p2);
+ azArg[nArg] = pSpace;
+ sqlite3Fts5Dequote(pSpace);
+ pSpace += (p - p2) + 1;
+ p = fts5ConfigSkipWhitespace(p);
+ }
+ }
+ if( p==0 ){
+ *pzErr = sqlite3_mprintf("parse error in tokenize directive");
+ rc = SQLITE_ERROR;
+ }else{
+ rc = sqlite3Fts5GetTokenizer(pGlobal,
+ (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi,
+ pzErr
+ );
+ }
+ }
+ }
+
+ /* Both scratch allocations are released on every path, including the
+ ** path where one of them failed (rc was set by sqlite3Fts5MallocZero). */
+ sqlite3_free(azArg);
+ sqlite3_free(pDel);
+ return rc;
+ }
+
+ /* content=NAME selects external content; content= (empty) selects
+ ** contentless mode. Only one content= directive is accepted. */
+ if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
+ if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
+ *pzErr = sqlite3_mprintf("multiple content=... directives");
+ rc = SQLITE_ERROR;
+ }else{
+ if( zArg[0] ){
+ pConfig->eContent = FTS5_CONTENT_EXTERNAL;
+ pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
+ }else{
+ pConfig->eContent = FTS5_CONTENT_NONE;
+ }
+ }
+ return rc;
+ }
+
+ /* content_rowid=NAME : stored as a copy in pConfig->zContentRowid. */
+ if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
+ if( pConfig->zContentRowid ){
+ *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
+ rc = SQLITE_ERROR;
+ }else{
+ pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
+ }
+ return rc;
+ }
+
+ /* columnsize=0|1 : the argument must be exactly one character. */
+ if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
+ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
+ *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
+ rc = SQLITE_ERROR;
+ }else{
+ pConfig->bColumnsize = (zArg[0]=='1');
+ }
+ return rc;
+ }
+
+ /* detail=none|full|columns : resolved through fts5ConfigSetEnum(). */
+ if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
+ const Fts5Enum aDetail[] = {
+ { "none", FTS5_DETAIL_NONE },
+ { "full", FTS5_DETAIL_FULL },
+ { "columns", FTS5_DETAIL_COLUMNS },
+ { 0, 0 }
+ };
+
+ if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
+ *pzErr = sqlite3_mprintf("malformed detail=... directive");
+ }
+ return rc;
+ }
+
+ /* No directive matched. */
+ *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
+ return SQLITE_ERROR;
+}
+
+/*
+** Install the default tokenizer in pConfig. This is done by calling
+** sqlite3Fts5GetTokenizer() with no tokenizer name or arguments (azArg==0,
+** nArg==0) and no error-message pointer. The new tokenizer instance and its
+** method table are written to Fts5Config.pTok and Fts5Config.pTokApi, both
+** of which must be NULL on entry.
+**
+** The return value is whatever sqlite3Fts5GetTokenizer() returns.
+*/
+static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
+  int rc;
+  assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
+  rc = sqlite3Fts5GetTokenizer(
+      pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0
+  );
+  return rc;
+}
+
+/*
+** Gobble up the first bareword or quoted word from the input buffer zIn.
+** Return a pointer to the character immediately following the last in
+** the gobbled word if successful, or a NULL pointer otherwise.
+**
+** On success, *pzOut is set to point to a new buffer (obtained from
+** sqlite3_malloc(), to be released by the caller with sqlite3_free())
+** containing a nul-terminated copy of the gobbled word. If the word began
+** with an open-quote character it is dequoted and *pbQuoted is set to 1.
+** On failure *pzOut is left set to NULL and no buffer is returned.
+**
+** *pRc must be SQLITE_OK when this function is called (this is asserted).
+** If an OOM occurs within this function, *pRc is set to SQLITE_NOMEM and
+** NULL is returned. *pRc is *not* set if a parse error occurs, in which
+** case NULL is also returned.
+*/
+static const char *fts5ConfigGobbleWord(
+  int *pRc,                       /* IN/OUT: Error code */
+  const char *zIn,                /* Buffer to gobble string/bareword from */
+  char **pzOut,                   /* OUT: malloc'd buffer containing str/bw */
+  int *pbQuoted                   /* OUT: Set to true if dequoting required */
+){
+  const char *zRet = 0;
+
+  int nIn = (int)strlen(zIn);
+  char *zOut = sqlite3_malloc(nIn+1);
+
+  assert( *pRc==SQLITE_OK );
+  *pbQuoted = 0;
+  *pzOut = 0;
+
+  if( zOut==0 ){
+    *pRc = SQLITE_NOMEM;
+  }else{
+    /* Work on a private copy so that dequoting can be done in place. */
+    memcpy(zOut, zIn, nIn+1);
+    if( fts5_isopenquote(zOut[0]) ){
+      int ii = fts5Dequote(zOut);
+      zRet = &zIn[ii];
+      *pbQuoted = 1;
+    }else{
+      zRet = fts5ConfigSkipBareword(zIn);
+      /* fts5ConfigSkipBareword() returns NULL when zIn does not start with
+      ** a bareword. Only truncate the copy when a word was actually found;
+      ** indexing zOut[] with (NULL - zIn) would write outside the buffer. */
+      if( zRet ){
+        zOut[zRet-zIn] = '\0';
+      }
+    }
+  }
+
+  if( zRet==0 ){
+    sqlite3_free(zOut);
+  }else{
+    *pzOut = zOut;
+  }
+
+  return zRet;
+}
+
+/*
+** Register column zCol, with optional column option zArg, in configuration
+** object p.
+**
+** The names FTS5_RANK_NAME and FTS5_ROWID_NAME are reserved, and the only
+** recognized option is "unindexed" (case-insensitive). Either problem
+** leaves a message in *pzErr and produces SQLITE_ERROR.
+**
+** zCol is stored in p->azCol[] and p->nCol is incremented in every case,
+** including the error cases.
+*/
+static int fts5ConfigParseColumn(
+  Fts5Config *p,
+  char *zCol,
+  char *zArg,
+  char **pzErr
+){
+  int rc = SQLITE_OK;
+  const int bReserved = (
+      sqlite3_stricmp(zCol, FTS5_RANK_NAME)==0
+   || sqlite3_stricmp(zCol, FTS5_ROWID_NAME)==0
+  );
+
+  if( bReserved ){
+    *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
+    rc = SQLITE_ERROR;
+  }else if( zArg!=0 ){
+    if( sqlite3_stricmp(zArg, "unindexed")!=0 ){
+      *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
+      rc = SQLITE_ERROR;
+    }else{
+      p->abUnindexed[p->nCol] = 1;
+    }
+  }
+
+  p->azCol[p->nCol] = zCol;
+  p->nCol++;
+  return rc;
+}
+
+/*
+** Build the Fts5Config.zContentExprlist string, which must be NULL on
+** entry. The list always starts with "T.<zContentRowid>". Unless the table
+** is contentless (FTS5_CONTENT_NONE), one further entry per column follows:
+** the quoted column name for external content, or "T.c<N>" otherwise.
+**
+** Returns the error code accumulated by sqlite3Fts5BufferAppendPrintf().
+*/
+static int fts5ConfigMakeExprlist(Fts5Config *p){
+  Fts5Buffer buf = {0, 0, 0};
+  int rc = SQLITE_OK;
+
+  sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
+
+  if( p->eContent!=FTS5_CONTENT_NONE ){
+    const int bExternal = (p->eContent==FTS5_CONTENT_EXTERNAL);
+    int iCol;
+    for(iCol=0; iCol<p->nCol; iCol++){
+      if( bExternal ){
+        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[iCol]);
+      }else{
+        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", iCol);
+      }
+    }
+  }
+
+  assert( p->zContentExprlist==0 );
+  p->zContentExprlist = (char*)buf.p;
+  return rc;
+}
+
+/*
+** Arguments nArg/azArg contain the string arguments passed to the xCreate
+** or xConnect method of the virtual table. This function attempts to
+** allocate an instance of Fts5Config containing the results of parsing
+** those arguments. azArg[1] is copied as the database name and azArg[2]
+** as the table name; arguments from index 3 onwards are each parsed as
+** either an "option = value" directive or a column definition.
+**
+** If successful, SQLITE_OK is returned and *ppOut is set to point to the
+** new Fts5Config object. If an error occurs, an SQLite error code is
+** returned, *ppOut is set to NULL and an error message may be left in
+** *pzErr. It is the responsibility of the caller to eventually free any
+** such error message using sqlite3_free().
+*/
+static int sqlite3Fts5ConfigParse(
+  Fts5Global *pGlobal,
+  sqlite3 *db,
+  int nArg,                       /* Number of arguments */
+  const char **azArg,             /* Array of nArg CREATE VIRTUAL TABLE args */
+  Fts5Config **ppOut,             /* OUT: Results of parse */
+  char **pzErr                    /* OUT: Error message */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  Fts5Config *pRet;               /* New object to return */
+  int i;
+  int nByte;
+
+  *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
+  if( pRet==0 ) return SQLITE_NOMEM;
+  memset(pRet, 0, sizeof(Fts5Config));
+  pRet->db = db;
+  pRet->iCookie = -1;
+
+  /* azCol[] and abUnindexed[] share a single allocation: nArg pointers
+  ** followed by nArg bytes. If the allocation fails, leave abUnindexed NULL
+  ** rather than doing pointer arithmetic on a NULL azCol. Neither array is
+  ** touched in that case because rc is no longer SQLITE_OK. */
+  nByte = nArg * (sizeof(char*) + sizeof(u8));
+  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
+  pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
+  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
+  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
+  pRet->bColumnsize = 1;
+  pRet->eDetail = FTS5_DETAIL_FULL;
+#ifdef SQLITE_DEBUG
+  pRet->bPrefixIndex = 1;
+#endif
+  if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
+    *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
+    rc = SQLITE_ERROR;
+  }
+
+  for(i=3; rc==SQLITE_OK && i<nArg; i++){
+    const char *zOrig = azArg[i];
+    const char *z;
+    char *zOne = 0;
+    char *zTwo = 0;
+    int bOption = 0;
+    int bMustBeCol = 0;
+
+    /* First word. A quoted first word can only be a column name, so a
+    ** following '=' is then a parse error (z is set to NULL). */
+    z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
+    z = fts5ConfigSkipWhitespace(z);
+    if( z && *z=='=' ){
+      bOption = 1;
+      z++;
+      if( bMustBeCol ) z = 0;
+    }
+    /* Optional second word. Anything left over after it is an error. */
+    z = fts5ConfigSkipWhitespace(z);
+    if( z && z[0] ){
+      int bDummy;
+      z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
+      if( z && z[0] ) z = 0;
+    }
+
+    if( rc==SQLITE_OK ){
+      if( z==0 ){
+        *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
+        rc = SQLITE_ERROR;
+      }else{
+        if( bOption ){
+          rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr);
+        }else{
+          /* fts5ConfigParseColumn() stores zOne in pRet->azCol[], so it
+          ** must not be freed below. */
+          rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
+          zOne = 0;
+        }
+      }
+    }
+
+    sqlite3_free(zOne);
+    sqlite3_free(zTwo);
+  }
+
+  /* If a tokenize= option was successfully parsed, the tokenizer has
+  ** already been allocated. Otherwise, allocate an instance of the default
+  ** tokenizer now. */
+  if( rc==SQLITE_OK && pRet->pTok==0 ){
+    rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
+  }
+
+  /* If no zContent option was specified, fill in the default values. */
+  if( rc==SQLITE_OK && pRet->zContent==0 ){
+    const char *zTail = 0;
+    assert( pRet->eContent==FTS5_CONTENT_NORMAL
+         || pRet->eContent==FTS5_CONTENT_NONE
+    );
+    if( pRet->eContent==FTS5_CONTENT_NORMAL ){
+      zTail = "content";
+    }else if( pRet->bColumnsize ){
+      zTail = "docsize";
+    }
+
+    if( zTail ){
+      pRet->zContent = sqlite3Fts5Mprintf(
+          &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
+      );
+    }
+  }
+
+  if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
+    pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
+  }
+
+  /* Formulate the zContentExprlist text */
+  if( rc==SQLITE_OK ){
+    rc = fts5ConfigMakeExprlist(pRet);
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts5ConfigFree(pRet);
+    *ppOut = 0;
+  }
+  return rc;
+}
+
+/*
+** Release configuration object pConfig and everything it owns: the
+** tokenizer instance (via its xDelete method), each column name, and the
+** various string and array members. Passing NULL is a harmless no-op.
+*/
+static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
+  int iCol;
+
+  if( pConfig==0 ) return;
+
+  if( pConfig->pTok ){
+    pConfig->pTokApi->xDelete(pConfig->pTok);
+  }
+  sqlite3_free(pConfig->zDb);
+  sqlite3_free(pConfig->zName);
+
+  /* Column names are individually allocated; abUnindexed[] is not freed
+  ** separately here. */
+  for(iCol=0; iCol<pConfig->nCol; iCol++){
+    sqlite3_free(pConfig->azCol[iCol]);
+  }
+  sqlite3_free(pConfig->azCol);
+
+  sqlite3_free(pConfig->aPrefix);
+  sqlite3_free(pConfig->zRank);
+  sqlite3_free(pConfig->zRankArgs);
+  sqlite3_free(pConfig->zContent);
+  sqlite3_free(pConfig->zContentRowid);
+  sqlite3_free(pConfig->zContentExprlist);
+  sqlite3_free(pConfig);
+}
+
+/*
+** Declare the virtual table schema described by pConfig by building a
+** "CREATE TABLE x(...)" statement and passing it to sqlite3_declare_vtab().
+** The statement lists each configured column, followed by two HIDDEN
+** columns: one named after the table itself and one named FTS5_RANK_NAME.
+**
+** Returns SQLITE_OK if successful, or an SQLite error code otherwise.
+*/
+static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
+  int rc = SQLITE_OK;
+  int iCol = 0;
+  char *zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
+
+  /* Each pass hands the previous string to %z and continues with the newly
+  ** built one. The loop stops early once zSql becomes NULL. */
+  while( zSql && iCol<pConfig->nCol ){
+    const char *zSep = ", ";
+    if( iCol==0 ) zSep = "";
+    zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[iCol]);
+    iCol++;
+  }
+  zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
+      zSql, pConfig->zName, FTS5_RANK_NAME
+  );
+
+  assert( zSql || rc==SQLITE_NOMEM );
+  if( zSql!=0 ){
+    rc = sqlite3_declare_vtab(pConfig->db, zSql);
+    sqlite3_free(zSql);
+  }
+
+  return rc;
+}
+
+/*
+** Tokenize the nText bytes of text at pText using the tokenizer attached
+** to pConfig.
+**
+** The work is delegated to the tokenizer's xTokenize() method, which is
+** passed the tokenizer instance, pCtx, flags, the text and the xToken
+** callback. The callback has the signature:
+**
+**     int xToken(void*, int, const char*, int, int, int)
+**
+** The first argument is presumably the pCtx value supplied here; the
+** meaning of the remaining arguments is defined by the tokenizer API and
+** is not visible in this function.
+**
+** If pText is NULL there is nothing to tokenize and SQLITE_OK is returned
+** without invoking the tokenizer. Otherwise the value returned by
+** xTokenize() is returned to the caller unchanged.
+*/
+static int sqlite3Fts5Tokenize(
+  Fts5Config *pConfig,            /* FTS5 Configuration object */
+  int flags,                      /* FTS5_TOKENIZE_* flags */
+  const char *pText, int nText,   /* Text to tokenize */
+  void *pCtx,                     /* Context passed to xToken() */
+  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
+){
+  if( pText!=0 ){
+    return pConfig->pTokApi->xTokenize(
+        pConfig->pTok, pCtx, flags, pText, nText, xToken
+    );
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Argument pIn points to the first character in what is expected to be
+** a comma-separated list of SQL literals followed by a ')' character.
+** Whitespace is permitted on either side of each literal. If the input
+** really has this form, return a pointer to the ')'. Otherwise, return
+** NULL to indicate a parse error.
+*/
+static const char *fts5ConfigSkipArgs(const char *pIn){
+  const char *z = pIn;
+
+  for(;;){
+    z = fts5ConfigSkipWhitespace(z);
+    z = fts5ConfigSkipLiteral(z);
+    z = fts5ConfigSkipWhitespace(z);
+    if( z==0 ) return 0;            /* Literal could not be parsed */
+    if( *z==')' ) return z;         /* End of list */
+    if( *z!=',' ) return 0;         /* Something other than a separator */
+    z++;
+  }
+}
+
+/*
+** Parameter zIn contains a rank() function specification. The format of
+** this is:
+**
+**   + Bareword (function name)
+**   + Open parenthesis - "("
+**   + Zero or more SQL literals in a comma separated list
+**   + Close parenthesis - ")"
+**
+** On success, SQLITE_OK is returned, *pzRank is set to a newly allocated
+** copy of the function name and *pzRankArgs to a newly allocated copy of
+** the text between the parentheses, or to NULL if the first
+** non-whitespace character after "(" is ")". On failure both outputs are
+** NULL and the return value is SQLITE_ERROR for a malformed (or NULL)
+** specification, or the error code set by sqlite3Fts5MallocZero().
+*/
+static int sqlite3Fts5ConfigParseRank(
+  const char *zIn,                /* Input string */
+  char **pzRank,                  /* OUT: Rank function name */
+  char **pzRankArgs               /* OUT: Rank function arguments */
+){
+  int rc = SQLITE_OK;
+  char *zName = 0;
+  char *zArgs = 0;
+
+  *pzRank = 0;
+  *pzRankArgs = 0;
+
+  if( zIn==0 ){
+    rc = SQLITE_ERROR;
+  }else{
+    const char *zStart = fts5ConfigSkipWhitespace(zIn);
+    const char *zCsr = fts5ConfigSkipBareword(zStart);
+
+    /* Function name */
+    if( zCsr==0 ){
+      rc = SQLITE_ERROR;
+    }else{
+      zName = sqlite3Fts5MallocZero(&rc, 1 + zCsr - zStart);
+      if( zName ) memcpy(zName, zStart, zCsr-zStart);
+    }
+
+    /* Open parenthesis */
+    if( rc==SQLITE_OK ){
+      zCsr = fts5ConfigSkipWhitespace(zCsr);
+      if( *zCsr!='(' ) rc = SQLITE_ERROR;
+      zCsr++;
+    }
+
+    /* Argument list, if it is not empty */
+    if( rc==SQLITE_OK ){
+      const char *zFirst;
+      zCsr = fts5ConfigSkipWhitespace(zCsr);
+      zFirst = zCsr;
+      if( *zCsr!=')' ){
+        zCsr = fts5ConfigSkipArgs(zCsr);
+        if( zCsr==0 ){
+          rc = SQLITE_ERROR;
+        }else{
+          zArgs = sqlite3Fts5MallocZero(&rc, 1 + zCsr - zFirst);
+          if( zArgs ) memcpy(zArgs, zFirst, zCsr-zFirst);
+        }
+      }
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    *pzRank = zName;
+    *pzRankArgs = zArgs;
+  }else{
+    sqlite3_free(zName);
+    assert( zArgs==0 );
+  }
+  return rc;
+}
+
+/*
+** Apply a single key/value configuration setting to pConfig.
+**
+** zKey is matched case-insensitively against "pgsz", "hashsize",
+** "automerge", "crisismerge" and "rank". If the key is not recognized, or
+** the value is not acceptable for the key, *pbBadkey is set to 1 and
+** pConfig is left unchanged for that key. *pbBadkey is never cleared here,
+** so the caller must initialize it.
+**
+** The return value is SQLITE_OK except when parsing a "rank" value fails
+** with an error other than SQLITE_ERROR, in which case that error code is
+** returned.
+*/
+static int sqlite3Fts5ConfigSetValue(
+ Fts5Config *pConfig,
+ const char *zKey,
+ sqlite3_value *pVal,
+ int *pbBadkey
+){
+ int rc = SQLITE_OK;
+
+ /* pgsz: integer in the range 1..FTS5_MAX_PAGE_SIZE. A non-integer value
+ ** leaves pgsz at 0 and is rejected by the range check. */
+ if( 0==sqlite3_stricmp(zKey, "pgsz") ){
+ int pgsz = 0;
+ if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
+ pgsz = sqlite3_value_int(pVal);
+ }
+ if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){
+ *pbBadkey = 1;
+ }else{
+ pConfig->pgsz = pgsz;
+ }
+ }
+
+ /* hashsize: any positive integer. */
+ else if( 0==sqlite3_stricmp(zKey, "hashsize") ){
+ int nHashSize = -1;
+ if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
+ nHashSize = sqlite3_value_int(pVal);
+ }
+ if( nHashSize<=0 ){
+ *pbBadkey = 1;
+ }else{
+ pConfig->nHashSize = nHashSize;
+ }
+ }
+
+ /* automerge: integer in the range 0..64. The value 1 is replaced by
+ ** FTS5_DEFAULT_AUTOMERGE. */
+ else if( 0==sqlite3_stricmp(zKey, "automerge") ){
+ int nAutomerge = -1;
+ if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
+ nAutomerge = sqlite3_value_int(pVal);
+ }
+ if( nAutomerge<0 || nAutomerge>64 ){
+ *pbBadkey = 1;
+ }else{
+ if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
+ pConfig->nAutomerge = nAutomerge;
+ }
+ }
+
+ /* crisismerge: any non-negative integer. The values 0 and 1 are replaced
+ ** by FTS5_DEFAULT_CRISISMERGE. */
+ else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
+ int nCrisisMerge = -1;
+ if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
+ nCrisisMerge = sqlite3_value_int(pVal);
+ }
+ if( nCrisisMerge<0 ){
+ *pbBadkey = 1;
+ }else{
+ if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
+ pConfig->nCrisisMerge = nCrisisMerge;
+ }
+ }
+
+ /* rank: text parsed by sqlite3Fts5ConfigParseRank(). On success the old
+ ** zRank/zRankArgs strings are freed and replaced. A parse failure
+ ** (SQLITE_ERROR) is reported as a bad key rather than as an error. */
+ else if( 0==sqlite3_stricmp(zKey, "rank") ){
+ const char *zIn = (const char*)sqlite3_value_text(pVal);
+ char *zRank;
+ char *zRankArgs;
+ rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
+ if( rc==SQLITE_OK ){
+ sqlite3_free(pConfig->zRank);
+ sqlite3_free(pConfig->zRankArgs);
+ pConfig->zRank = zRank;
+ pConfig->zRankArgs = zRankArgs;
+ }else if( rc==SQLITE_ERROR ){
+ rc = SQLITE_OK;
+ *pbBadkey = 1;
+ }
+ }else{
+ /* Unknown key. */
+ *pbBadkey = 1;
+ }
+ return rc;
+}
+
+/*
+** Load the contents of the %_config table into memory.
+**
+** pConfig is first reset to the compiled-in defaults for pgsz, nAutomerge,
+** nCrisisMerge and nHashSize. Each (k, v) row is then applied through
+** sqlite3Fts5ConfigSetValue(), except for the "version" row, whose value is
+** compared against FTS5_CURRENT_VERSION once all rows have been read.
+**
+** Returns SQLITE_OK and stores iCookie in pConfig->iCookie on success.
+** On a version mismatch SQLITE_ERROR is returned and, if pConfig->pzErrmsg
+** is set, an error message is written through it.
+*/
+static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
+ const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
+ char *zSql;
+ sqlite3_stmt *p = 0;
+ int rc = SQLITE_OK;
+ int iVersion = 0;
+
+ /* Set default values */
+ pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
+ pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
+ pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
+ pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
+
+ zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
+ if( zSql ){
+ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
+ sqlite3_free(zSql);
+ }
+
+ assert( rc==SQLITE_OK || p==0 );
+ if( rc==SQLITE_OK ){
+ while( SQLITE_ROW==sqlite3_step(p) ){
+ const char *zK = (const char*)sqlite3_column_text(p, 0);
+ sqlite3_value *pVal = sqlite3_column_value(p, 1);
+ if( 0==sqlite3_stricmp(zK, "version") ){
+ iVersion = sqlite3_value_int(pVal);
+ }else{
+ /* Both the bad-key flag and the return code of
+ ** sqlite3Fts5ConfigSetValue() are ignored here, so unknown or
+ ** invalid rows simply leave the defaults in place. */
+ int bDummy = 0;
+ sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
+ }
+ }
+ /* Any error hit by sqlite3_step() is reported by sqlite3_finalize(). */
+ rc = sqlite3_finalize(p);
+ }
+
+ /* iVersion is still 0 if the table held no "version" row. */
+ if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){
+ rc = SQLITE_ERROR;
+ if( pConfig->pzErrmsg ){
+ assert( 0==*pConfig->pzErrmsg );
+ *pConfig->pzErrmsg = sqlite3_mprintf(
+ "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
+ iVersion, FTS5_CURRENT_VERSION
+ );
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ pConfig->iCookie = iCookie;
+ }
+ return rc;
+}
+
+#line 1 "fts5_expr.c"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+
+
+/* #include "fts5Int.h" */
+/* #include "fts5parse.h" */
+
+/*
+** All token types in the generated fts5parse.h file are greater than 0.
+** FTS5_EOF is therefore used as the end-of-input token type.
+*/
+#define FTS5_EOF 0
+
+/* The value 0x7fffffffffffffff, built from two 32-bit halves. */
+#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
+
+typedef struct Fts5ExprTerm Fts5ExprTerm;
+
+/*
+** Functions generated by lemon from fts5parse.y.
+*/
+static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
+static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
+static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
+/* The trace hook is only declared when NDEBUG is not defined. */
+#ifndef NDEBUG
+#include <stdio.h>
+static void sqlite3Fts5ParserTrace(FILE*, char*);
+#endif
+
+
+/*
+** A parsed full-text expression, as built by sqlite3Fts5ExprNew(): the
+** root of the expression tree together with the flat array of phrase
+** pointers collected while parsing.
+*/
+struct Fts5Expr {
+ Fts5Index *pIndex; /* Set to NULL by sqlite3Fts5ExprNew() */
+ Fts5Config *pConfig; /* Configuration the expression was parsed for */
+ Fts5ExprNode *pRoot; /* Root node of the expression tree */
+ int bDesc; /* Iterate in descending rowid order */
+ int nPhrase; /* Number of phrases in expression */
+ Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */
+};
+
+/*
+** A single node in an expression tree.
+**
+** eType:
+** Expression node type. Always one of:
+**
+** FTS5_AND (nChild, apChild valid)
+** FTS5_OR (nChild, apChild valid)
+** FTS5_NOT (nChild, apChild valid)
+** FTS5_STRING (pNear valid)
+** FTS5_TERM (pNear valid)
+**
+** apChild[] is declared with one element; nChild gives the number of
+** entries actually present.
+*/
+struct Fts5ExprNode {
+ int eType; /* Node type */
+ int bEof; /* True at EOF */
+ int bNomatch; /* True if entry is not a match */
+
+ /* Next method for this node. */
+ int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);
+
+ i64 iRowid; /* Current rowid */
+ Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */
+
+ /* Child nodes. For a NOT node, this array always contains 2 entries. For
+ ** AND or OR nodes, it contains 2 or more entries. */
+ int nChild; /* Number of child nodes */
+ Fts5ExprNode *apChild[1]; /* Array of child nodes */
+};
+
+/*
+** True if node p is one of the two node types for which pNear is valid
+** (FTS5_TERM or FTS5_STRING). The argument is evaluated twice.
+*/
+#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)
+
+/*
+** Invoke the xNext method of an Fts5ExprNode object. This macro should be
+** used as if it has the same signature as the xNext() methods themselves.
+** Argument (b) is the node and is evaluated twice.
+*/
+#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
+
+/*
+** An instance of the following structure represents a single search term
+** or term prefix. Synonyms of a term are chained through pSynonym, and
+** each entry in the chain has its own iterator.
+*/
+struct Fts5ExprTerm {
+ int bPrefix; /* True for a prefix term */
+ char *zTerm; /* nul-terminated term */
+ Fts5IndexIter *pIter; /* Iterator for this term */
+ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */
+};
+
+/*
+** A phrase. One or more terms that must appear in a contiguous sequence
+** within a document for it to match. aTerm[] is declared with one element;
+** nTerm gives the number of entries actually present.
+*/
+struct Fts5ExprPhrase {
+ Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */
+ Fts5Buffer poslist; /* Current position list */
+ int nTerm; /* Number of entries in aTerm[] */
+ Fts5ExprTerm aTerm[1]; /* Terms that make up this phrase */
+};
+
+/*
+** One or more phrases that must appear within a certain token distance of
+** each other within each matching document. apPhrase[] is declared with
+** one element; nPhrase gives the number of entries actually present.
+*/
+struct Fts5ExprNearset {
+ int nNear; /* NEAR parameter */
+ Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */
+ int nPhrase; /* Number of entries in apPhrase[] array */
+ Fts5ExprPhrase *apPhrase[1]; /* Array of phrase pointers */
+};
+
+
+/*
+** Parse context. One instance is created on the stack by
+** sqlite3Fts5ExprNew() and passed to the lemon-generated parser.
+*/
+struct Fts5Parse {
+ Fts5Config *pConfig; /* Configuration being parsed against */
+ char *zErr; /* Error message set by sqlite3Fts5ParseError() */
+ int rc; /* SQLITE_OK until the first error is recorded */
+ int nPhrase; /* Size of apPhrase array */
+ Fts5ExprPhrase **apPhrase; /* Array of all phrases */
+ Fts5ExprNode *pExpr; /* Result of a successful parse */
+};
+
+/*
+** Record a parse error in pParse. The printf-style message is formatted
+** with sqlite3_vmprintf() and stored in pParse->zErr, and pParse->rc is set
+** to SQLITE_ERROR. Only the first error is kept: if pParse->rc is already
+** something other than SQLITE_OK this function does nothing.
+*/
+static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
+  va_list ap;
+  va_start(ap, zFmt);
+  if( SQLITE_OK==pParse->rc ){
+    char *zMsg = sqlite3_vmprintf(zFmt, ap);
+    pParse->zErr = zMsg;
+    pParse->rc = SQLITE_ERROR;
+  }
+  va_end(ap);
+}
+
+/*
+** Return 1 if character t is a space, tab, newline or carriage return,
+** or 0 otherwise.
+*/
+static int fts5ExprIsspace(char t){
+  switch( t ){
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/*
+** Read the first token from the nul-terminated string at *pz.
+**
+** Leading whitespace is skipped. On return pToken->p points at the first
+** byte of the token and pToken->n holds its length in bytes, *pz is
+** advanced past the token, and the token type is returned.
+**
+** If the input cannot be tokenized, an error is recorded in pParse via
+** sqlite3Fts5ParseError() and FTS5_EOF is returned without advancing *pz.
+*/
+static int fts5ExprGetToken(
+ Fts5Parse *pParse,
+ const char **pz, /* IN/OUT: Pointer into buffer */
+ Fts5Token *pToken
+){
+ const char *z = *pz;
+ int tok;
+
+ /* Skip past any whitespace */
+ while( fts5ExprIsspace(*z) ) z++;
+
+ /* Default to a one-byte token; the string cases below override n. */
+ pToken->p = z;
+ pToken->n = 1;
+ switch( *z ){
+ case '(': tok = FTS5_LP; break;
+ case ')': tok = FTS5_RP; break;
+ case '{': tok = FTS5_LCP; break;
+ case '}': tok = FTS5_RCP; break;
+ case ':': tok = FTS5_COLON; break;
+ case ',': tok = FTS5_COMMA; break;
+ case '+': tok = FTS5_PLUS; break;
+ case '*': tok = FTS5_STAR; break;
+ case '\0': tok = FTS5_EOF; break;
+
+ /* Quoted string. A doubled "" inside the string does not terminate it.
+ ** The token spans both the opening and the closing quote. */
+ case '"': {
+ const char *z2;
+ tok = FTS5_STRING;
+
+ for(z2=&z[1]; 1; z2++){
+ if( z2[0]=='"' ){
+ z2++;
+ if( z2[0]!='"' ) break;
+ }
+ if( z2[0]=='\0' ){
+ sqlite3Fts5ParseError(pParse, "unterminated string");
+ return FTS5_EOF;
+ }
+ }
+ pToken->n = (z2 - z);
+ break;
+ }
+
+ /* Bareword: a run of characters accepted by sqlite3Fts5IsBareword().
+ ** The exact, upper-case words OR, NOT and AND are operators; any other
+ ** bareword is an FTS5_STRING. */
+ default: {
+ const char *z2;
+ if( sqlite3Fts5IsBareword(z[0])==0 ){
+ sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
+ return FTS5_EOF;
+ }
+ tok = FTS5_STRING;
+ for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
+ pToken->n = (z2 - z);
+ if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR;
+ if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
+ if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
+ break;
+ }
+ }
+
+ *pz = &pToken->p[pToken->n];
+ return tok;
+}
+
+/*
+** Allocation callbacks handed to the lemon-generated parser. They wrap
+** sqlite3_malloc() and sqlite3_free(); the requested size is narrowed to
+** int before being passed on.
+*/
+static void *fts5ParseAlloc(u64 t){
+  int nByte = (int)t;
+  return sqlite3_malloc(nByte);
+}
+static void fts5ParseFree(void *p){
+  sqlite3_free(p);
+}
+
+/*
+** Parse the nul-terminated expression text zExpr and build a new Fts5Expr
+** object from it.
+**
+** The text is tokenized with fts5ExprGetToken() and each token is fed to
+** the lemon-generated parser until an error occurs or FTS5_EOF has been
+** delivered. If parsing succeeds but produces no expression node, the new
+** object is given a zeroed root node with bEof set.
+**
+** *ppNew and *pzErr are both cleared on entry. The return value is the
+** parse context's final error code; *pzErr receives any error message
+** recorded during parsing (the caller is presumably responsible for
+** releasing it with sqlite3_free(), as it comes from sqlite3_vmprintf()).
+** SQLITE_NOMEM is returned if the parser engine or the Fts5Expr object
+** cannot be allocated.
+*/
+static int sqlite3Fts5ExprNew(
+  Fts5Config *pConfig,            /* FTS5 Configuration */
+  const char *zExpr,              /* Expression text */
+  Fts5Expr **ppNew,
+  char **pzErr
+){
+  Fts5Parse sParse;
+  Fts5Token token;
+  const char *z = zExpr;
+  int t;                          /* Next token type */
+  void *pEngine;
+  Fts5Expr *pNew;
+
+  *ppNew = 0;
+  *pzErr = 0;
+  memset(&sParse, 0, sizeof(sParse));
+  pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
+  if( pEngine==0 ){ return SQLITE_NOMEM; }
+  sParse.pConfig = pConfig;
+
+  do {
+    t = fts5ExprGetToken(&sParse, &z, &token);
+    sqlite3Fts5Parser(pEngine, t, token, &sParse);
+  }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
+  sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
+
+  assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
+  if( sParse.rc==SQLITE_OK ){
+    *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
+    if( pNew==0 ){
+      sParse.rc = SQLITE_NOMEM;
+      sqlite3Fts5ParseNodeFree(sParse.pExpr);
+    }else{
+      if( !sParse.pExpr ){
+        const int nByte = sizeof(Fts5ExprNode);
+        pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte);
+        if( pNew->pRoot ){
+          pNew->pRoot->bEof = 1;
+        }
+      }else{
+        pNew->pRoot = sParse.pExpr;
+      }
+      pNew->pIndex = 0;
+      pNew->pConfig = pConfig;
+      pNew->apExprPhrase = sParse.apPhrase;
+      pNew->nPhrase = sParse.nPhrase;
+      /* pNew comes from sqlite3_malloc() and is not zeroed, so every field
+      ** must be set explicitly. Start in ascending rowid order. */
+      pNew->bDesc = 0;
+      /* Ownership of the phrase array has moved to pNew; clear the local
+      ** pointer so that the sqlite3_free() below does not release it. */
+      sParse.apPhrase = 0;
+    }
+  }
+
+  sqlite3_free(sParse.apPhrase);
+  *pzErr = sParse.zErr;
+  return sParse.rc;
+}
+
+/*
+** Recursively free expression node p: first each of its child nodes, then
+** its nearset (via sqlite3Fts5ParseNearsetFree()), then the node itself.
+** Passing NULL is a no-op.
+*/
+static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
+  int iChild;
+  if( p==0 ) return;
+  for(iChild=0; iChild<p->nChild; iChild++){
+    sqlite3Fts5ParseNodeFree(p->apChild[iChild]);
+  }
+  sqlite3Fts5ParseNearsetFree(p->pNear);
+  sqlite3_free(p);
+}
+
+/*
+** Free expression object p, including its node tree and its array of
+** phrase pointers. Passing NULL is a no-op.
+*/
+static void sqlite3Fts5ExprFree(Fts5Expr *p){
+  if( p==0 ) return;
+  sqlite3Fts5ParseNodeFree(p->pRoot);
+  sqlite3_free(p->apExprPhrase);
+  sqlite3_free(p);
+}
+
+/*
+** Argument pTerm must be a synonym iterator (pTerm->pSynonym!=0). Return
+** the current rowid that it points to: considering only those iterators in
+** the synonym chain that are not at EOF, this is the smallest rowid when
+** bDesc is 0 or the largest when bDesc is 1.
+**
+** If every iterator in the chain is at EOF, 0 is returned and, if pbEof is
+** not NULL, *pbEof is set to 1. *pbEof is never cleared by this function.
+*/
+static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
+  i64 iBest = 0;
+  int bFound = 0;
+  Fts5ExprTerm *pSyn = pTerm;
+
+  assert( pTerm->pSynonym );
+  assert( bDesc==0 || bDesc==1 );
+
+  while( pSyn ){
+    if( 0==sqlite3Fts5IterEof(pSyn->pIter) ){
+      i64 iCand = pSyn->pIter->iRowid;
+      int bTake = 1;
+      if( bFound ){
+        /* Ascending: take a smaller rowid. Descending: take any rowid that
+        ** is not smaller than the current best. */
+        bTake = (bDesc!=(iCand<iBest));
+      }
+      if( bTake ){
+        iBest = iCand;
+        bFound = 1;
+      }
+    }
+    pSyn = pSyn->pSynonym;
+  }
+
+  if( pbEof && bFound==0 ) *pbEof = 1;
+  return iBest;
+}
+
+/*
+** Argument pTerm must be a synonym iterator.
+**
+** Produce the position list for rowid iRowid across pTerm and all of its
+** synonyms. Only iterators that are not at EOF, currently point at iRowid
+** and have a non-empty position list take part.
+**
+** If exactly one iterator takes part, *pa/*pn are set to point directly at
+** that iterator's data and pBuf is not used. Otherwise the participating
+** lists are merged, with duplicate positions removed, into pBuf and
+** *pa/*pn are set to pBuf->p/pBuf->n (an empty list if no iterator took
+** part).
+**
+** Returns SQLITE_OK on success. On error *pa and *pn are not written,
+** although pBuf may already have been written to.
+*/
+static int fts5ExprSynonymList(
+ Fts5ExprTerm *pTerm,
+ i64 iRowid,
+ Fts5Buffer *pBuf, /* Use this buffer for space if required */
+ u8 **pa, int *pn
+){
+ Fts5PoslistReader aStatic[4];
+ Fts5PoslistReader *aIter = aStatic;
+ int nIter = 0;
+ int nAlloc = 4;
+ int rc = SQLITE_OK;
+ Fts5ExprTerm *p;
+
+ assert( pTerm->pSynonym );
+ /* Collect one poslist reader per participating synonym. The array starts
+ ** out on the stack and is doubled on the heap whenever it fills up. */
+ for(p=pTerm; p; p=p->pSynonym){
+ Fts5IndexIter *pIter = p->pIter;
+ if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){
+ if( pIter->nData==0 ) continue;
+ if( nIter==nAlloc ){
+ int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
+ Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
+ if( aNew==0 ){
+ rc = SQLITE_NOMEM;
+ goto synonym_poslist_out;
+ }
+ memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
+ nAlloc = nAlloc*2;
+ if( aIter!=aStatic ) sqlite3_free(aIter);
+ aIter = aNew;
+ }
+ sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
+ assert( aIter[nIter].bEof==0 );
+ nIter++;
+ }
+ }
+
+ if( nIter==1 ){
+ /* Single list: hand back the iterator's own buffer, no copy needed. */
+ *pa = (u8*)aIter[0].a;
+ *pn = aIter[0].n;
+ }else{
+ Fts5PoslistWriter writer = {0};
+ i64 iPrev = -1;
+ fts5BufferZero(pBuf);
+ /* Merge: on each pass, step every reader that is still sitting on the
+ ** position just written (iPrev), then emit the smallest position among
+ ** the readers that are not at EOF. The loop ends when no reader has
+ ** anything left (iMin unchanged) or a write fails. */
+ while( 1 ){
+ int i;
+ i64 iMin = FTS5_LARGEST_INT64;
+ for(i=0; i<nIter; i++){
+ if( aIter[i].bEof==0 ){
+ if( aIter[i].iPos==iPrev ){
+ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
+ }
+ if( aIter[i].iPos<iMin ){
+ iMin = aIter[i].iPos;
+ }
+ }
+ }
+ if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
+ rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
+ iPrev = iMin;
+ }
+ if( rc==SQLITE_OK ){
+ *pa = pBuf->p;
+ *pn = pBuf->n;
+ }
+ }
+
+ synonym_poslist_out:
+ if( aIter!=aStatic ) sqlite3_free(aIter);
+ return rc;
+}
+
+
+/*
+** All individual term iterators in pPhrase are guaranteed to be valid and
+** pointing to the same rowid when this function is called. This function
+** checks if the current rowid really is a match, and if so populates
+** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
+** is set to true if this is really a match, or false otherwise.
+**
+** SQLITE_OK is returned if no error occurs, or an SQLite error code
+** otherwise. It is not considered an error if the current rowid is
+** not a match.
+*/
+static int fts5ExprPhraseIsMatch(
+  Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
+  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
+  int *pbMatch                    /* OUT: Set to true if really a match */
+){
+  Fts5PoslistWriter writer = {0};
+  Fts5PoslistReader aStatic[4];
+  Fts5PoslistReader *aIter = aStatic;
+  int i;
+  int rc = SQLITE_OK;
+
+  fts5BufferZero(&pPhrase->poslist);
+
+  /* If the aStatic[] array is not large enough, allocate a large array
+  ** using sqlite3_malloc(). This approach could be improved upon. */
+  if( pPhrase->nTerm>ArraySize(aStatic) ){
+    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
+    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
+    if( !aIter ) return SQLITE_NOMEM;
+  }
+  /* Zeroing the array guarantees that bFlag and a are clear for every
+  ** entry, so the cleanup loop at ismatch_out is safe on early exits. */
+  memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
+
+  /* Initialize a term iterator for each term in the phrase */
+  for(i=0; i<pPhrase->nTerm; i++){
+    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
+    int n = 0;
+    int bFlag = 0;
+    u8 *a = 0;
+    if( pTerm->pSynonym ){
+      Fts5Buffer buf = {0, 0, 0};
+      rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
+      if( rc ){
+        /* On error fts5ExprSynonymList() leaves "a" untouched (still NULL)
+        ** but may already have allocated space in the scratch buffer.
+        ** Release the buffer itself so that it is not leaked. */
+        sqlite3_free(buf.p);
+        goto ismatch_out;
+      }
+      /* bFlag records that the list lives in buf, which this function owns
+      ** and must free at ismatch_out. */
+      if( a==buf.p ) bFlag = 1;
+    }else{
+      a = (u8*)pTerm->pIter->pData;
+      n = pTerm->pIter->nData;
+    }
+    sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
+    aIter[i].bFlag = (u8)bFlag;
+    if( aIter[i].bEof ) goto ismatch_out;
+  }
+
+  while( 1 ){
+    int bMatch;
+    i64 iPos = aIter[0].iPos;
+    /* Advance the readers until term i sits at position iPos+i for every
+    ** i, i.e. the terms appear consecutively starting at iPos. */
+    do {
+      bMatch = 1;
+      for(i=0; i<pPhrase->nTerm; i++){
+        Fts5PoslistReader *pPos = &aIter[i];
+        i64 iAdj = iPos + i;
+        if( pPos->iPos!=iAdj ){
+          bMatch = 0;
+          while( pPos->iPos<iAdj ){
+            if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
+          }
+          if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
+        }
+      }
+    }while( bMatch==0 );
+
+    /* Append position iPos to the output */
+    rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
+    if( rc!=SQLITE_OK ) goto ismatch_out;
+
+    for(i=0; i<pPhrase->nTerm; i++){
+      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
+    }
+  }
+
+ ismatch_out:
+  *pbMatch = (pPhrase->poslist.n>0);
+  for(i=0; i<pPhrase->nTerm; i++){
+    if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
+  }
+  if( aIter!=aStatic ) sqlite3_free(aIter);
+  return rc;
+}
+
+/*
+** A position-list reader that always has the next position available in
+** iLookahead in addition to the current position in iPos.
+*/
+typedef struct Fts5LookaheadReader Fts5LookaheadReader;
+struct Fts5LookaheadReader {
+ const u8 *a; /* Buffer containing position list */
+ int n; /* Size of buffer a[] in bytes */
+ int i; /* Current offset in position list */
+ i64 iPos; /* Current position */
+ i64 iLookahead; /* Next position */
+};
+
+/* Sentinel stored in iLookahead (and later iPos) once the list is used up. */
+#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
+
+/*
+** Advance lookahead reader p by one entry: the previous lookahead value
+** becomes the current position and a new lookahead value is read from the
+** list. If sqlite3Fts5PoslistNext64() returns non-zero, the lookahead is
+** set to FTS5_LOOKAHEAD_EOF instead.
+**
+** Returns non-zero if the new current position is FTS5_LOOKAHEAD_EOF.
+*/
+static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
+  int bDone;
+  p->iPos = p->iLookahead;
+  bDone = sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead);
+  if( bDone ){
+    p->iLookahead = FTS5_LOOKAHEAD_EOF;
+  }
+  return (p->iPos==FTS5_LOOKAHEAD_EOF);
+}
+
+/*
+** Initialize lookahead reader p to read the n byte position list at a[].
+** The reader is zeroed and then advanced twice, so that on return both the
+** current position and the lookahead position have been loaded.
+**
+** Returns the result of the second advance, which is non-zero if the list
+** has no first entry.
+*/
+static int fts5LookaheadReaderInit(
+  const u8 *a, int n,             /* Buffer to read position list from */
+  Fts5LookaheadReader *p          /* Iterator object to initialize */
+){
+  memset(p, 0, sizeof(*p));
+  p->a = a;
+  p->n = n;
+  (void)fts5LookaheadReaderNext(p);   /* Loads the lookahead slot */
+  return fts5LookaheadReaderNext(p);  /* Moves it into iPos */
+}
+
+/*
+** Per-phrase state used by fts5ExprNearIsMatch(): a lookahead reader over
+** the phrase's position list, and the writer state and buffer to which the
+** trimmed list is written.
+*/
+typedef struct Fts5NearTrimmer Fts5NearTrimmer;
+struct Fts5NearTrimmer {
+ Fts5LookaheadReader reader; /* Input iterator */
+ Fts5PoslistWriter writer; /* Writer context */
+ Fts5Buffer *pOut; /* Output poslist */
+};
+
+/*
+** The near-set object passed as the second argument contains more than
+** one phrase. All phrases currently point to the same row. The
+** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
+** tests if the current row contains instances of each phrase sufficiently
+** close together to meet the NEAR constraint. Non-zero is returned if it
+** does, or zero otherwise.
+**
+** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
+** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
+** occurs within this function (*pRc) is set accordingly before returning.
+** The return value is undefined in both these cases.
+**
+** If no error occurs and non-zero (a match) is returned, the position-list
+** of each phrase object is edited to contain only those entries that
+** meet the constraint before returning.
+**
+** Implementation notes: one Fts5NearTrimmer is used per phrase. Up to
+** ArraySize(aStatic) of them live on the stack; a larger near-set uses a
+** heap array that is released at label ismatch_out. The value returned is
+** whether or not the rewritten poslist of the first phrase is non-empty.
+** The return value of sqlite3Fts5PoslistWriterAppend() is not checked
+** here.
+*/
+static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
+ Fts5NearTrimmer aStatic[4]; /* Stack storage for small near-sets */
+ Fts5NearTrimmer *a = aStatic; /* Trimmer array in use (stack or heap) */
+ Fts5ExprPhrase **apPhrase = pNear->apPhrase;
+
+ int i;
+ int rc = *pRc;
+ int bMatch;
+
+ assert( pNear->nPhrase>1 );
+
+ /* If the aStatic[] array is not large enough, allocate a large array
+ ** using sqlite3_malloc(). This approach could be improved upon. */
+ if( pNear->nPhrase>ArraySize(aStatic) ){
+ int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
+ a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
+ }else{
+ memset(aStatic, 0, sizeof(aStatic));
+ }
+ if( rc!=SQLITE_OK ){
+ *pRc = rc;
+ return 0;
+ }
+
+ /* Initialize a lookahead iterator for each phrase. After passing the
+ ** buffer and buffer size to the lookahead-reader init function, zero
+ ** the phrase poslist buffer. The new poslist for the phrase (containing
+ ** the same entries as the original with some entries removed on account
+ ** of the NEAR constraint) is written over the original even as it is
+ ** being read. This is safe as the entries for the new poslist are a
+ ** subset of the old, so it is not possible for data yet to be read to
+ ** be overwritten. */
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
+ fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
+ pPoslist->n = 0;
+ a[i].pOut = pPoslist;
+ }
+
+ while( 1 ){
+ int iAdv; /* Index of the reader to advance next */
+ i64 iMin;
+ i64 iMax;
+
+ /* This block advances the phrase iterators until they point to a set of
+ ** entries that together comprise a match. The entry for phrase i is
+ ** accepted if it lies in the range [iMax - nTerm - nNear, iMax], where
+ ** nTerm is the number of terms in phrase i and iMax is the largest
+ ** position seen so far in this pass. The loop is left via ismatch_out
+ ** as soon as any reader reaches EOF. */
+ iMax = a[0].reader.iPos;
+ do {
+ bMatch = 1;
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5LookaheadReader *pPos = &a[i].reader;
+ iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
+ if( pPos->iPos<iMin || pPos->iPos>iMax ){
+ bMatch = 0;
+ while( pPos->iPos<iMin ){
+ if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
+ }
+ if( pPos->iPos>iMax ) iMax = pPos->iPos;
+ }
+ }
+ }while( bMatch==0 );
+
+ /* Add an entry to each output position list */
+ for(i=0; i<pNear->nPhrase; i++){
+ i64 iPos = a[i].reader.iPos;
+ Fts5PoslistWriter *pWriter = &a[i].writer;
+ if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ /* Skip a position equal to the one last written */
+ sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
+ }
+ }
+
+ iAdv = 0;
+ iMin = a[0].reader.iLookahead;
+ for(i=0; i<pNear->nPhrase; i++){ /* Find the reader with the smallest lookahead value */
+ if( a[i].reader.iLookahead < iMin ){
+ iMin = a[i].reader.iLookahead;
+ iAdv = i;
+ }
+ }
+ if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
+ }
+
+ ismatch_out: {
+ int bRet = a[0].pOut->n>0; /* True if at least one entry was written for phrase 0 */
+ *pRc = rc;
+ if( a!=aStatic ) sqlite3_free(a);
+ return bRet;
+ }
+}
+
+/*
+** Ensure that iterator pIter points to a rowid that is at or beyond
+** (*piLast) in the iteration order - greater than or equal to it for an
+** ascending query (bDesc==0), or less than or equal to it for a
+** descending one. If the iterator is already there it is left untouched,
+** otherwise sqlite3Fts5IterNextFrom() is used to move it. On success
+** (*piLast) is set to the rowid the iterator points to and zero returned.
+**
+** If the iterator has to be moved and either an error occurs or it
+** reaches EOF, (*pRc) is set to the value returned by
+** sqlite3Fts5IterNextFrom() (SQLITE_OK for a plain EOF), (*pbEof) is set
+** to 1 and non-zero is returned. (*piLast) is not modified in this case.
+*/
+static int fts5ExprAdvanceto(
+  Fts5IndexIter *pIter,           /* Iterator to advance */
+  int bDesc,                      /* True if iterator is "rowid DESC" */
+  i64 *piLast,                    /* IN/OUT: Furthest rowid seen so far */
+  int *pRc,                       /* OUT: Error code */
+  int *pbEof                      /* OUT: Set to true if EOF */
+){
+  const i64 iTarget = *piLast;
+  i64 iCur = pIter->iRowid;
+  int bBehind = bDesc ? (iTarget<iCur) : (iTarget>iCur);
+
+  if( bBehind ){
+    int rc = sqlite3Fts5IterNextFrom(pIter, iTarget);
+    if( rc!=SQLITE_OK || sqlite3Fts5IterEof(pIter) ){
+      *pRc = rc;
+      *pbEof = 1;
+      return 1;
+    }
+    iCur = pIter->iRowid;
+    assert( (bDesc==0 && iCur>=iTarget) || (bDesc==1 && iCur<=iTarget) );
+  }
+
+  *piLast = iCur;
+  return 0;
+}
+
+/*
+** Version of fts5ExprAdvanceto() for a term that has synonyms. Each
+** iterator in the pTerm synonym list that is not at EOF and that points
+** to a rowid earlier than (*piLast) in the iteration order is moved with
+** sqlite3Fts5IterNextFrom().
+**
+** If no error occurs, (*piLast) is set to the value returned by
+** fts5ExprSynonymRowid() and the EOF flag it reports is returned.
+** Otherwise (*pRc) is set to the error code and 1 is returned.
+*/
+static int fts5ExprSynonymAdvanceto(
+  Fts5ExprTerm *pTerm,            /* Term iterator to advance */
+  int bDesc,                      /* True if iterator is "rowid DESC" */
+  i64 *piLast,                    /* IN/OUT: Furthest rowid seen so far */
+  int *pRc                        /* OUT: Error code */
+){
+  const i64 iTarget = *piLast;
+  Fts5ExprTerm *pSyn = pTerm;
+  int bEof = 0;
+  int rc = SQLITE_OK;
+
+  while( rc==SQLITE_OK && pSyn ){
+    if( sqlite3Fts5IterEof(pSyn->pIter)==0 ){
+      i64 iCur = pSyn->pIter->iRowid;
+      int bBehind = bDesc ? (iTarget<iCur) : (iTarget>iCur);
+      if( bBehind ){
+        rc = sqlite3Fts5IterNextFrom(pSyn->pIter, iTarget);
+      }
+    }
+    pSyn = pSyn->pSynonym;
+  }
+
+  if( rc==SQLITE_OK ){
+    *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
+  }else{
+    *pRc = rc;
+    bEof = 1;
+  }
+  return bEof;
+}
+
+
+/*
+** Test whether or not the row that node pNode currently points to
+** (pNode->iRowid) is a match for its near-set. Return non-zero if so.
+**
+** If the table detail mode is not FTS5_DETAIL_FULL, only the first phrase
+** is examined: its poslist size is set to 1 if any synonym iterator of
+** its first term is on the current row with a non-empty data blob, or to
+** 0 otherwise, and that value is returned. (*pRc) is not written.
+**
+** Otherwise, the poslist of each phrase is populated - by
+** fts5ExprPhraseIsMatch() for phrases with more than one term, with
+** synonyms, or when a column set is present, or else by copying the
+** iterator data directly. If every phrase matches and there is more than
+** one of them, fts5ExprNearIsMatch() makes the final decision. Any error
+** code is stored in (*pRc).
+*/
+static int fts5ExprNearTest(
+  int *pRc,
+  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
+  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
+){
+  Fts5ExprNearset *pNear = pNode->pNear;
+  int rc = *pRc;
+  int iPhrase;
+
+  if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
+    Fts5ExprPhrase *pFirst = pNear->apPhrase[0];
+    Fts5ExprTerm *pSyn = &pFirst->aTerm[0];
+    pFirst->poslist.n = 0;
+    while( pSyn ){
+      Fts5IndexIter *pIter = pSyn->pIter;
+      if( sqlite3Fts5IterEof(pIter)==0
+       && pIter->iRowid==pNode->iRowid
+       && pIter->nData>0
+      ){
+        pFirst->poslist.n = 1;
+      }
+      pSyn = pSyn->pSynonym;
+    }
+    return pFirst->poslist.n;
+  }
+
+  /* Check that each phrase in the nearset matches the current row,
+  ** filling in the pPhrase->poslist buffers along the way. Stop at the
+  ** first phrase that does not match. */
+  for(iPhrase=0; rc==SQLITE_OK && iPhrase<pNear->nPhrase; iPhrase++){
+    Fts5ExprPhrase *pPhrase = pNear->apPhrase[iPhrase];
+    int bSimple = !(pPhrase->nTerm>1
+                 || pPhrase->aTerm[0].pSynonym
+                 || pNear->pColset);
+    if( bSimple ){
+      Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
+      fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
+    }else{
+      int bMatch = 0;
+      rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch);
+      if( bMatch==0 ) break;
+    }
+  }
+
+  *pRc = rc;
+  if( iPhrase!=pNear->nPhrase ) return 0;
+  if( iPhrase==1 ) return 1;
+  return fts5ExprNearIsMatch(pRc, pNear) ? 1 : 0;
+}
+
+
+/*
+** Initialize all term iterators in the pNear object. If any term is found
+** to match no documents at all, return immediately without initializing any
+** further iterators.
+**
+** For each term, any iterator left open by an earlier query is closed
+** and a new one is opened for the term and for each of its synonyms. The
+** prefix flag of the primary term is used for all of its synonyms. If,
+** once the synonym loop for a term has finished, none of its iterators
+** is open and not at EOF, pNode->bEof is set to 1 and the current value
+** of rc (which may be an error code) is returned.
+*/
+static int fts5ExprNearInitAll(
+ Fts5Expr *pExpr, /* Expression; supplies pIndex and bDesc */
+ Fts5ExprNode *pNode /* Node whose near-set iterators are opened */
+){
+ Fts5ExprNearset *pNear = pNode->pNear;
+ int i, j; /* Phrase and term index, respectively */
+ int rc = SQLITE_OK;
+
+ assert( pNode->bNomatch==0 );
+ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+ for(j=0; j<pPhrase->nTerm; j++){
+ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
+ Fts5ExprTerm *p;
+ int bEof = 1; /* Cleared once any synonym has at least one row */
+
+ for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){
+ if( p->pIter ){
+ sqlite3Fts5IterClose(p->pIter);
+ p->pIter = 0;
+ }
+ rc = sqlite3Fts5IndexQuery(
+ pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
+ (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
+ (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
+ pNear->pColset,
+ &p->pIter
+ );
+ assert( rc==SQLITE_OK || p->pIter==0 );
+ if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){
+ bEof = 0;
+ }
+ }
+
+ if( bEof ){
+ pNode->bEof = 1;
+ return rc;
+ }
+ }
+ }
+
+ return rc;
+}
+
+/*
+** Compare two rowids in the iteration order of expression pExpr. The
+** result is -1, 0 or +1. For an ascending iterator (bDesc==0) it has the
+** same sign as:
+**
+**     (iLhs - iRhs)
+**
+** and for a descending iterator the same sign as:
+**
+**     (iRhs - iLhs)
+*/
+static int fts5RowidCmp(
+  Fts5Expr *pExpr,
+  i64 iLhs,
+  i64 iRhs
+){
+  int res;                        /* Sign of the ascending comparison */
+
+  assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
+  res = (iLhs>iRhs) - (iLhs<iRhs);
+  return pExpr->bDesc ? -res : res;
+}
+
+/*
+** Mark pNode and every node in the sub-tree below it as being at EOF,
+** clearing the bNomatch flag of each.
+*/
+static void fts5ExprSetEof(Fts5ExprNode *pNode){
+  int iChild = 0;
+
+  pNode->bNomatch = 0;
+  pNode->bEof = 1;
+  while( iChild<pNode->nChild ){
+    fts5ExprSetEof(pNode->apChild[iChild]);
+    iChild++;
+  }
+}
+
+/*
+** Set the size of the poslist of every phrase in the sub-tree headed by
+** pNode to zero. FTS5_STRING and FTS5_TERM nodes are handled directly,
+** all other node types by recursing into their children.
+*/
+static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
+  int ii;
+
+  if( pNode->eType!=FTS5_STRING && pNode->eType!=FTS5_TERM ){
+    for(ii=0; ii<pNode->nChild; ii++){
+      fts5ExprNodeZeroPoslist(pNode->apChild[ii]);
+    }
+    return;
+  }
+
+  for(ii=0; ii<pNode->pNear->nPhrase; ii++){
+    pNode->pNear->apPhrase[ii]->poslist.n = 0;
+  }
+}
+
+
+
+/*
+** Compare the rows currently indicated by nodes p1 and p2:
+**
+**     res = (*p1) - (*p2)
+**
+** A node that points to a row that comes later in the iteration order is
+** the larger of the two, so for an ascending query numerically larger
+** rowids are larger and for a descending query numerically smaller
+** rowids are larger. A node at EOF is larger than any node that is not.
+** If p2 is at EOF the result is -1, whether or not p1 is also at EOF.
+*/
+static int fts5NodeCompare(
+  Fts5Expr *pExpr,
+  Fts5ExprNode *p1,
+  Fts5ExprNode *p2
+){
+  int res;
+
+  if( p2->bEof ){
+    res = -1;
+  }else if( p1->bEof ){
+    res = +1;
+  }else{
+    res = fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
+  }
+  return res;
+}
+
+/*
+** All individual term iterators in pNear are guaranteed to be valid when
+** this function is called. This function checks if all term iterators
+** point to the same rowid, and if not, advances them until they do.
+** If an EOF is reached before this happens, pNode->bEof is set to true
+** before returning.
+**
+** SQLITE_OK is returned if no error occurs, or an SQLite error code
+** otherwise. It is not considered an error if an iterator reaches
+** EOF.
+**
+** Once all iterators agree on a rowid it is stored in pNode->iRowid and
+** fts5ExprNearTest() is invoked. pNode->bNomatch is set if that test
+** reports no match and no error has occurred.
+*/
+static int fts5ExprNodeTest_STRING(
+ Fts5Expr *pExpr, /* Expression pPhrase belongs to */
+ Fts5ExprNode *pNode /* FTS5_STRING node to test */
+){
+ Fts5ExprNearset *pNear = pNode->pNear;
+ Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
+ int rc = SQLITE_OK;
+ i64 iLast; /* Lastest rowid any iterator points to */
+ int i, j; /* Phrase and token index, respectively */
+ int bMatch; /* True if all terms are at the same rowid */
+ const int bDesc = pExpr->bDesc;
+
+ /* Check that this node should not be FTS5_TERM */
+ assert( pNear->nPhrase>1
+ || pNear->apPhrase[0]->nTerm>1
+ || pNear->apPhrase[0]->aTerm[0].pSynonym
+ );
+
+ /* Initialize iLast, the "lastest" rowid any iterator points to. If the
+ ** iterator skips through rowids in the default ascending order, this means
+ ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
+ ** means the minimum rowid. */
+ if( pLeft->aTerm[0].pSynonym ){
+ iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
+ }else{
+ iLast = pLeft->aTerm[0].pIter->iRowid;
+ }
+
+ do {
+ bMatch = 1;
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+ for(j=0; j<pPhrase->nTerm; j++){
+ Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
+ if( pTerm->pSynonym ){
+ i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
+ if( iRowid==iLast ) continue;
+ bMatch = 0; /* At least one term disagreed: run another pass */
+ if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
+ pNode->bNomatch = 0;
+ pNode->bEof = 1;
+ return rc;
+ }
+ }else{
+ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
+ if( pIter->iRowid==iLast ) continue;
+ bMatch = 0; /* At least one term disagreed: run another pass */
+ if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
+ return rc;
+ }
+ }
+ }
+ }
+ }while( bMatch==0 );
+
+ pNode->iRowid = iLast;
+ pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK);
+ assert( pNode->bEof==0 || pNode->bNomatch==0 );
+
+ return rc;
+}
+
+/*
+** Advance the first term iterator in the first phrase of pNear (or, if
+** that term has synonyms, the relevant synonym iterators). Set
+** pNode->bEof to true if EOF is reached or if an error occurs. If the
+** node is not at EOF afterwards, fts5ExprNodeTest_STRING() is invoked to
+** move the remaining iterators to a common rowid.
+**
+** Return SQLITE_OK if successful, or an SQLite error code if an error
+** occurs.
+*/
+static int fts5ExprNodeNext_STRING(
+ Fts5Expr *pExpr, /* Expression pPhrase belongs to */
+ Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */
+ int bFromValid, /* True if iFrom is valid */
+ i64 iFrom /* If bFromValid, seek with sqlite3Fts5IterNextFrom(iFrom) */
+){
+ Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
+ int rc = SQLITE_OK;
+
+ pNode->bNomatch = 0;
+ if( pTerm->pSynonym ){
+ int bEof = 1; /* Cleared if any synonym iterator is not at EOF */
+ Fts5ExprTerm *p;
+
+ /* Find the firstest rowid any synonym points to. */
+ i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
+
+ /* Advance each iterator that currently points to iRowid. Or, if iFrom
+ ** is valid - each iterator that points to a rowid before iFrom. */
+ for(p=pTerm; p; p=p->pSynonym){
+ if( sqlite3Fts5IterEof(p->pIter)==0 ){
+ i64 ii = p->pIter->iRowid;
+ if( ii==iRowid
+ || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
+ ){
+ if( bFromValid ){
+ rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
+ }else{
+ rc = sqlite3Fts5IterNext(p->pIter);
+ }
+ if( rc!=SQLITE_OK ) break;
+ if( sqlite3Fts5IterEof(p->pIter)==0 ){
+ bEof = 0;
+ }
+ }else{
+ bEof = 0;
+ }
+ }
+ }
+
+ /* Set the EOF flag if either all synonym iterators are at EOF or an
+ ** error has occurred. */
+ pNode->bEof = (rc || bEof);
+ }else{
+ Fts5IndexIter *pIter = pTerm->pIter;
+
+ assert( Fts5NodeIsString(pNode) );
+ if( bFromValid ){
+ rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
+ }else{
+ rc = sqlite3Fts5IterNext(pIter);
+ }
+
+ pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
+ }
+
+ if( pNode->bEof==0 ){
+ assert( rc==SQLITE_OK );
+ rc = fts5ExprNodeTest_STRING(pExpr, pNode);
+ }
+
+ return rc;
+}
+
+
+/*
+** Update node pNode, which must be of type FTS5_TERM, to reflect the row
+** that its one and only term iterator currently points to. The node rowid
+** is copied from the iterator and bNomatch is set if the iterator has no
+** data for the row. Always returns SQLITE_OK.
+*/
+static int fts5ExprNodeTest_TERM(
+  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
+  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
+){
+  Fts5ExprPhrase *pOnly = pNode->pNear->apPhrase[0];
+  Fts5IndexIter *pIter = pOnly->aTerm[0].pIter;
+
+  assert( pNode->eType==FTS5_TERM );
+  assert( pNode->pNear->nPhrase==1 && pOnly->nTerm==1 );
+  assert( pOnly->aTerm[0].pSynonym==0 );
+
+  /* This "NEAR" object is a single phrase made up of a single term, so
+  ** instead of synthesizing a new poslist (as must be done for more
+  ** complicated phrase or NEAR expressions) the phrase simply borrows a
+  ** pointer into the poslist managed by the fts5_index.c iterator object.
+  ** The pointer is only taken in FTS5_DETAIL_FULL mode. */
+  if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){
+    pOnly->poslist.p = (u8*)pIter->pData;
+  }
+  pOnly->poslist.n = pIter->nData;
+
+  pNode->iRowid = pIter->iRowid;
+  pNode->bNomatch = (pOnly->poslist.n==0);
+  return SQLITE_OK;
+}
+
+/*
+** xNext() method for a node of type FTS5_TERM. The term iterator is
+** moved to the next row, or, if bFromValid is true, to the first row at
+** or after iFrom in the iteration order. If an error occurs or the
+** iterator hits EOF, the node is marked as being at EOF. Otherwise, it is
+** updated by fts5ExprNodeTest_TERM().
+*/
+static int fts5ExprNodeNext_TERM(
+  Fts5Expr *pExpr,
+  Fts5ExprNode *pNode,
+  int bFromValid,
+  i64 iFrom
+){
+  Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
+  int rc;
+
+  assert( pNode->bEof==0 );
+  rc = bFromValid ? sqlite3Fts5IterNextFrom(pIter, iFrom)
+                  : sqlite3Fts5IterNext(pIter);
+
+  if( rc!=SQLITE_OK || sqlite3Fts5IterEof(pIter) ){
+    pNode->bEof = 1;
+    pNode->bNomatch = 0;
+    return rc;
+  }
+  return fts5ExprNodeTest_TERM(pExpr, pNode);
+}
+
+/*
+** Update FTS5_OR node pNode based on the current state of its children.
+** The child that sorts first according to fts5NodeCompare() is selected.
+** If a later child compares equal to the current selection and does not
+** have its bNomatch flag set, it replaces the selection. The rowid, EOF
+** and no-match flags of the selected child are then copied to pNode.
+*/
+static void fts5ExprNodeTest_OR(
+  Fts5Expr *pExpr,                /* Expression of which pNode is a part */
+  Fts5ExprNode *pNode             /* Expression node to test */
+){
+  Fts5ExprNode *pBest = pNode->apChild[0];
+  int iChild;
+
+  for(iChild=1; iChild<pNode->nChild; iChild++){
+    Fts5ExprNode *pCand = pNode->apChild[iChild];
+    int cmp = fts5NodeCompare(pExpr, pBest, pCand);
+    if( cmp>0 ){
+      pBest = pCand;
+    }else if( cmp==0 && pCand->bNomatch==0 ){
+      pBest = pCand;
+    }
+  }
+
+  pNode->bNomatch = pBest->bNomatch;
+  pNode->bEof = pBest->bEof;
+  pNode->iRowid = pBest->iRowid;
+}
+
+/*
+** xNext() method for a node of type FTS5_OR. Each child that is not at
+** EOF and that either points to the same rowid as pNode or, if bFromValid
+** is true, to a rowid that comes before iFrom in the iteration order, is
+** advanced. The node is then refreshed using fts5ExprNodeTest_OR().
+**
+** If advancing a child fails, the error code is returned immediately
+** and the node itself is not updated.
+*/
+static int fts5ExprNodeNext_OR(
+  Fts5Expr *pExpr,
+  Fts5ExprNode *pNode,
+  int bFromValid,
+  i64 iFrom
+){
+  const i64 iCur = pNode->iRowid;
+  int iChild;
+
+  for(iChild=0; iChild<pNode->nChild; iChild++){
+    Fts5ExprNode *pChild = pNode->apChild[iChild];
+    assert( pChild->bEof || fts5RowidCmp(pExpr, pChild->iRowid, iCur)>=0 );
+    if( pChild->bEof ) continue;
+    if( pChild->iRowid==iCur
+     || (bFromValid && fts5RowidCmp(pExpr, pChild->iRowid, iFrom)<0)
+    ){
+      int rc = fts5ExprNodeNext(pExpr, pChild, bFromValid, iFrom);
+      if( rc!=SQLITE_OK ) return rc;
+    }
+  }
+
+  fts5ExprNodeTest_OR(pExpr, pNode);
+  return SQLITE_OK;
+}
+
+/*
+** Argument pAnd is an FTS5_AND node that is not at EOF. Advance its
+** children until they all point to the same rowid, starting from
+** pAnd->iRowid. If any child reaches EOF, pAnd and its entire sub-tree
+** are marked as being at EOF. pAnd->bNomatch is set if any child has its
+** bNomatch flag set; in that case, unless pAnd is the root of the
+** expression, the poslists of the sub-tree are also zeroed.
+**
+** Return SQLITE_OK if successful, or an SQLite error code if advancing a
+** child fails.
+*/
+static int fts5ExprNodeTest_AND(
+ Fts5Expr *pExpr, /* Expression pPhrase belongs to */
+ Fts5ExprNode *pAnd /* FTS5_AND node to advance */
+){
+ int iChild;
+ i64 iLast = pAnd->iRowid;
+ int rc = SQLITE_OK;
+ int bMatch; /* True once a full pass leaves iLast unchanged */
+
+ assert( pAnd->bEof==0 );
+ do {
+ pAnd->bNomatch = 0;
+ bMatch = 1;
+ for(iChild=0; iChild<pAnd->nChild; iChild++){
+ Fts5ExprNode *pChild = pAnd->apChild[iChild];
+ int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
+ if( cmp>0 ){
+ /* Advance pChild until it points to iLast or laster */
+ rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast);
+ if( rc!=SQLITE_OK ) return rc;
+ }
+
+ /* If the child node is now at EOF, so is the parent AND node. Otherwise,
+ ** the child node is guaranteed to have advanced at least as far as
+ ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
+ ** new lastest rowid seen so far. */
+ assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 );
+ if( pChild->bEof ){
+ fts5ExprSetEof(pAnd);
+ bMatch = 1; /* Terminates the outer do-while loop */
+ break;
+ }else if( iLast!=pChild->iRowid ){
+ bMatch = 0;
+ iLast = pChild->iRowid;
+ }
+
+ if( pChild->bNomatch ){
+ pAnd->bNomatch = 1;
+ }
+ }
+ }while( bMatch==0 );
+
+ if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
+ fts5ExprNodeZeroPoslist(pAnd);
+ }
+ pAnd->iRowid = iLast;
+ return SQLITE_OK;
+}
+
+/*
+** xNext() method for a node of type FTS5_AND. The first child is
+** advanced and then, unless an error occurred, fts5ExprNodeTest_AND() is
+** used to bring the remaining children into line with it.
+*/
+static int fts5ExprNodeNext_AND(
+  Fts5Expr *pExpr,
+  Fts5ExprNode *pNode,
+  int bFromValid,
+  i64 iFrom
+){
+  int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
+  if( rc!=SQLITE_OK ) return rc;
+  return fts5ExprNodeTest_AND(pExpr, pNode);
+}
+
+/*
+** Update FTS5_NOT node pNode. The left-hand child (p1) supplies candidate
+** rows and the right-hand child (p2) the rows to exclude. While p1 is not
+** at EOF, p2 is advanced until it is no longer behind p1. If the two then
+** point to the same row and p2 is a real match (bNomatch clear), p1 is
+** advanced and the process repeated. Otherwise, the row p1 points to
+** stands.
+**
+** On return the EOF flag, no-match flag and rowid of pNode are copied
+** from p1. If p1 is at EOF, the poslists of p2 are zeroed.
+*/
+static int fts5ExprNodeTest_NOT(
+  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
+  Fts5ExprNode *pNode             /* FTS5_NOT node to advance */
+){
+  Fts5ExprNode *p1 = pNode->apChild[0];
+  Fts5ExprNode *p2 = pNode->apChild[1];
+  int rc = SQLITE_OK;
+
+  assert( pNode->nChild==2 );
+
+  for(;;){
+    int cmp;
+    if( rc!=SQLITE_OK || p1->bEof ) break;
+
+    cmp = fts5NodeCompare(pExpr, p1, p2);
+    if( cmp>0 ){
+      rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid);
+      cmp = fts5NodeCompare(pExpr, p1, p2);
+    }
+    assert( rc!=SQLITE_OK || cmp<=0 );
+    if( cmp!=0 || p2->bNomatch ) break;
+
+    rc = fts5ExprNodeNext(pExpr, p1, 0, 0);
+  }
+
+  pNode->iRowid = p1->iRowid;
+  pNode->bNomatch = p1->bNomatch;
+  pNode->bEof = p1->bEof;
+  if( p1->bEof ){
+    fts5ExprNodeZeroPoslist(p2);
+  }
+  return rc;
+}
+
+/*
+** xNext() method for a node of type FTS5_NOT. The left-hand child is
+** advanced and then, unless an error occurred, fts5ExprNodeTest_NOT() is
+** used to skip over any rows excluded by the right-hand child.
+*/
+static int fts5ExprNodeNext_NOT(
+  Fts5Expr *pExpr,
+  Fts5ExprNode *pNode,
+  int bFromValid,
+  i64 iFrom
+){
+  int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
+  if( rc!=SQLITE_OK ) return rc;
+  return fts5ExprNodeTest_NOT(pExpr, pNode);
+}
+
+/*
+** Invoke the fts5ExprNodeTest_XXX() function that corresponds to the type
+** of node pNode and return its result. If the node is already at EOF
+** this is a no-op that returns SQLITE_OK. The FTS5_OR handler returns no
+** value, so SQLITE_OK is returned for it as well.
+*/
+static int fts5ExprNodeTest(
+  Fts5Expr *pExpr,                /* Expression of which pNode is a part */
+  Fts5ExprNode *pNode             /* Expression node to test */
+){
+  if( pNode->bEof ) return SQLITE_OK;
+
+  switch( pNode->eType ){
+    case FTS5_STRING:
+      return fts5ExprNodeTest_STRING(pExpr, pNode);
+
+    case FTS5_TERM:
+      return fts5ExprNodeTest_TERM(pExpr, pNode);
+
+    case FTS5_AND:
+      return fts5ExprNodeTest_AND(pExpr, pNode);
+
+    case FTS5_OR:
+      fts5ExprNodeTest_OR(pExpr, pNode);
+      return SQLITE_OK;
+
+    default:
+      assert( pNode->eType==FTS5_NOT );
+      return fts5ExprNodeTest_NOT(pExpr, pNode);
+  }
+}
+
+
+/*
+** Set node pNode, which is part of expression pExpr, to point to the first
+** match. If there are no matches, set the Node.bEof flag to indicate EOF.
+**
+** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
+** It is not an error if there are no matches.
+**
+** String nodes open their term iterators via fts5ExprNearInitAll(). For
+** all other node types the children are initialized recursively and
+** counted if at EOF: an AND node is at EOF if any child is, an OR node
+** only if all children are, and a NOT node if its first child is. If no
+** error has occurred, fts5ExprNodeTest() is then invoked on the node.
+*/
+static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
+ int rc = SQLITE_OK;
+ pNode->bEof = 0;
+ pNode->bNomatch = 0;
+
+ if( Fts5NodeIsString(pNode) ){
+ /* Initialize all term iterators in the NEAR object. */
+ rc = fts5ExprNearInitAll(pExpr, pNode);
+ }else{
+ int i;
+ int nEof = 0; /* Number of children that are at EOF */
+ for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
+ Fts5ExprNode *pChild = pNode->apChild[i];
+ rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
+ assert( pChild->bEof==0 || pChild->bEof==1 );
+ nEof += pChild->bEof;
+ }
+ pNode->iRowid = pNode->apChild[0]->iRowid; /* Starting rowid, taken from the first child */
+
+ switch( pNode->eType ){
+ case FTS5_AND:
+ if( nEof>0 ) fts5ExprSetEof(pNode);
+ break;
+
+ case FTS5_OR:
+ if( pNode->nChild==nEof ) fts5ExprSetEof(pNode);
+ break;
+
+ default:
+ assert( pNode->eType==FTS5_NOT );
+ pNode->bEof = pNode->apChild[0]->bEof;
+ break;
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ rc = fts5ExprNodeTest(pExpr, pNode);
+ }
+ return rc;
+}
+
+
+/*
+** Begin iterating through the set of documents in index pIdx matched by
+** the MATCH expression passed as the first argument. If the "bDesc"
+** parameter is passed a non-zero value, iteration is in descending rowid
+** order. Or, if it is zero, in ascending order.
+**
+** If iterating in ascending rowid order (bDesc==0), the first document
+** visited is that with the smallest rowid that is larger than or equal
+** to parameter iFirst. Or, if iterating in descending order (bDesc==1),
+** then the first document visited must have a rowid smaller than or
+** equal to iFirst.
+**
+** If the root node of the expression has no xNext method, this function
+** is a no-op that returns SQLITE_OK.
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
+** is not considered an error if the query does not match any documents.
+*/
+static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
+  Fts5ExprNode *pRoot = p->pRoot;
+  int rc;
+
+  if( pRoot->xNext==0 ) return SQLITE_OK;
+
+  p->bDesc = bDesc;
+  p->pIndex = pIdx;
+  rc = fts5ExprNodeFirst(p, pRoot);
+
+  /* If the expression is not at EOF but points to a rowid that comes
+  ** before iFirst in the iteration order, seek to iFirst or later. */
+  if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){
+    rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
+  }
+
+  /* Step past any rows that are not real matches. */
+  while( pRoot->bNomatch ){
+    assert( pRoot->bEof==0 && rc==SQLITE_OK );
+    rc = fts5ExprNodeNext(p, pRoot, 0, 0);
+  }
+
+  return rc;
+}
+
+/*
+** Move expression p to the next matching document, skipping any rows
+** that are flagged as not being real matches. If the rowid the
+** expression then points to comes after iLast in the iteration order,
+** the root node is marked as being at EOF.
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
+** is not considered an error if there are no further matching documents.
+*/
+static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
+  Fts5ExprNode *pRoot = p->pRoot;
+  int rc;
+
+  assert( pRoot->bEof==0 && pRoot->bNomatch==0 );
+  for(;;){
+    rc = fts5ExprNodeNext(p, pRoot, 0, 0);
+    assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) );
+    if( pRoot->bNomatch==0 ) break;
+  }
+
+  if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
+    pRoot->bEof = 1;
+  }
+  return rc;
+}
+
+/*
+** Return the EOF flag of the root node of expression p.
+*/
+static int sqlite3Fts5ExprEof(Fts5Expr *p){
+  Fts5ExprNode *pRoot = p->pRoot;
+  return pRoot->bEof;
+}
+
+/*
+** Return the rowid that the root node of expression p currently points to.
+*/
+static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
+  Fts5ExprNode *pRoot = p->pRoot;
+  return pRoot->iRowid;
+}
+
+/*
+** Set (*pz) to the result of calling sqlite3Fts5Strndup() on the text of
+** token pToken. Return the error code reported by that call (SQLITE_OK
+** if it reports none).
+*/
+static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
+  int rc = SQLITE_OK;
+  char *zCopy = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
+  *pz = zCopy;
+  return rc;
+}
+
+/*
+** Free the phrase object passed as the only argument, along with the term
+** strings, term iterators and synonym objects that belong to it. Passing
+** NULL is a harmless no-op.
+*/
+static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
+  int iTerm;
+
+  if( pPhrase==0 ) return;
+
+  for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
+    Fts5ExprTerm *pTerm = &pPhrase->aTerm[iTerm];
+    Fts5ExprTerm *pSyn;
+
+    sqlite3_free(pTerm->zTerm);
+    sqlite3Fts5IterClose(pTerm->pIter);
+
+    /* Walk the synonym list. The link to the next entry is saved before
+    ** the current one is freed. An Fts5Buffer follows each synonym
+    ** object in memory and is released as well. */
+    pSyn = pTerm->pSynonym;
+    while( pSyn ){
+      Fts5ExprTerm *pFollowing = pSyn->pSynonym;
+      sqlite3Fts5IterClose(pSyn->pIter);
+      fts5BufferFree((Fts5Buffer*)&pSyn[1]);
+      sqlite3_free(pSyn);
+      pSyn = pFollowing;
+    }
+  }
+
+  if( pPhrase->poslist.nSpace>0 ){
+    fts5BufferFree(&pPhrase->poslist);
+  }
+  sqlite3_free(pPhrase);
+}
+
+/*
+** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
+** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
+** appended to it and the results returned.
+**
+** If an OOM error occurs, both the pNear and pPhrase objects are freed and
+** NULL returned. The same happens if pParse->rc is already set to an
+** error code when this function is called.
+**
+** If no error has occurred and pPhrase is NULL, pNear is returned
+** unchanged. The apPhrase[] array grows in steps of SZALLOC entries, so
+** the object may be moved by sqlite3_realloc(). Callers must use the
+** returned pointer in place of pNear.
+*/
+static Fts5ExprNearset *sqlite3Fts5ParseNearset(
+ Fts5Parse *pParse, /* Parse context */
+ Fts5ExprNearset *pNear, /* Existing nearset, or NULL */
+ Fts5ExprPhrase *pPhrase /* Recently parsed phrase */
+){
+ const int SZALLOC = 8; /* Growth increment for apPhrase[] */
+ Fts5ExprNearset *pRet = 0;
+
+ if( pParse->rc==SQLITE_OK ){
+ if( pPhrase==0 ){
+ return pNear;
+ }
+ if( pNear==0 ){
+ int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
+ pRet = sqlite3_malloc(nByte);
+ if( pRet==0 ){
+ pParse->rc = SQLITE_NOMEM;
+ }else{
+ memset(pRet, 0, nByte);
+ }
+ }else if( (pNear->nPhrase % SZALLOC)==0 ){ /* Array is full: grow by SZALLOC slots */
+ int nNew = pNear->nPhrase + SZALLOC;
+ int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
+
+ pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte);
+ if( pRet==0 ){
+ pParse->rc = SQLITE_NOMEM;
+ }
+ }else{
+ pRet = pNear;
+ }
+ }
+
+ if( pRet==0 ){
+ assert( pParse->rc!=SQLITE_OK );
+ sqlite3Fts5ParseNearsetFree(pNear);
+ sqlite3Fts5ParsePhraseFree(pPhrase);
+ }else{
+ pRet->apPhrase[pRet->nPhrase++] = pPhrase;
+ }
+ return pRet;
+}
+
+/*
+** Context object handed to fts5ParseTokenize() through its pContext
+** argument.
+*/
+typedef struct TokenCtx TokenCtx;
+struct TokenCtx {
+  Fts5ExprPhrase *pPhrase;        /* Phrase under construction (or NULL) */
+  int rc;                         /* Result code of the latest callback */
+};
+
+/*
+** Callback for tokenizing terms used by ParseTerm(). It is also invoked
+** directly by sqlite3Fts5ExprClonePhrase().
+**
+** Each invocation adds one token to the phrase in the TokenCtx object. A
+** token flagged FTS5_TOKEN_COLOCATED is linked in as a synonym of the
+** most recently added term, provided a phrase already exists. Any other
+** token is appended as a new term, with the phrase being reallocated in
+** steps of SZALLOC terms as required (in which case TokenCtx.pPhrase is
+** updated).
+**
+** The result code is stored in TokenCtx.rc and also returned. Once an
+** error has been stored there, further invocations are no-ops that
+** return the same error.
+*/
+static int fts5ParseTokenize(
+ void *pContext, /* Pointer to TokenCtx object */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iUnused1, /* Start offset of token */
+ int iUnused2 /* End offset of token */
+){
+ int rc = SQLITE_OK;
+ const int SZALLOC = 8; /* Growth increment for aTerm[] */
+ TokenCtx *pCtx = (TokenCtx*)pContext;
+ Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
+
+ UNUSED_PARAM2(iUnused1, iUnused2);
+
+ /* If an error has already occurred, this is a no-op */
+ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
+
+ assert( pPhrase==0 || pPhrase->nTerm>0 );
+ if( pPhrase && (tflags & FTS5_TOKEN_COLOCATED) ){
+ Fts5ExprTerm *pSyn;
+ int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; /* Term object, buffer, and nul-terminated text in one allocation */
+ pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte);
+ if( pSyn==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(pSyn, 0, nByte);
+ pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
+ memcpy(pSyn->zTerm, pToken, nToken);
+ pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; /* Insert at the head of the synonym list */
+ pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
+ }
+ }else{
+ Fts5ExprTerm *pTerm;
+ if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
+ Fts5ExprPhrase *pNew;
+ int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
+
+ pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase,
+ sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
+ );
+ if( pNew==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
+ pCtx->pPhrase = pPhrase = pNew;
+ pNew->nTerm = nNew - SZALLOC; /* Term count before this token is added */
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
+ memset(pTerm, 0, sizeof(Fts5ExprTerm));
+ pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
+ }
+ }
+
+ pCtx->rc = rc;
+ return rc;
+}
+
+
+/*
+** Parser-facing wrapper around fts5ExprPhraseFree(): release the phrase
+** object passed as the only argument.
+*/
+static void sqlite3Fts5ParsePhraseFree(
+  Fts5ExprPhrase *pPhrase         /* Phrase to free */
+){
+  fts5ExprPhraseFree(pPhrase);
+}
+
+/*
+** Free the near-set object passed as the only argument, including each
+** of the phrases it contains and its column set. Passing NULL is a
+** harmless no-op.
+*/
+static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
+  int iPhrase;
+
+  if( pNear==0 ) return;
+
+  for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
+    fts5ExprPhraseFree(pNear->apPhrase[iPhrase]);
+  }
+  sqlite3_free(pNear->pColset);
+  sqlite3_free(pNear);
+}
+
+/*
+** Store expression tree p in the parse context as the result of the
+** parse. No result may have been stored already.
+*/
+static void sqlite3Fts5ParseFinished(
+  Fts5Parse *pParse,              /* Parse context */
+  Fts5ExprNode *p                 /* Root of the parsed expression */
+){
+  assert( pParse->pExpr==0 );
+  pParse->pExpr = p;
+}
+
+/*
+** This function is called by the parser to process a string token. The
+** string may or may not be quoted. In any case it is tokenized and a
+** phrase object consisting of all tokens returned.
+**
+** If pAppend is not NULL, the new tokens are appended to that phrase.
+** The phrase may be reallocated in the process, so the caller must use
+** the returned pointer in its place. If pAppend is NULL and the string
+** yields at least one token, a new phrase is created and a slot for it
+** added to the pParse->apPhrase[] array.
+**
+** The tokenizer is always invoked with FTS5_TOKENIZE_QUERY. If bPrefix
+** is true (the string was followed by a "*"), FTS5_TOKENIZE_PREFIX is
+** set as well, and the last term of the phrase is marked as a prefix.
+**
+** If an error occurs, pParse->rc is set, any phrase built so far is
+** freed and NULL is returned. NULL is also returned, without an error,
+** if pAppend is NULL and the string contains no tokens.
+*/
+static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
+  Fts5Parse *pParse,              /* Parse context */
+  Fts5ExprPhrase *pAppend,        /* Phrase to append to */
+  Fts5Token *pToken,              /* String to tokenize */
+  int bPrefix                     /* True if there is a trailing "*" */
+){
+  Fts5Config *pConfig = pParse->pConfig;
+  TokenCtx sCtx;                  /* Context object passed to callback */
+  int rc;                         /* Tokenize return code */
+  char *z = 0;
+
+  memset(&sCtx, 0, sizeof(TokenCtx));
+  sCtx.pPhrase = pAppend;
+
+  rc = fts5ParseStringFromToken(pToken, &z);
+  if( rc==SQLITE_OK ){
+    /* Tell the tokenizer when it is handling a prefix query. Previously
+    ** FTS5_TOKENIZE_QUERY was OR-ed in a second time here, so the
+    ** prefix flag never reached the tokenizer. */
+    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
+    int n;
+    sqlite3Fts5Dequote(z);
+    n = (int)strlen(z);
+    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
+  }
+  sqlite3_free(z);
+  if( rc || (rc = sCtx.rc) ){
+    pParse->rc = rc;
+    fts5ExprPhraseFree(sCtx.pPhrase);
+    sCtx.pPhrase = 0;
+  }else if( sCtx.pPhrase ){
+
+    if( pAppend==0 ){
+      /* A brand new phrase. The apPhrase[] array grows 8 slots at a time. */
+      if( (pParse->nPhrase % 8)==0 ){
+        int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
+        Fts5ExprPhrase **apNew;
+        apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte);
+        if( apNew==0 ){
+          pParse->rc = SQLITE_NOMEM;
+          fts5ExprPhraseFree(sCtx.pPhrase);
+          return 0;
+        }
+        pParse->apPhrase = apNew;
+      }
+      pParse->nPhrase++;
+    }
+
+    /* Store the (possibly reallocated) phrase in the final slot. */
+    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
+    assert( sCtx.pPhrase->nTerm>0 );
+    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
+  }
+
+  return sCtx.pPhrase;
+}
+
+/*
+** Create a new FTS5 expression by cloning phrase iPhrase of the
+** expression passed as the first argument.
+**
+** The new expression consists of a single root node wrapping a near-set
+** that contains just the cloned phrase. Terms and synonyms are copied by
+** feeding them through fts5ParseTokenize(), and the bPrefix flag of each
+** term is copied across. The root node is of type FTS5_TERM if the
+** phrase is a single term without synonyms, or FTS5_STRING otherwise.
+**
+** If successful, SQLITE_OK is returned and (*ppNew) set to point to the
+** new expression. Otherwise, an SQLite error code is returned, (*ppNew)
+** is set to NULL and anything allocated here is freed.
+**
+** NOTE(review): if the phrase being cloned has no terms, sCtx.pPhrase is
+** still NULL when it is dereferenced below - confirm that callers never
+** pass such a phrase.
+*/
+static int sqlite3Fts5ExprClonePhrase(
+ Fts5Expr *pExpr, /* Expression to clone a phrase from */
+ int iPhrase, /* Index into pExpr->apExprPhrase[] */
+ Fts5Expr **ppNew /* OUT: New expression, or NULL on error */
+){
+ int rc = SQLITE_OK; /* Return code */
+ Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
+ int i; /* Used to iterate through phrase terms */
+ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
+ TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
+
+ pOrig = pExpr->apExprPhrase[iPhrase];
+ pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
+ if( rc==SQLITE_OK ){
+ pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
+ sizeof(Fts5ExprPhrase*));
+ }
+ if( rc==SQLITE_OK ){
+ pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
+ sizeof(Fts5ExprNode));
+ }
+ if( rc==SQLITE_OK ){
+ pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
+ sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
+ }
+
+ for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
+ int tflags = 0; /* 0 for the term itself, COLOCATED for its synonyms */
+ Fts5ExprTerm *p;
+ for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
+ const char *zTerm = p->zTerm;
+ rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
+ 0, 0);
+ tflags = FTS5_TOKEN_COLOCATED;
+ }
+ if( rc==SQLITE_OK ){
+ sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ /* All the allocations succeeded. Put the expression object together. */
+ pNew->pIndex = pExpr->pIndex;
+ pNew->pConfig = pExpr->pConfig;
+ pNew->nPhrase = 1;
+ pNew->apExprPhrase[0] = sCtx.pPhrase;
+ pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
+ pNew->pRoot->pNear->nPhrase = 1;
+ sCtx.pPhrase->pNode = pNew->pRoot;
+
+ if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
+ pNew->pRoot->eType = FTS5_TERM;
+ pNew->pRoot->xNext = fts5ExprNodeNext_TERM;
+ }else{
+ pNew->pRoot->eType = FTS5_STRING;
+ pNew->pRoot->xNext = fts5ExprNodeNext_STRING;
+ }
+ }else{
+ sqlite3Fts5ExprFree(pNew);
+ fts5ExprPhraseFree(sCtx.pPhrase);
+ pNew = 0;
+ }
+
+ *ppNew = pNew;
+ return rc;
+}
+
+
+/*
+** Token pTok has appeared in a MATCH expression at a point where the
+** NEAR operator is expected. Unless the token consists of exactly the
+** four bytes "NEAR", store a syntax error in the pParse object.
+*/
+static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
+  int bIsNear = (pTok->n==4 && memcmp("NEAR", pTok->p, 4)==0);
+
+  if( !bIsNear ){
+    sqlite3Fts5ParseError(
+        pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
+    );
+  }
+}
+
+/*
+** Set the NEAR distance of near-set pNear according to token p. If the
+** token is empty, FTS5_DEFAULT_NEARDIST is used. Otherwise, the token
+** must consist entirely of the digits 0-9, and is interpreted as a
+** decimal integer. If it contains any other character, an error is
+** stored in pParse and the near-set is left unmodified.
+**
+** Once the accumulated value reaches 214748363 no further digits are
+** folded in, so an over-long digit string saturates instead of
+** overflowing a signed int (undefined behaviour).
+**
+** pNear may be NULL, as sqlite3Fts5ParseNearset() returns NULL following
+** an OOM error. In that case the token is still validated, but nothing
+** is stored.
+*/
+static void sqlite3Fts5ParseSetDistance(
+  Fts5Parse *pParse,
+  Fts5ExprNearset *pNear,
+  Fts5Token *p
+){
+  int nNear = 0;
+  int i;
+
+  if( p->n ){
+    for(i=0; i<p->n; i++){
+      char c = (char)p->p[i];
+      if( c<'0' || c>'9' ){
+        sqlite3Fts5ParseError(
+            pParse, "expected integer, got \"%.*s\"", p->n, p->p
+        );
+        return;
+      }
+      /* 214748363*10 + 9 still fits in a 32-bit signed integer */
+      if( nNear<214748363 ) nNear = nNear * 10 + (p->p[i] - '0');
+    }
+  }else{
+    nNear = FTS5_DEFAULT_NEARDIST;
+  }
+
+  if( pNear ){
+    pNear->nNear = nNear;
+  }
+}
+
+/*
+** The second argument passed to this function may be NULL, or it may be
+** an existing Fts5Colset object. This function returns a pointer to
+** a new colset object containing the contents of (p) with new value column
+** number iCol added. The aiCol[] array is kept sorted in ascending order
+** and free of duplicates: if iCol is already present the (reallocated)
+** object is returned with its contents unchanged.
+**
+** The object is resized using sqlite3_realloc(), so on success the
+** caller must use the returned pointer in place of (p).
+**
+** If an OOM error occurs, store an error code in pParse and return NULL.
+** The old colset object (if any) is not freed in this case.
+*/
+static Fts5Colset *fts5ParseColset(
+ Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
+ Fts5Colset *p, /* Existing colset object */
+ int iCol /* New column to add to colset object */
+){
+ int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */
+ Fts5Colset *pNew; /* New colset object to return */
+
+ assert( pParse->rc==SQLITE_OK );
+ assert( iCol>=0 && iCol<pParse->pConfig->nCol );
+
+ pNew = sqlite3_realloc(p, sizeof(Fts5Colset) + sizeof(int)*nCol);
+ if( pNew==0 ){
+ pParse->rc = SQLITE_NOMEM;
+ }else{
+ int *aiCol = pNew->aiCol;
+ int i, j;
+ for(i=0; i<nCol; i++){ /* Find the insertion point for iCol */
+ if( aiCol[i]==iCol ) return pNew;
+ if( aiCol[i]>iCol ) break;
+ }
+ for(j=nCol; j>i; j--){ /* Shift larger entries up by one slot */
+ aiCol[j] = aiCol[j-1];
+ }
+ aiCol[i] = iCol;
+ pNew->nCol = nCol+1;
+
+#ifndef NDEBUG
+ /* Check that the array is in order and contains no duplicate entries. */
+ for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] );
+#endif
+ }
+
+ return pNew;
+}
+
+/*
+** Add the column named by token p to column set pColset and return the
+** resulting set.
+**
+** The token text is copied, dequoted and compared case-insensitively with
+** the column names in the table configuration. An unknown name stores a
+** "no such column" error in pParse.
+**
+** Whenever NULL is returned the input pColset is freed by this function.
+*/
+static Fts5Colset *sqlite3Fts5ParseColset(
+  Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
+  Fts5Colset *pColset,            /* Existing colset object */
+  Fts5Token *p
+){
+  Fts5Colset *pResult = 0;
+  char *zName;                    /* Dequoted copy of token p */
+
+  zName = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
+  if( pParse->rc==SQLITE_OK ){
+    Fts5Config *pConfig = pParse->pConfig;
+    int iCol = 0;
+
+    sqlite3Fts5Dequote(zName);
+    while( iCol<pConfig->nCol
+        && 0!=sqlite3_stricmp(pConfig->azCol[iCol], zName)
+    ){
+      iCol++;
+    }
+    if( iCol<pConfig->nCol ){
+      pResult = fts5ParseColset(pParse, pColset, iCol);
+    }else{
+      sqlite3Fts5ParseError(pParse, "no such column: %s", zName);
+    }
+    sqlite3_free(zName);
+  }
+
+  if( pResult==0 ){
+    assert( pParse->rc!=SQLITE_OK );
+    sqlite3_free(pColset);
+  }
+  return pResult;
+}
+
+/*
+** Attach column set pColset to nearset pNear. This function always takes
+** ownership of pColset: it is either stored in pNear->pColset or freed.
+**
+** Column filters are rejected for detail=none tables. In that case an
+** error is recorded in pParse, but only if no earlier error is pending, so
+** that an existing pParse->zErr message is neither leaked nor replaced and
+** the first error code is the one reported.
+**
+** If pNear is NULL, pColset is simply freed.
+*/
+static void sqlite3Fts5ParseSetColset(
+  Fts5Parse *pParse,              /* Parse context; receives any error */
+  Fts5ExprNearset *pNear,         /* Nearset to receive the colset, or NULL */
+  Fts5Colset *pColset             /* Column set; ownership passes to callee */
+){
+  if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){
+    if( pParse->rc==SQLITE_OK ){
+      pParse->rc = SQLITE_ERROR;
+      pParse->zErr = sqlite3_mprintf(
+          "fts5: column queries are not supported (detail=none)"
+      );
+    }
+    sqlite3_free(pColset);
+    return;
+  }
+
+  if( pNear ){
+    pNear->pColset = pColset;
+  }else{
+    sqlite3_free(pColset);
+  }
+}
+
+/*
+** Install the xNext method on pNode according to pNode->eType.
+**
+** A FTS5_STRING node that consists of exactly one phrase with exactly one
+** term and no synonyms is converted to type FTS5_TERM and given the TERM
+** iterator. Other STRING nodes get the STRING iterator. AND and OR nodes
+** get their respective iterators; any remaining type is expected to be
+** FTS5_NOT.
+*/
+static void fts5ExprAssignXNext(Fts5ExprNode *pNode){
+  int eType = pNode->eType;
+
+  if( eType==FTS5_STRING ){
+    Fts5ExprNearset *pNear = pNode->pNear;
+    int bSingleTerm = (
+        pNear->nPhrase==1
+     && pNear->apPhrase[0]->nTerm==1
+     && pNear->apPhrase[0]->aTerm[0].pSynonym==0
+    );
+    if( bSingleTerm ){
+      pNode->eType = FTS5_TERM;
+      pNode->xNext = fts5ExprNodeNext_TERM;
+    }else{
+      pNode->xNext = fts5ExprNodeNext_STRING;
+    }
+  }else if( eType==FTS5_OR ){
+    pNode->xNext = fts5ExprNodeNext_OR;
+  }else if( eType==FTS5_AND ){
+    pNode->xNext = fts5ExprNodeNext_AND;
+  }else{
+    assert( pNode->eType==FTS5_NOT );
+    pNode->xNext = fts5ExprNodeNext_NOT;
+  }
+}
+
+/*
+** Attach expression pSub beneath node p.
+**
+** If p is not a NOT node and pSub has the same type as p, the children of
+** pSub are copied directly into p->apChild[] and the pSub node itself is
+** freed. Otherwise pSub becomes the next child of p. No bounds check is
+** made on p->apChild[] here.
+*/
+static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
+  if( p->eType==FTS5_NOT || pSub->eType!=p->eType ){
+    p->apChild[p->nChild] = pSub;
+    p->nChild++;
+    return;
+  }
+
+  memcpy(&p->apChild[p->nChild], pSub->apChild,
+         sizeof(Fts5ExprNode*) * pSub->nChild);
+  p->nChild += pSub->nChild;
+  sqlite3_free(pSub);
+}
+
+/*
+** Allocate and return a new expression object. If anything goes wrong (i.e.
+** OOM error), leave an error code in pParse and return NULL.
+**
+** For FTS5_STRING nodes pNear supplies the phrases and pLeft/pRight are
+** expected to be NULL; for AND, OR and NOT nodes pLeft/pRight are the
+** operands and pNear is expected to be NULL (see the assert below).
+**
+** Early-outs that allocate nothing and free nothing:
+**   * FTS5_STRING with pNear==0 returns NULL,
+**   * a non-STRING node with one NULL operand returns the other operand.
+**
+** On every other path that ends with pRet==0 (pParse->rc already set on
+** entry, allocation failure, or a query not supported by the table's
+** detail mode) pLeft, pRight and pNear are all freed here.
+*/
+static Fts5ExprNode *sqlite3Fts5ParseNode(
+ Fts5Parse *pParse, /* Parse context */
+ int eType, /* FTS5_STRING, AND, OR or NOT */
+ Fts5ExprNode *pLeft, /* Left hand child expression */
+ Fts5ExprNode *pRight, /* Right hand child expression */
+ Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */
+){
+ Fts5ExprNode *pRet = 0;
+
+ if( pParse->rc==SQLITE_OK ){
+ int nChild = 0; /* Number of children of returned node */
+ int nByte; /* Bytes of space to allocate for this node */
+
+ assert( (eType!=FTS5_STRING && !pNear)
+ || (eType==FTS5_STRING && !pLeft && !pRight)
+ );
+ if( eType==FTS5_STRING && pNear==0 ) return 0;
+ if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
+ if( eType!=FTS5_STRING && pRight==0 ) return pLeft;
+
+ if( eType==FTS5_NOT ){
+ nChild = 2;
+ }else if( eType==FTS5_AND || eType==FTS5_OR ){
+ nChild = 2;
+ if( pLeft->eType==eType ) nChild += pLeft->nChild-1; /* operand of the same
+ ** type is flattened: its children replace it in the new node */
+ if( pRight->eType==eType ) nChild += pRight->nChild-1;
+ }
+
+ nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1); /* presumably
+ ** Fts5ExprNode already embeds one apChild[] slot - confirm against the
+ ** struct declaration */
+ pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
+
+ if( pRet ){
+ pRet->eType = eType;
+ pRet->pNear = pNear;
+ fts5ExprAssignXNext(pRet); /* may rewrite pRet->eType to FTS5_TERM */
+ if( eType==FTS5_STRING ){
+ int iPhrase;
+ for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
+ pNear->apPhrase[iPhrase]->pNode = pRet;
+ }
+
+ if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL
+ && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm!=1)
+ ){ /* only single-term, single-phrase nodes pass when detail!=full */
+ assert( pParse->rc==SQLITE_OK );
+ pParse->rc = SQLITE_ERROR;
+ assert( pParse->zErr==0 );
+ pParse->zErr = sqlite3_mprintf(
+ "fts5: %s queries are not supported (detail!=full)",
+ pNear->nPhrase==1 ? "phrase": "NEAR"
+ );
+ sqlite3_free(pRet); /* pNear is released by the cleanup block below */
+ pRet = 0;
+ }
+
+ }else{
+ fts5ExprAddChildren(pRet, pLeft);
+ fts5ExprAddChildren(pRet, pRight);
+ }
+ }
+ }
+
+ if( pRet==0 ){
+ assert( pParse->rc!=SQLITE_OK );
+ sqlite3Fts5ParseNodeFree(pLeft);
+ sqlite3Fts5ParseNodeFree(pRight);
+ sqlite3Fts5ParseNearsetFree(pNear);
+ }
+ return pRet;
+}
+
+/*
+** Return a nul-terminated string, allocated with sqlite3_malloc(), holding
+** a quoted rendering of term pTerm and all of its synonyms.
+**
+** Each term is wrapped in double quotes with embedded '"' characters
+** doubled. Synonyms are separated by '|'. If pTerm->bPrefix is set, the
+** text " *" is appended. Returns NULL if the allocation fails; the caller
+** owns the returned buffer.
+*/
+static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
+  int nByte = 0;
+  Fts5ExprTerm *p;
+  char *zQuoted;
+
+  /* Determine the maximum amount of space required. Each term in the
+  ** synonym chain is sized from its own text: at most two output bytes per
+  ** input byte, plus two quotes and a '|' (3), plus room for " *" or the
+  ** nul terminator (2). */
+  for(p=pTerm; p; p=p->pSynonym){
+    nByte += (int)strlen(p->zTerm) * 2 + 3 + 2;
+  }
+  zQuoted = sqlite3_malloc(nByte);
+
+  if( zQuoted ){
+    int i = 0;
+    for(p=pTerm; p; p=p->pSynonym){
+      char *zIn = p->zTerm;
+      zQuoted[i++] = '"';
+      while( *zIn ){
+        if( *zIn=='"' ) zQuoted[i++] = '"';
+        zQuoted[i++] = *zIn++;
+      }
+      zQuoted[i++] = '"';
+      if( p->pSynonym ) zQuoted[i++] = '|';
+    }
+    if( pTerm->bPrefix ){
+      zQuoted[i++] = ' ';
+      zQuoted[i++] = '*';
+    }
+    zQuoted[i++] = '\0';
+  }
+  return zQuoted;
+}
+
+/*
+** Format (zFmt, ...) with sqlite3_vmprintf() and return the result appended
+** to zApp as a newly allocated string.
+**
+** zApp may be NULL, in which case just the formatted text is returned.
+** zApp is always freed by this call. NULL is returned if the formatting or
+** the concatenation fails.
+*/
+static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
+  char *zTail;                    /* Text produced from zFmt */
+  char *zResult;                  /* Value to return */
+  va_list ap;
+
+  va_start(ap, zFmt);
+  zTail = sqlite3_vmprintf(zFmt, ap);
+  va_end(ap);
+
+  if( zApp==0 || zTail==0 ){
+    zResult = zTail;
+  }else{
+    zResult = sqlite3_mprintf("%s%s", zApp, zTail);
+    sqlite3_free(zTail);
+  }
+  sqlite3_free(zApp);
+  return zResult;
+}
+
+/*
+** Compose a tcl-readable representation of expression pExpr. Return a
+** pointer to a buffer containing that representation. It is the
+** responsibility of the caller to at some point free the buffer using
+** sqlite3_free().
+**
+** STRING/TERM nodes are rendered as
+**
+**     <zNearsetCmd> [-col N | -col {N ...}] [-near N] -- {phrase} ...
+**
+** and AND/OR/NOT nodes as the operator name followed by each child in
+** square brackets. pConfig is only passed through to recursive calls.
+**
+** NULL is returned if any allocation fails. Because fts5PrintfAppend()
+** starts a fresh string when handed a NULL prefix, every append inside a
+** loop is guarded so that a failure cannot be followed by further appends
+** that would yield a truncated but non-NULL result.
+*/
+static char *fts5ExprPrintTcl(
+  Fts5Config *pConfig,
+  const char *zNearsetCmd,
+  Fts5ExprNode *pExpr
+){
+  char *zRet = 0;
+  if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
+    Fts5ExprNearset *pNear = pExpr->pNear;
+    int i;
+    int iTerm;
+
+    zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
+    if( zRet==0 ) return 0;
+    if( pNear->pColset ){
+      int *aiCol = pNear->pColset->aiCol;
+      int nCol = pNear->pColset->nCol;
+      if( nCol==1 ){
+        zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
+      }else{
+        zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
+        for(i=1; zRet && i<nCol; i++){
+          zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
+        }
+        if( zRet ) zRet = fts5PrintfAppend(zRet, "} ");
+      }
+      if( zRet==0 ) return 0;
+    }
+
+    if( pNear->nPhrase>1 ){
+      zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
+      if( zRet==0 ) return 0;
+    }
+
+    zRet = fts5PrintfAppend(zRet, "--");
+    if( zRet==0 ) return 0;
+
+    for(i=0; i<pNear->nPhrase; i++){
+      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+
+      zRet = fts5PrintfAppend(zRet, " {");
+      for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
+        char *zTerm = pPhrase->aTerm[iTerm].zTerm;
+        zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
+        if( zRet && pPhrase->aTerm[iTerm].bPrefix ){
+          zRet = fts5PrintfAppend(zRet, "*");
+        }
+      }
+
+      if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
+      if( zRet==0 ) return 0;
+    }
+
+  }else{
+    char const *zOp = 0;
+    int i;
+    switch( pExpr->eType ){
+      case FTS5_AND: zOp = "AND"; break;
+      case FTS5_NOT: zOp = "NOT"; break;
+      default:
+        assert( pExpr->eType==FTS5_OR );
+        zOp = "OR";
+        break;
+    }
+
+    zRet = sqlite3_mprintf("%s", zOp);
+    for(i=0; zRet && i<pExpr->nChild; i++){
+      char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
+      if( !z ){
+        sqlite3_free(zRet);
+        zRet = 0;
+      }else{
+        /* %z hands ownership of z to the formatter */
+        zRet = fts5PrintfAppend(zRet, " [%z]", z);
+      }
+    }
+  }
+
+  return zRet;
+}
+
+static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ /*
+ ** Render expression pExpr as query text and return it in a newly
+ ** allocated buffer, or return NULL if an allocation fails.
+ **
+ ** STRING/TERM nodes print an optional "<column> : " prefix followed by
+ ** their phrases, wrapped in NEAR(..., N) when there is more than one
+ ** phrase. AND/OR/NOT nodes print their children joined by the operator,
+ ** with any child that is not a STRING/TERM node placed in parentheses. */
+ char *zRet = 0;
+ if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
+ Fts5ExprNearset *pNear = pExpr->pNear;
+ int i;
+ int iTerm;
+
+ if( pNear->pColset ){
+ int iCol = pNear->pColset->aiCol[0]; /* only the first column of the set
+ ** is printed */
+ zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]);
+ if( zRet==0 ) return 0;
+ }
+
+ if( pNear->nPhrase>1 ){
+ zRet = fts5PrintfAppend(zRet, "NEAR(");
+ if( zRet==0 ) return 0;
+ }
+
+ for(i=0; i<pNear->nPhrase; i++){
+ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
+ if( i!=0 ){
+ zRet = fts5PrintfAppend(zRet, " ");
+ if( zRet==0 ) return 0;
+ }
+ for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
+ char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
+ if( zTerm ){
+ zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
+ sqlite3_free(zTerm);
+ }
+ if( zTerm==0 || zRet==0 ){ /* either allocation failed */
+ sqlite3_free(zRet);
+ return 0;
+ }
+ }
+ }
+
+ if( pNear->nPhrase>1 ){
+ zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
+ if( zRet==0 ) return 0;
+ }
+
+ }else{
+ char const *zOp = 0;
+ int i;
+
+ switch( pExpr->eType ){
+ case FTS5_AND: zOp = " AND "; break;
+ case FTS5_NOT: zOp = " NOT "; break;
+ default:
+ assert( pExpr->eType==FTS5_OR );
+ zOp = " OR ";
+ break;
+ }
+
+ for(i=0; i<pExpr->nChild; i++){
+ char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
+ if( z==0 ){
+ sqlite3_free(zRet);
+ zRet = 0;
+ }else{
+ int e = pExpr->apChild[i]->eType;
+ int b = (e!=FTS5_STRING && e!=FTS5_TERM); /* true if child needs parens */
+ zRet = fts5PrintfAppend(zRet, "%s%s%z%s",
+ (i==0 ? "" : zOp),
+ (b?"(":""), z, (b?")":"")
+ );
+ }
+ if( zRet==0 ) break;
+ }
+ }
+
+ return zRet;
+}
+
+/*
+** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
+** and fts5_expr_tcl() (bTcl!=0).
+**
+** apVal[0] is the query expression. For fts5_expr_tcl() the optional
+** apVal[1] overrides the default "nearset" command name. All remaining
+** arguments are handed to sqlite3Fts5ConfigParse() after the fixed leading
+** entries (0, "main", "tbl").
+**
+** On success the textual form of the parsed expression is returned as the
+** SQL result; an expression whose root has no xNext method yields the
+** empty string. On failure the SQL result is the error message, if one was
+** produced, or else the bare error code.
+**
+** sqlite3_value_text() may return NULL for the expression argument; that
+** case is treated as an empty expression instead of being passed on as a
+** NULL pointer.
+*/
+static void fts5ExprFunction(
+  sqlite3_context *pCtx,          /* Function call context */
+  int nArg,                       /* Number of args */
+  sqlite3_value **apVal,          /* Function arguments */
+  int bTcl                        /* True for fts5_expr_tcl() */
+){
+  Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
+  sqlite3 *db = sqlite3_context_db_handle(pCtx);
+  const char *zExpr = 0;
+  char *zErr = 0;
+  Fts5Expr *pExpr = 0;
+  int rc;
+  int i;
+
+  const char **azConfig;          /* Array of arguments for Fts5Config */
+  const char *zNearsetCmd = "nearset";
+  int nConfig;                    /* Size of azConfig[] */
+  Fts5Config *pConfig = 0;
+  int iArg = 1;                   /* Index of first config argument */
+
+  if( nArg<1 ){
+    zErr = sqlite3_mprintf("wrong number of arguments to function %s",
+        bTcl ? "fts5_expr_tcl" : "fts5_expr"
+    );
+    sqlite3_result_error(pCtx, zErr, -1);
+    sqlite3_free(zErr);
+    return;
+  }
+
+  if( bTcl && nArg>1 ){
+    zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
+    iArg = 2;
+  }
+
+  nConfig = 3 + (nArg-iArg);
+  azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig);
+  if( azConfig==0 ){
+    sqlite3_result_error_nomem(pCtx);
+    return;
+  }
+  azConfig[0] = 0;
+  azConfig[1] = "main";
+  azConfig[2] = "tbl";
+  for(i=3; iArg<nArg; iArg++){
+    azConfig[i++] = (const char*)sqlite3_value_text(apVal[iArg]);
+  }
+
+  zExpr = (const char*)sqlite3_value_text(apVal[0]);
+  if( zExpr==0 ) zExpr = "";
+
+  rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr);
+  }
+  if( rc==SQLITE_OK ){
+    char *zText;
+    if( pExpr->pRoot->xNext==0 ){
+      zText = sqlite3_mprintf("");
+    }else if( bTcl ){
+      zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
+    }else{
+      zText = fts5ExprPrint(pConfig, pExpr->pRoot);
+    }
+    if( zText==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
+      sqlite3_free(zText);
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    if( zErr ){
+      sqlite3_result_error(pCtx, zErr, -1);
+      sqlite3_free(zErr);
+    }else{
+      sqlite3_result_error_code(pCtx, rc);
+    }
+  }
+  sqlite3_free((void *)azConfig);
+  sqlite3Fts5ConfigFree(pConfig);
+  sqlite3Fts5ExprFree(pExpr);
+}
+
+static void fts5ExprFunctionHr( /* Scalar-function callback that forwards to
+ ** fts5ExprFunction() with bTcl==0; registered as "fts5_expr" by
+ ** sqlite3Fts5ExprInit() */
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args */
+ sqlite3_value **apVal /* Function arguments */
+){
+ fts5ExprFunction(pCtx, nArg, apVal, 0);
+}
+static void fts5ExprFunctionTcl( /* Scalar-function callback that forwards to
+ ** fts5ExprFunction() with bTcl==1; registered as "fts5_expr_tcl" by
+ ** sqlite3Fts5ExprInit() */
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args */
+ sqlite3_value **apVal /* Function arguments */
+){
+ fts5ExprFunction(pCtx, nArg, apVal, 1);
+}
+
+/*
+** SQL scalar function fts5_isalnum(X). X is read as an integer code point
+** and the value of sqlite3Fts5UnicodeIsalnum(X) is returned as an integer.
+** Calling the function with anything other than exactly one argument is
+** an error.
+*/
+static void fts5ExprIsAlnum(
+  sqlite3_context *pCtx,          /* Function call context */
+  int nArg,                       /* Number of args */
+  sqlite3_value **apVal           /* Function arguments */
+){
+  if( nArg==1 ){
+    int iCode = sqlite3_value_int(apVal[0]);
+    sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode));
+  }else{
+    sqlite3_result_error(pCtx,
+        "wrong number of arguments to function fts5_isalnum", -1
+    );
+  }
+}
+
+/*
+** SQL scalar function fts5_fold(X [, bRemoveDiacritics]). X is read as an
+** integer code point and sqlite3Fts5UnicodeFold() is applied to it; the
+** optional second argument (default 0) is passed through as the
+** remove-diacritics flag. One or two arguments are accepted.
+*/
+static void fts5ExprFold(
+  sqlite3_context *pCtx,          /* Function call context */
+  int nArg,                       /* Number of args */
+  sqlite3_value **apVal           /* Function arguments */
+){
+  int iCode;
+  int bRemoveDiacritics = 0;
+
+  if( nArg!=1 && nArg!=2 ){
+    sqlite3_result_error(pCtx,
+        "wrong number of arguments to function fts5_fold", -1
+    );
+    return;
+  }
+
+  iCode = sqlite3_value_int(apVal[0]);
+  if( nArg==2 ){
+    bRemoveDiacritics = sqlite3_value_int(apVal[1]);
+  }
+  sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
+}
+
+/*
+** Called during initialization to register the scalar functions
+** fts5_expr(), fts5_expr_tcl(), fts5_isalnum() and fts5_fold() with
+** database handle db. Each is registered as accepting any number of
+** arguments, with pGlobal as its user-data pointer. Registration stops at
+** the first failure and that error code is returned.
+*/
+static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
+  struct Fts5ExprFunc {
+    const char *z;
+    void (*x)(sqlite3_context*,int,sqlite3_value**);
+  } aFunc[] = {
+    { "fts5_expr",     fts5ExprFunctionHr },
+    { "fts5_expr_tcl", fts5ExprFunctionTcl },
+    { "fts5_isalnum",  fts5ExprIsAlnum },
+    { "fts5_fold",     fts5ExprFold },
+  };
+  int rc = SQLITE_OK;
+  int iFunc = 0;
+
+  while( rc==SQLITE_OK && iFunc<ArraySize(aFunc) ){
+    rc = sqlite3_create_function(
+        db, aFunc[iFunc].z, -1, SQLITE_UTF8, (void*)pGlobal,
+        aFunc[iFunc].x, 0, 0
+    );
+    iFunc++;
+  }
+
+  /* Avoid a warning indicating that sqlite3Fts5ParserTrace() is unused */
+#ifndef NDEBUG
+  (void)sqlite3Fts5ParserTrace;
+#endif
+
+  return rc;
+}
+
+/*
+** Return the number of phrases in expression pExpr, or 0 if pExpr is NULL.
+*/
+static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
+  if( pExpr==0 ) return 0;
+  return pExpr->nPhrase;
+}
+
+/*
+** Return the number of terms in the iPhrase'th phrase in pExpr. If iPhrase
+** is out of range, return 0.
+*/
+static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
+  int bInRange = (iPhrase>=0 && iPhrase<pExpr->nPhrase);
+  return bInRange ? pExpr->apExprPhrase[iPhrase]->nTerm : 0;
+}
+
+/*
+** Access the current position list for phrase iPhrase.
+**
+** If the node that owns the phrase is not at EOF and is positioned on the
+** same rowid as the expression root, *pa is set to the phrase's poslist
+** buffer and its size in bytes is returned. Otherwise *pa is set to NULL
+** and 0 is returned.
+*/
+static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
+  Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
+  Fts5ExprNode *pNode = pPhrase->pNode;
+
+  if( pNode->bEof!=0 || pNode->iRowid!=pExpr->pRoot->iRowid ){
+    *pa = 0;
+    return 0;
+  }
+  *pa = pPhrase->poslist.p;
+  return pPhrase->poslist.n;
+}
+
+struct Fts5PoslistPopulator { /* Per-phrase state used while phrase position
+ ** lists are rebuilt by tokenizing document text; one array element per
+ ** phrase is allocated by sqlite3Fts5ExprClearPoslists() */
+ Fts5PoslistWriter writer; /* State for sqlite3Fts5PoslistWriterAppend() */
+ int bOk; /* True if ok to populate */
+ int bMiss; /* Set by sqlite3Fts5ExprClearPoslists() in bLive mode when the
+ ** phrase has no data for the current row; forces bOk to 0 */
+};
+
+/*
+** Allocate and return a zeroed array of Fts5PoslistPopulator objects, one
+** per phrase in pExpr, and prepare each phrase's poslist for repopulation.
+**
+** If bLive is true, a phrase whose poslist is empty, whose node is on a
+** different rowid from the root, or whose node is at EOF is flagged bMiss
+** and its poslist is left untouched. In every other case the poslist is
+** truncated to zero bytes. Returns NULL if the allocation fails.
+*/
+static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){
+  Fts5PoslistPopulator *aPop;
+  int iPhrase;
+
+  aPop = sqlite3_malloc(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
+  if( aPop==0 ) return 0;
+  memset(aPop, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
+
+  for(iPhrase=0; iPhrase<pExpr->nPhrase; iPhrase++){
+    Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
+    Fts5Buffer *pBuf = &pPhrase->poslist;
+    Fts5ExprNode *pNode = pPhrase->pNode;
+    int bMiss = 0;
+
+    assert( pExpr->apExprPhrase[iPhrase]->nTerm==1 );
+    if( bLive ){
+      bMiss = (pBuf->n==0
+            || pNode->iRowid!=pExpr->pRoot->iRowid
+            || pNode->bEof);
+    }
+    if( bMiss ){
+      aPop[iPhrase].bMiss = 1;
+    }else{
+      pBuf->n = 0;
+    }
+  }
+  return aPop;
+}
+
+struct Fts5ExprCtx { /* Context passed to fts5ExprPopulatePoslistsCb() */
+ Fts5Expr *pExpr; /* Expression whose phrase poslists are being filled */
+ Fts5PoslistPopulator *aPopulator; /* One entry per phrase in pExpr */
+ i64 iOff; /* Position of current token; starts at (iCol<<32)-1 and is
+ ** incremented for each token that is not FTS5_TOKEN_COLOCATED */
+};
+typedef struct Fts5ExprCtx Fts5ExprCtx;
+
+/*
+** Return 1 if column iCol is a member of pColset, or 0 otherwise. This is
+** a linear scan.
+**
+** TODO: Make this more efficient!
+*/
+static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
+  int k = 0;
+  while( k<pColset->nCol ){
+    if( pColset->aiCol[k]==iCol ) return 1;
+    k++;
+  }
+  return 0;
+}
+
+/*
+** Tokenizer callback used by sqlite3Fts5ExprPopulatePoslists().
+**
+** The running token position is advanced unless the token is flagged
+** FTS5_TOKEN_COLOCATED. Then, for each phrase whose populator has bOk set,
+** the token is compared with the phrase's first term and its synonyms. A
+** term matches if it is byte-for-byte equal to the token, or if it is a
+** prefix term and the token begins with it. On the first match the current
+** position is appended to that phrase's poslist.
+**
+** Returns SQLITE_OK, or the error code from the poslist writer.
+*/
+static int fts5ExprPopulatePoslistsCb(
+  void *pCtx,                     /* Copy of 2nd argument to xTokenize() */
+  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
+  const char *pToken,             /* Pointer to buffer containing token */
+  int nToken,                     /* Size of token in bytes */
+  int iUnused1,                   /* Byte offset of token within input text */
+  int iUnused2                    /* Byte offset of end of token within input text */
+){
+  Fts5ExprCtx *pState = (Fts5ExprCtx*)pCtx;
+  Fts5Expr *pExpr = pState->pExpr;
+  int iPhrase;
+
+  UNUSED_PARAM2(iUnused1, iUnused2);
+
+  if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) pState->iOff++;
+
+  for(iPhrase=0; iPhrase<pExpr->nPhrase; iPhrase++){
+    Fts5ExprPhrase *pPhrase;
+    Fts5ExprTerm *pTerm;
+
+    if( pState->aPopulator[iPhrase].bOk==0 ) continue;
+    pPhrase = pExpr->apExprPhrase[iPhrase];
+    pTerm = &pPhrase->aTerm[0];
+    while( pTerm ){
+      int nTerm = strlen(pTerm->zTerm);
+      int bLenOk = (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix));
+      if( bLenOk && memcmp(pTerm->zTerm, pToken, nTerm)==0 ){
+        int rc = sqlite3Fts5PoslistWriterAppend(
+            &pPhrase->poslist, &pState->aPopulator[iPhrase].writer,
+            pState->iOff
+        );
+        if( rc ) return rc;
+        break;
+      }
+      pTerm = pTerm->pSynonym;
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Tokenize the text (z/n) of column iCol and add the positions of matching
+** tokens to the poslists of the phrases in pExpr.
+**
+** A phrase is populated only if its populator is not flagged bMiss and
+** its node either has no column filter or has one that includes iCol. The
+** return value is that of sqlite3Fts5Tokenize().
+*/
+static int sqlite3Fts5ExprPopulatePoslists(
+  Fts5Config *pConfig,
+  Fts5Expr *pExpr,
+  Fts5PoslistPopulator *aPopulator,
+  int iCol,
+  const char *z, int n
+){
+  Fts5ExprCtx sCtx;
+  int iPhrase;
+
+  sCtx.pExpr = pExpr;
+  sCtx.aPopulator = aPopulator;
+  sCtx.iOff = (((i64)iCol) << 32) - 1;
+
+  for(iPhrase=0; iPhrase<pExpr->nPhrase; iPhrase++){
+    Fts5ExprNode *pNode = pExpr->apExprPhrase[iPhrase]->pNode;
+    Fts5Colset *pColset = pNode->pNear->pColset;
+    int bSkip = (
+        (pColset && 0==fts5ExprColsetTest(pColset, iCol))
+     || aPopulator[iPhrase].bMiss
+    );
+    aPopulator[iPhrase].bOk = (bSkip ? 0 : 1);
+  }
+
+  return sqlite3Fts5Tokenize(pConfig,
+      FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
+  );
+}
+
+/*
+** Truncate to zero bytes the poslist of the first phrase of every
+** STRING/TERM node in the sub-tree rooted at pNode.
+*/
+static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
+  int iChild;
+
+  if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
+    pNode->pNear->apPhrase[0]->poslist.n = 0;
+    return;
+  }
+  for(iChild=0; iChild<pNode->nChild; iChild++){
+    fts5ExprClearPoslists(pNode->apChild[iChild]);
+  }
+}
+
+static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ /*
+ ** Mark pNode as positioned on iRowid and not at EOF, then return 1 if the
+ ** sub-tree matches according to the phrase poslists currently stored, or
+ ** 0 otherwise. When an AND or NOT node does not match, all poslists
+ ** beneath it are cleared. Children that are not visited because of an
+ ** early return keep their previous iRowid/bEof values. */
+ pNode->iRowid = iRowid;
+ pNode->bEof = 0;
+ switch( pNode->eType ){
+ case FTS5_TERM:
+ case FTS5_STRING:
+ return (pNode->pNear->apPhrase[0]->poslist.n>0); /* matches iff the first
+ ** phrase has position data */
+
+ case FTS5_AND: {
+ int i;
+ for(i=0; i<pNode->nChild; i++){
+ if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
+ fts5ExprClearPoslists(pNode);
+ return 0;
+ }
+ }
+ break;
+ }
+
+ case FTS5_OR: {
+ int i;
+ int bRet = 0;
+ for(i=0; i<pNode->nChild; i++){ /* no short-circuit: every child is
+ ** visited so that each gets its iRowid/bEof updated */
+ if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
+ bRet = 1;
+ }
+ }
+ return bRet;
+ }
+
+ default: {
+ assert( pNode->eType==FTS5_NOT );
+ if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
+ || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
+ ){ /* left side must match and right side must not */
+ fts5ExprClearPoslists(pNode);
+ return 0;
+ }
+ break;
+ }
+ }
+ return 1;
+}
+
+static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ /*
+ ** Run fts5ExprCheckPoslists() on the root of pExpr for row iRowid; the
+ ** match/no-match result is discarded. */
+ fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
+}
+
+/*
+** Clear the bEof flag on pNode and on every node beneath it.
+*/
+static void fts5ExprClearEof(Fts5ExprNode *pNode){
+  int iChild = pNode->nChild;
+
+  pNode->bEof = 0;
+  while( iChild>0 ){
+    iChild--;
+    fts5ExprClearEof(pNode->apChild[iChild]);
+  }
+}
+static void sqlite3Fts5ExprClearEof(Fts5Expr *pExpr){ /* Clear the bEof flag
+ ** on every node of expression pExpr, starting from its root */
+ fts5ExprClearEof(pExpr->pRoot);
+}
+
+/*
+** This function is only called for detail=columns tables.
+**
+** Set (*ppCollist, *pnCollist) to the column list for phrase iPhrase on the
+** current row. If the phrase's node is at EOF, is positioned on a rowid
+** other than the root's, or the phrase poslist is empty, the outputs are
+** set to (0, 0).
+**
+** Returns SQLITE_OK, or the error code from fts5ExprSynonymList() when the
+** phrase's term has synonyms.
+*/
+static int sqlite3Fts5ExprPhraseCollist(
+ Fts5Expr *pExpr,
+ int iPhrase,
+ const u8 **ppCollist,
+ int *pnCollist
+){
+ Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
+ Fts5ExprNode *pNode = pPhrase->pNode;
+ int rc = SQLITE_OK;
+
+ assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
+ assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
+
+ if( pNode->bEof==0
+ && pNode->iRowid==pExpr->pRoot->iRowid
+ && pPhrase->poslist.n>0
+ ){
+ Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
+ if( pTerm->pSynonym ){
+ Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; /* the address just
+ ** past the first synonym object; presumably a scratch Fts5Buffer is
+ ** allocated there - confirm against the synonym allocation code */
+ rc = fts5ExprSynonymList(
+ pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
+ );
+ }else{
+ *ppCollist = pPhrase->aTerm[0].pIter->pData; /* read directly from the
+ ** term's iterator */
+ *pnCollist = pPhrase->aTerm[0].pIter->nData;
+ }
+ }else{
+ *ppCollist = 0;
+ *pnCollist = 0;
+ }
+
+ return rc;
+}
+
+
+#line 1 "fts5_hash.c"
+/*
+** 2014 August 11
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+
+
+/* #include "fts5Int.h" */
+
+typedef struct Fts5HashEntry Fts5HashEntry;
+
+/*
+** This file contains the implementation of an in-memory hash table used
+** to accumulate "term -> doclist" content before it is flushed to a level-0
+** segment.
+*/
+
+
+struct Fts5Hash { /* In-memory hash table; entries are Fts5HashEntry objects
+ ** chained per slot through Fts5HashEntry.pHashNext */
+ int eDetail; /* Copy of Fts5Config.eDetail */
+ int *pnByte; /* Pointer to bytes counter; updated by sqlite3Fts5HashWrite() */
+ int nEntry; /* Number of entries currently in hash */
+ int nSlot; /* Size of aSlot[] array */
+ Fts5HashEntry *pScan; /* Current ordered scan item */
+ Fts5HashEntry **aSlot; /* Array of hash slots */
+};
+
+/*
+** Each entry in the hash table is represented by an object of the
+** following type. Each object, its key (zKey[]) and its current data
+** are stored in a single memory allocation. The position list data
+** immediately follows the key data in memory.
+**
+** The data that follows the key is in a similar, but not identical format
+** to the doclist data stored in the database. It is:
+**
+** * Rowid, as a varint
+** * Position list, without 0x00 terminator.
+** * Size of previous position list and rowid, as a 4 byte
+** big-endian integer.
+**
+** iRowidOff:
+** Offset of last rowid written to data area. Relative to first byte of
+** structure.
+**
+** nData:
+** Bytes of data written since iRowidOff.
+**
+** NOTE(review): parts of the description above do not match the code in
+** this file and look stale. The structure has no iRowidOff field, and
+** nData is used as an offset from the start of the structure (it is
+** initialised to include FTS5_HASHENTRYSIZE and the key). The size field
+** is written by fts5HashAddPoslistSize() at offset iSzPoslist as a single
+** byte or a varint, not as a 4 byte big-endian integer, and rowids after
+** the first in an entry are written as deltas from the previous rowid.
+** Confirm before relying on the text above.
+*/
+struct Fts5HashEntry {
+ Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
+ Fts5HashEntry *pScanNext; /* Next entry in sorted order */
+
+ int nAlloc; /* Total size of allocation */
+ int iSzPoslist; /* Offset of space for 4-byte poslist size */
+ int nData; /* Total bytes of data (incl. structure) */
+ int nKey; /* Length of zKey[] in bytes */
+ u8 bDel; /* Set delete-flag @ iSzPoslist */
+ u8 bContent; /* Set content-flag (detail=none mode) */
+ i16 iCol; /* Column of last value written */
+ int iPos; /* Position of last value written */
+ i64 iRowid; /* Rowid of last value written */
+ char zKey[8]; /* Nul-terminated entry key */
+};
+
+/*
+** Size of Fts5HashEntry without the zKey[] array. The constant 8 must
+** match the declared size of Fts5HashEntry.zKey[]. The key and the entry
+** data are stored from this offset onwards within the same allocation.
+*/
+#define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8)
+
+
+
+/*
+** Allocate a new hash table with 1024 empty slots.
+**
+** The detail mode is copied from pConfig and pnByte is stored as the
+** table's byte counter. On success *ppNew is set to the new object and
+** SQLITE_OK is returned. If either allocation fails, *ppNew is set to NULL
+** and SQLITE_NOMEM is returned.
+*/
+static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){
+  const int nInitSlot = 1024;
+  int nSlotByte = sizeof(Fts5HashEntry*) * nInitSlot;
+  Fts5Hash *pHash;
+
+  *ppNew = 0;
+  pHash = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
+  if( pHash==0 ) return SQLITE_NOMEM;
+
+  memset(pHash, 0, sizeof(Fts5Hash));
+  pHash->pnByte = pnByte;
+  pHash->eDetail = pConfig->eDetail;
+  pHash->nSlot = nInitSlot;
+
+  pHash->aSlot = (Fts5HashEntry**)sqlite3_malloc(nSlotByte);
+  if( pHash->aSlot==0 ){
+    sqlite3_free(pHash);
+    return SQLITE_NOMEM;
+  }
+  memset(pHash->aSlot, 0, nSlotByte);
+
+  *ppNew = pHash;
+  return SQLITE_OK;
+}
+
+/*
+** Free a hash table object, including all of its entries and its slot
+** array. Passing NULL is a no-op.
+*/
+static void sqlite3Fts5HashFree(Fts5Hash *pHash){
+  if( pHash==0 ) return;
+  sqlite3Fts5HashClear(pHash);
+  sqlite3_free(pHash->aSlot);
+  sqlite3_free(pHash);
+}
+
+/*
+** Empty (but do not delete) a hash table. Every entry is freed, all slots
+** are reset to NULL and the entry count is reset to zero.
+*/
+static void sqlite3Fts5HashClear(Fts5Hash *pHash){
+  int iSlot;
+
+  for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
+    Fts5HashEntry *pEntry = pHash->aSlot[iSlot];
+    while( pEntry ){
+      Fts5HashEntry *pDoomed = pEntry;
+      pEntry = pEntry->pHashNext;
+      sqlite3_free(pDoomed);
+    }
+  }
+  memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
+  pHash->nEntry = 0;
+}
+
+/*
+** Return the slot number, in the range 0..nSlot-1, for the n byte key at p.
+** The bytes are folded into the hash from the last byte to the first.
+*/
+static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
+  unsigned int h = 13;
+  int iByte = n;
+
+  while( iByte>0 ){
+    iByte--;
+    h = (h << 3) ^ h ^ p[iByte];
+  }
+  return (h % nSlot);
+}
+
+/*
+** Return the slot number for the key formed by byte b followed by the n
+** bytes at p. The result equals fts5HashKey() applied to that n+1 byte
+** concatenation: the bytes of p are folded in last-to-first and b is
+** folded in at the end.
+*/
+static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
+  unsigned int h = 13;
+  int iByte = n;
+
+  while( iByte>0 ){
+    iByte--;
+    h = (h << 3) ^ h ^ p[iByte];
+  }
+  h = (h << 3) ^ h ^ b;
+  return (h % nSlot);
+}
+
+/*
+** Resize the hash table by doubling the number of slots.
+**
+** All existing entries are relinked into a newly allocated slot array and
+** the old array is freed. Returns SQLITE_OK on success, or SQLITE_NOMEM if
+** the new array cannot be allocated, in which case the table is unchanged.
+**
+** Each entry is rehashed over exactly the bytes that were hashed when it
+** was inserted by sqlite3Fts5HashWrite(): the prefix byte plus the nKey
+** token bytes. Using the stored length, rather than strlen(), keeps the
+** slot correct for tokens that contain a 0x00 byte and avoids rescanning
+** every key.
+*/
+static int fts5HashResize(Fts5Hash *pHash){
+  int nNew = pHash->nSlot*2;
+  int i;
+  Fts5HashEntry **apNew;
+  Fts5HashEntry **apOld = pHash->aSlot;
+
+  apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
+  if( !apNew ) return SQLITE_NOMEM;
+  memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
+
+  for(i=0; i<pHash->nSlot; i++){
+    while( apOld[i] ){
+      unsigned int iHash;
+      Fts5HashEntry *p = apOld[i];
+      apOld[i] = p->pHashNext;
+      iHash = fts5HashKey(nNew, (u8*)p->zKey, p->nKey+1);
+      p->pHashNext = apNew[iHash];
+      apNew[iHash] = p;
+    }
+  }
+
+  sqlite3_free(apOld);
+  pHash->nSlot = nNew;
+  pHash->aSlot = apNew;
+  return SQLITE_OK;
+}
+
+static void fts5HashAddPoslistSize(Fts5Hash *pHash, Fts5HashEntry *p){ /*
+ ** Finish the record for the most recently written rowid of entry p, if
+ ** one is still open (p->iSzPoslist!=0). For detail=none tables this
+ ** appends 0x00 marker bytes when the delete flag is set. Otherwise it
+ ** fills in the size field reserved at offset iSzPoslist. The pending
+ ** state (iSzPoslist, bDel, bContent) is then reset. */
+ if( p->iSzPoslist ){
+ u8 *pPtr = (u8*)p;
+ if( pHash->eDetail==FTS5_DETAIL_NONE ){
+ assert( p->nData==p->iSzPoslist );
+ if( p->bDel ){
+ pPtr[p->nData++] = 0x00;
+ if( p->bContent ){
+ pPtr[p->nData++] = 0x00;
+ }
+ }
+ }else{
+ int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */
+ int nPos = nSz*2 + p->bDel; /* Value of nPos field */
+
+ assert( p->bDel==0 || p->bDel==1 );
+ if( nPos<=127 ){
+ pPtr[p->iSzPoslist] = (u8)nPos; /* fits in the single reserved byte */
+ }else{
+ int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
+ memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); /*
+ ** shift the poslist up to make room for a multi-byte varint */
+ sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
+ p->nData += (nByte-1);
+ }
+ }
+
+ p->iSzPoslist = 0;
+ p->bDel = 0;
+ p->bContent = 0;
+ }
+}
+
+/*
+** Add an entry to the in-memory hash table. The key is the concatenation
+** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
+**
+** (bByte || pToken) -> (iRowid,iCol,iPos)
+**
+** Or, if iCol is negative, then the value is a delete marker.
+**
+** Returns SQLITE_OK on success, or SQLITE_NOMEM if the slot array cannot
+** be grown or the entry cannot be allocated or reallocated.
+**
+** *pHash->pnByte is increased by nIncr. For an existing entry nIncr is the
+** growth of p->nData during this call. NOTE(review): for a newly created
+** entry both the initial and the final p->nData are added, so the counter
+** appears to over-count new entries - confirm whether that is intended.
+*/
+static int sqlite3Fts5HashWrite(
+ Fts5Hash *pHash,
+ i64 iRowid, /* Rowid for this entry */
+ int iCol, /* Column token appears in (-ve -> delete) */
+ int iPos, /* Position of token within column */
+ char bByte, /* First byte of token */
+ const char *pToken, int nToken /* Token to add or remove to or from index */
+){
+ unsigned int iHash;
+ Fts5HashEntry *p;
+ u8 *pPtr;
+ int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
+ int bNew; /* If non-delete entry should be written */
+
+ bNew = (pHash->eDetail==FTS5_DETAIL_FULL);
+
+ /* Attempt to locate an existing hash entry */
+ iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
+ for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
+ if( p->zKey[0]==bByte
+ && p->nKey==nToken
+ && memcmp(&p->zKey[1], pToken, nToken)==0
+ ){
+ break;
+ }
+ }
+
+ /* If an existing hash entry cannot be found, create a new one. */
+ if( p==0 ){
+ /* Figure out how much space to allocate */
+ int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64;
+ if( nByte<128 ) nByte = 128;
+
+ /* Grow the Fts5Hash.aSlot[] array if necessary. */
+ if( (pHash->nEntry*2)>=pHash->nSlot ){
+ int rc = fts5HashResize(pHash);
+ if( rc!=SQLITE_OK ) return rc;
+ iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); /*
+ ** slot count changed, so the slot number must be recomputed */
+ }
+
+ /* Allocate new Fts5HashEntry and add it to the hash table. */
+ p = (Fts5HashEntry*)sqlite3_malloc(nByte);
+ if( !p ) return SQLITE_NOMEM;
+ memset(p, 0, FTS5_HASHENTRYSIZE);
+ p->nAlloc = nByte;
+ p->zKey[0] = bByte;
+ memcpy(&p->zKey[1], pToken, nToken);
+ assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) );
+ p->nKey = nToken; /* excludes the leading bByte */
+ p->zKey[nToken+1] = '\0';
+ p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE; /* header + key + terminator */
+ p->pHashNext = pHash->aSlot[iHash];
+ pHash->aSlot[iHash] = p;
+ pHash->nEntry++;
+
+ /* Add the first rowid field to the hash-entry */
+ p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
+ p->iRowid = iRowid;
+
+ p->iSzPoslist = p->nData;
+ if( pHash->eDetail!=FTS5_DETAIL_NONE ){
+ p->nData += 1; /* reserve one byte for the poslist size field */
+ p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
+ }
+
+ nIncr += p->nData;
+ }else{
+
+ /* Appending to an existing hash-entry. Check that there is enough
+ ** space to append the largest possible new entry. Worst case scenario
+ ** is:
+ **
+ ** + 9 bytes for a new rowid,
+ ** + 4 byte reserved for the "poslist size" varint.
+ ** + 1 byte for a "new column" byte,
+ ** + 3 bytes for a new column number (16-bit max) as a varint,
+ ** + 5 bytes for the new position offset (32-bit max).
+ */
+ if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
+ int nNew = p->nAlloc * 2;
+ Fts5HashEntry *pNew;
+ Fts5HashEntry **pp;
+ pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
+ if( pNew==0 ) return SQLITE_NOMEM;
+ pNew->nAlloc = nNew;
+ for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); /* find the
+ ** link that still holds the old address so it can be repointed */
+ *pp = pNew;
+ p = pNew;
+ }
+ nIncr -= p->nData;
+ }
+ assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );
+
+ pPtr = (u8*)p;
+
+ /* If this is a new rowid, append the 4-byte size field for the previous
+ ** entry, and the new rowid for this entry. */
+ if( iRowid!=p->iRowid ){
+ fts5HashAddPoslistSize(pHash, p);
+ p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); /*
+ ** stored as a delta from the previous rowid */
+ p->iRowid = iRowid;
+ bNew = 1;
+ p->iSzPoslist = p->nData;
+ if( pHash->eDetail!=FTS5_DETAIL_NONE ){
+ p->nData += 1;
+ p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
+ p->iPos = 0;
+ }
+ }
+
+ if( iCol>=0 ){
+ if( pHash->eDetail==FTS5_DETAIL_NONE ){
+ p->bContent = 1;
+ }else{
+ /* Append a new column value, if necessary */
+ assert( iCol>=p->iCol );
+ if( iCol!=p->iCol ){
+ if( pHash->eDetail==FTS5_DETAIL_FULL ){
+ pPtr[p->nData++] = 0x01;
+ p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
+ p->iCol = iCol;
+ p->iPos = 0;
+ }else{
+ bNew = 1;
+ p->iCol = iPos = iCol; /* the column number is written in place of a
+ ** token position in this mode */
+ }
+ }
+
+ /* Append the new position offset, if necessary */
+ if( bNew ){
+ p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
+ p->iPos = iPos;
+ }
+ }
+ }else{
+ /* This is a delete. Set the delete flag. */
+ p->bDel = 1;
+ }
+
+ nIncr += p->nData;
+ *pHash->pnByte += nIncr;
+ return SQLITE_OK;
+}
+
+
+/*
+** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
+** each sorted in key order. This function merges the two lists into a
+** single list and returns a pointer to its first element.
+**
+** The lists are linked through pScanNext and keys are compared as
+** unsigned bytes. Either argument may be NULL. The comparison loop has no
+** length bound, so it relies on the two keys differing at some byte.
+*/
+static Fts5HashEntry *fts5HashEntryMerge(
+ Fts5HashEntry *pLeft,
+ Fts5HashEntry *pRight
+){
+ Fts5HashEntry *p1 = pLeft;
+ Fts5HashEntry *p2 = pRight;
+ Fts5HashEntry *pRet = 0;
+ Fts5HashEntry **ppOut = &pRet; /* where the next output element is linked */
+
+ while( p1 || p2 ){
+ if( p1==0 ){
+ *ppOut = p2; /* left list exhausted: append the rest of the right list */
+ p2 = 0;
+ }else if( p2==0 ){
+ *ppOut = p1; /* right list exhausted: append the rest of the left list */
+ p1 = 0;
+ }else{
+ int i = 0;
+ while( p1->zKey[i]==p2->zKey[i] ) i++; /* find the first differing byte */
+
+ if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){
+ /* p2 is smaller */
+ *ppOut = p2;
+ ppOut = &p2->pScanNext;
+ p2 = p2->pScanNext;
+ }else{
+ /* p1 is smaller */
+ *ppOut = p1;
+ ppOut = &p1->pScanNext;
+ p1 = p1->pScanNext;
+ }
+ *ppOut = 0; /* keep the output list terminated */
+ }
+ }
+
+ return pRet;
+}
+
+/*
+** Link the entries of hash table pHash into a list sorted by key and store
+** the head of the list in *ppSorted. The list is linked through
+** Fts5HashEntry.pScanNext. If pTerm is not NULL, only entries whose key
+** begins with the nTerm bytes at pTerm are included.
+**
+** Returns SQLITE_OK, or SQLITE_NOMEM if the temporary merge array cannot
+** be allocated, in which case *ppSorted is set to NULL.
+**
+** NOTE(review): the original comment said the hash table is cleared before
+** returning and that the caller must free the list elements. The code
+** here only resets pHash->nEntry to zero; the aSlot[] chains are left in
+** place - confirm the intended ownership.
+*/
+static int fts5HashEntrySort(
+ Fts5Hash *pHash,
+ const char *pTerm, int nTerm, /* Query prefix, if any */
+ Fts5HashEntry **ppSorted
+){
+ const int nMergeSlot = 32;
+ Fts5HashEntry **ap; /* ap[i] is either NULL or a sorted sub-list */
+ Fts5HashEntry *pList;
+ int iSlot;
+ int i;
+
+ *ppSorted = 0;
+ ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
+ if( !ap ) return SQLITE_NOMEM;
+ memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
+
+ for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
+ Fts5HashEntry *pIter;
+ for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
+ if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){
+ Fts5HashEntry *pEntry = pIter;
+ pEntry->pScanNext = 0;
+ for(i=0; ap[i]; i++){ /* merge upwards through the occupied slots */
+ pEntry = fts5HashEntryMerge(pEntry, ap[i]);
+ ap[i] = 0;
+ }
+ ap[i] = pEntry;
+ }
+ }
+ }
+
+ pList = 0;
+ for(i=0; i<nMergeSlot; i++){ /* combine all remaining sub-lists */
+ pList = fts5HashEntryMerge(pList, ap[i]);
+ }
+
+ pHash->nEntry = 0;
+ sqlite3_free(ap);
+ *ppSorted = pList;
+ return SQLITE_OK;
+}
+
+/*
+** Query the hash table for a doclist associated with term pTerm/nTerm.
+*/
+static int sqlite3Fts5HashQuery(
+ Fts5Hash *pHash, /* Hash table to query */
+ const char *pTerm, int nTerm, /* Query term */
+ const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
+ int *pnDoclist /* OUT: Size of doclist in bytes */
+){
+ unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
+ Fts5HashEntry *p;
+
+ for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
+ if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break;
+ }
+
+ if( p ){
+ fts5HashAddPoslistSize(pHash, p);
+ *ppDoclist = (const u8*)&p->zKey[nTerm+1];
+ *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
+ }else{
+ *ppDoclist = 0;
+ *pnDoclist = 0;
+ }
+
+ return SQLITE_OK;
+}
+
+ /*
+ ** Begin a scan of the hash table. The sorted list built by
+ ** fts5HashEntrySort() for prefix pTerm/nTerm is stored in p->pScan, and
+ ** the result code of that call is returned.
+ */
+ static int sqlite3Fts5HashScanInit(
+   Fts5Hash *p,                    /* Hash table to query */
+   const char *pTerm, int nTerm    /* Query prefix */
+ ){
+   int rc = fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
+   return rc;
+ }
+
+ /*
+ ** Advance the scan to the next entry in the p->pScan list. Must not be
+ ** called once the scan is at EOF.
+ */
+ static void sqlite3Fts5HashScanNext(Fts5Hash *p){
+   Fts5HashEntry *pCurrent;
+   assert( !sqlite3Fts5HashScanEof(p) );
+   pCurrent = p->pScan;
+   p->pScan = pCurrent->pScanNext;
+ }
+
+ /*
+ ** Return 1 if the scan has run off the end of the p->pScan list, or 0
+ ** if there is a current entry.
+ */
+ static int sqlite3Fts5HashScanEof(Fts5Hash *p){
+   return p->pScan ? 0 : 1;
+ }
+
+ /*
+ ** Report the entry the scan currently points at. If there is one,
+ ** fts5HashAddPoslistSize() is invoked on it, *pzTerm is set to its key
+ ** and *ppDoclist / *pnDoclist to the bytes stored after the key's nul
+ ** terminator. If the scan is at EOF all three outputs are set to zero.
+ */
+ static void sqlite3Fts5HashScanEntry(
+   Fts5Hash *pHash,
+   const char **pzTerm,            /* OUT: term (nul-terminated) */
+   const u8 **ppDoclist,           /* OUT: pointer to doclist */
+   int *pnDoclist                  /* OUT: size of doclist in bytes */
+ ){
+   Fts5HashEntry *pEntry = pHash->pScan;
+   int nKey;                       /* Length of the key in bytes */
+
+   if( pEntry==0 ){
+     *pzTerm = 0;
+     *ppDoclist = 0;
+     *pnDoclist = 0;
+     return;
+   }
+
+   nKey = (int)strlen(pEntry->zKey);
+   fts5HashAddPoslistSize(pHash, pEntry);
+   *pzTerm = pEntry->zKey;
+   *ppDoclist = (const u8*)&pEntry->zKey[nKey+1];
+   *pnDoclist = pEntry->nData - (FTS5_HASHENTRYSIZE + nKey + 1);
+ }
+
+
+#line 1 "fts5_index.c"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Low level access to the FTS index stored in the database file. The
+ ** routines in this file implement all read and write access to the
+** %_data table. Other parts of the system access this functionality via
+** the interface defined in fts5Int.h.
+*/
+
+
+/* #include "fts5Int.h" */
+
+/*
+** Overview:
+**
+** The %_data table contains all the FTS indexes for an FTS5 virtual table.
+** As well as the main term index, there may be up to 31 prefix indexes.
+** The format is similar to FTS3/4, except that:
+**
+** * all segment b-tree leaf data is stored in fixed size page records
+** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
+** taken to ensure it is possible to iterate in either direction through
+** the entries in a doclist, or to seek to a specific entry within a
+** doclist, without loading it into memory.
+**
+** * large doclists that span many pages have associated "doclist index"
+** records that contain a copy of the first rowid on each page spanned by
+** the doclist. This is used to speed up seek operations, and merges of
+** large doclists with very small doclists.
+**
+** * extra fields in the "structure record" record the state of ongoing
+** incremental merge operations.
+**
+*/
+
+
+ #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */
+ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */
+
+ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */
+
+ #define FTS5_MAIN_PREFIX '0' /* NOTE(review): presumably the byte that tags main-index terms - confirm at use sites */
+
+ #if FTS5_MAX_PREFIX_INDEXES > 31 /* The overview above allows at most 31 prefix indexes */
+ # error "FTS5_MAX_PREFIX_INDEXES is too large"
+ #endif
+
+/*
+** Details:
+**
+** The %_data table managed by this module,
+**
+** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
+**
+** , contains the following 5 types of records. See the comments surrounding
+** the FTS5_*_ROWID macros below for a description of how %_data rowids are
+ ** assigned to each of them.
+**
+** 1. Structure Records:
+**
+** The set of segments that make up an index - the index structure - are
+** recorded in a single record within the %_data table. The record consists
+** of a single 32-bit configuration cookie value followed by a list of
+** SQLite varints. If the FTS table features more than one index (because
+** there are one or more prefix indexes), it is guaranteed that all share
+** the same cookie value.
+**
+** Immediately following the configuration cookie, the record begins with
+** three varints:
+**
+** + number of levels,
+** + total number of segments on all levels,
+** + value of write counter.
+**
+** Then, for each level from 0 to nMax:
+**
+** + number of input segments in ongoing merge.
+** + total number of segments in level.
+** + for each segment from oldest to newest:
+** + segment id (always > 0)
+** + first leaf page number (often 1, always greater than 0)
+** + final leaf page number
+**
+** 2. The Averages Record:
+**
+** A single record within the %_data table. The data is a list of varints.
+** The first value is the number of rows in the index. Then, for each column
+** from left to right, the total number of tokens in the column for all
+** rows of the table.
+**
+** 3. Segment leaves:
+**
+** TERM/DOCLIST FORMAT:
+**
+** Most of each segment leaf is taken up by term/doclist data. The
+** general format of term/doclist, starting with the first term
+** on the leaf page, is:
+**
+** varint : size of first term
+** blob: first term data
+** doclist: first doclist
+** zero-or-more {
+** varint: number of bytes in common with previous term
+** varint: number of bytes of new term data (nNew)
+** blob: nNew bytes of new term data
+** doclist: next doclist
+** }
+**
+** doclist format:
+**
+** varint: first rowid
+** poslist: first poslist
+** zero-or-more {
+** varint: rowid delta (always > 0)
+** poslist: next poslist
+** }
+**
+** poslist format:
+**
+** varint: size of poslist in bytes multiplied by 2, not including
+** this field. Plus 1 if this entry carries the "delete" flag.
+** collist: collist for column 0
+** zero-or-more {
+** 0x01 byte
+** varint: column number (I)
+** collist: collist for column I
+** }
+**
+** collist format:
+**
+** varint: first offset + 2
+** zero-or-more {
+** varint: offset delta + 2
+** }
+**
+** PAGE FORMAT
+**
+** Each leaf page begins with a 4-byte header containing 2 16-bit
+** unsigned integer fields in big-endian format. They are:
+**
+** * The byte offset of the first rowid on the page, if it exists
+** and occurs before the first term (otherwise 0).
+**
+** * The byte offset of the start of the page footer. If the page
+** footer is 0 bytes in size, then this field is the same as the
+** size of the leaf page in bytes.
+**
+** The page footer consists of a single varint for each term located
+** on the page. Each varint is the byte offset of the current term
+** within the page, delta-compressed against the previous value. In
+** other words, the first varint in the footer is the byte offset of
+** the first term, the second is the byte offset of the second less that
+** of the first, and so on.
+**
+** The term/doclist format described above is accurate if the entire
+** term/doclist data fits on a single leaf page. If this is not the case,
+** the format is changed in two ways:
+**
+** + if the first rowid on a page occurs before the first term, it
+** is stored as a literal value:
+**
+** varint: first rowid
+**
+** + the first term on each page is stored in the same way as the
+** very first term of the segment:
+**
+** varint : size of first term
+** blob: first term data
+**
+** 5. Segment doclist indexes:
+**
+** Doclist indexes are themselves b-trees, however they usually consist of
+** a single leaf record only. The format of each doclist index leaf page
+** is:
+**
+** * Flags byte. Bits are:
+** 0x01: Clear if leaf is also the root page, otherwise set.
+**
+** * Page number of fts index leaf page. As a varint.
+**
+** * First rowid on page indicated by previous field. As a varint.
+**
+** * A list of varints, one for each subsequent termless page. A
+** positive delta if the termless page contains at least one rowid,
+** or an 0x00 byte otherwise.
+**
+** Internal doclist index nodes are:
+**
+** * Flags byte. Bits are:
+** 0x01: Clear for root page, otherwise set.
+**
+** * Page number of first child page. As a varint.
+**
+** * Copy of first rowid on page indicated by previous field. As a varint.
+**
+** * A list of delta-encoded varints - the first rowid on each subsequent
+** child page.
+**
+*/
+
+/*
+** Rowids for the averages and structure records in the %_data table.
+*/
+#define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
+#define FTS5_STRUCTURE_ROWID 10 /* The structure record */
+
+/*
+** Macros determining the rowids used by segment leaves and dlidx leaves
+** and nodes. All nodes and leaves are stored in the %_data table with large
+** positive rowids.
+**
+** Each segment has a unique non-zero 16-bit id.
+**
+** The rowid for each segment leaf is found by passing the segment id and
+** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
+** sequentially starting from 1.
+*/
+ #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
+ #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
+ #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */
+ #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483647 (31 bits) */
+
+ #define fts5_dri(segid, dlidx, height, pgno) ( \
+ ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
+ ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
+ ((i64)(height) << (FTS5_DATA_PAGE_B)) + \
+ ((i64)(pgno)) \
+ )
+
+ #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno) /* Segment leaf: dlidx flag 0, height 0 */
+ #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) /* Doclist-index page: dlidx flag 1 */
+
+/*
+** Maximum segments permitted in a single index
+*/
+#define FTS5_MAX_SEGMENT 2000
+
+ #ifdef SQLITE_DEBUG
+ /*
+ ** Return SQLITE_CORRUPT_VTAB. Only compiled when SQLITE_DEBUG is defined.
+ ** Declared with an explicit (void) parameter list so that the definition
+ ** is a proper prototype.
+ **
+ ** NOTE(review): presumably a function rather than a constant so that a
+ ** breakpoint can be set where corruption is detected - confirm.
+ */
+ static int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
+ #endif
+
+
+/*
+** Each time a blob is read from the %_data table, it is padded with this
+** many zero bytes. This makes it easier to decode the various record formats
+** without overreading if the records are corrupt.
+*/
+#define FTS5_DATA_ZERO_PADDING 8
+#define FTS5_DATA_PADDING 20
+
+typedef struct Fts5Data Fts5Data;
+typedef struct Fts5DlidxIter Fts5DlidxIter;
+typedef struct Fts5DlidxLvl Fts5DlidxLvl;
+typedef struct Fts5DlidxWriter Fts5DlidxWriter;
+typedef struct Fts5Iter Fts5Iter;
+typedef struct Fts5PageWriter Fts5PageWriter;
+typedef struct Fts5SegIter Fts5SegIter;
+typedef struct Fts5DoclistIter Fts5DoclistIter;
+typedef struct Fts5SegWriter Fts5SegWriter;
+typedef struct Fts5Structure Fts5Structure;
+typedef struct Fts5StructureLevel Fts5StructureLevel;
+typedef struct Fts5StructureSegment Fts5StructureSegment;
+
+ struct Fts5Data { /* One record read from the %_data table by fts5DataRead() */
+ u8 *p; /* Pointer to buffer containing record */
+ int nn; /* Size of record in bytes */
+ int szLeaf; /* Size of leaf without page-index; fts5DataRead() takes it from bytes 2-3 of the record */
+ };
+
+/*
+** One object per %_data table.
+*/
+struct Fts5Index {
+ Fts5Config *pConfig; /* Virtual table configuration */
+ char *zDataTbl; /* Name of %_data table */
+ int nWorkUnit; /* Leaf pages in a "unit" of work */
+
+ /*
+ ** Variables related to the accumulation of tokens and doclists within the
+ ** in-memory hash tables before they are flushed to disk.
+ */
+ Fts5Hash *pHash; /* Hash table for in-memory data */
+ int nPendingData; /* Current bytes of pending data */
+ i64 iWriteRowid; /* Rowid for current doc being written */
+ int bDelete; /* Current write is a delete */
+
+ /* Error state. */
+ int rc; /* Current error code */
+
+ /* State used by the fts5DataXXX() functions. */
+ sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
+ sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
+ sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
+ sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
+ sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? */
+ sqlite3_stmt *pIdxSelect;
+ int nRead; /* Total number of blocks read */
+};
+
+ struct Fts5DoclistIter { /* Iterator over the entries of a single doclist */
+ u8 *aEof; /* Pointer to 1 byte past end of doclist */
+
+ /* Output variables. aPoslist==0 at EOF */
+ i64 iRowid; /* NOTE(review): presumably the rowid of the current entry - confirm */
+ u8 *aPoslist; /* NOTE(review): presumably the current position list; 0 at EOF */
+ int nPoslist; /* NOTE(review): presumably the size of aPoslist in bytes - confirm */
+ int nSize; /* NOTE(review): meaning not evident from this part of the file */
+ };
+
+/*
+** The contents of the "structure" record for each index are represented
+** using an Fts5Structure record in memory. Which uses instances of the
+** other Fts5StructureXXX types as components.
+*/
+struct Fts5StructureSegment {
+ int iSegid; /* Segment id */
+ int pgnoFirst; /* First leaf page number in segment */
+ int pgnoLast; /* Last leaf page number in segment */
+};
+struct Fts5StructureLevel {
+ int nMerge; /* Number of segments in incr-merge */
+ int nSeg; /* Total number of segments on level */
+ Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
+};
+struct Fts5Structure {
+ int nRef; /* Object reference count */
+ u64 nWriteCounter; /* Total leaves written to level 0 */
+ int nSegment; /* Total segments in this structure */
+ int nLevel; /* Number of levels in this index */
+ Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
+};
+
+/*
+** An object of type Fts5SegWriter is used to write to segments.
+*/
+struct Fts5PageWriter {
+ int pgno; /* Page number for this page */
+ int iPrevPgidx; /* Previous value written into pgidx */
+ Fts5Buffer buf; /* Buffer containing leaf data */
+ Fts5Buffer pgidx; /* Buffer containing page-index */
+ Fts5Buffer term; /* Buffer containing previous term on page */
+};
+struct Fts5DlidxWriter {
+ int pgno; /* Page number for this page */
+ int bPrevValid; /* True if iPrev is valid */
+ i64 iPrev; /* Previous rowid value written to page */
+ Fts5Buffer buf; /* Buffer containing page data */
+};
+struct Fts5SegWriter {
+ int iSegid; /* Segid to write to */
+ Fts5PageWriter writer; /* PageWriter object */
+ i64 iPrevRowid; /* Previous rowid written to current leaf */
+ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
+ u8 bFirstRowidInPage; /* True if next rowid is first in page */
+ /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
+ u8 bFirstTermInPage; /* True if next term will be first in leaf */
+ int nLeafWritten; /* Number of leaf pages written */
+ int nEmpty; /* Number of contiguous term-less nodes */
+
+ int nDlidx; /* Allocated size of aDlidx[] array */
+ Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
+
+ /* Values to insert into the %_idx table */
+ Fts5Buffer btterm; /* Next term to insert into %_idx table */
+ int iBtPage; /* Page number corresponding to btterm */
+};
+
+ typedef struct Fts5CResult Fts5CResult;
+ struct Fts5CResult { /* One comparison result; element type of Fts5Iter.aFirst[] */
+ u16 iFirst; /* aSeg[] index of firstest iterator */
+ u8 bTermEq; /* True if the terms are equal */
+ };
+
+/*
+** Object for iterating through a single segment, visiting each term/rowid
+** pair in the segment.
+**
+** pSeg:
+** The segment to iterate through.
+**
+** iLeafPgno:
+** Current leaf page number within segment.
+**
+** iLeafOffset:
+** Byte offset within the current leaf that is the first byte of the
+ ** position list data (one byte past the position-list size field).
+** rowid field of the current entry. Usually this is the size field of the
+** position list data. The exception is if the rowid for the current entry
+** is the last thing on the leaf page.
+**
+** pLeaf:
+** Buffer containing current leaf page data. Set to NULL at EOF.
+**
+** iTermLeafPgno, iTermLeafOffset:
+** Leaf page number containing the last term read from the segment. And
+** the offset immediately following the term data.
+**
+** flags:
+** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
+**
+** FTS5_SEGITER_ONETERM:
+** If set, set the iterator to point to EOF after the current doclist
+** has been exhausted. Do not proceed to the next term in the segment.
+**
+** FTS5_SEGITER_REVERSE:
+** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
+** it is set, iterate through rowid in descending order instead of the
+** default ascending order.
+**
+** iRowidOffset/nRowidOffset/aRowidOffset:
+** These are used if the FTS5_SEGITER_REVERSE flag is set.
+**
+** For each rowid on the page corresponding to the current term, the
+** corresponding aRowidOffset[] entry is set to the byte offset of the
+** start of the "position-list-size" field within the page.
+**
+** iTermIdx:
+** Index of current term on iTermLeafPgno.
+*/
+ struct Fts5SegIter { /* Iterator over the term/rowid pairs of one segment */
+ Fts5StructureSegment *pSeg; /* Segment to iterate through */
+ int flags; /* Mask of configuration flags */
+ int iLeafPgno; /* Current leaf page number */
+ Fts5Data *pLeaf; /* Current leaf data */
+ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
+ int iLeafOffset; /* Byte offset within current leaf */
+
+ /* Next method */
+ void (*xNext)(Fts5Index*, Fts5SegIter*, int*); /* NOTE(review): meaning of the int* argument is not visible here */
+
+ /* The page and offset from which the current term was read. The offset
+ ** is the offset of the first rowid in the current doclist. */
+ int iTermLeafPgno;
+ int iTermLeafOffset;
+
+ int iPgidxOff; /* Next offset in pgidx */
+ int iEndofDoclist; /* NOTE(review): presumably the leaf offset at which the current doclist ends - confirm */
+
+ /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
+ int iRowidOffset; /* Current entry in aRowidOffset[] */
+ int nRowidOffset; /* Allocated size of aRowidOffset[] array */
+ int *aRowidOffset; /* Array of offset to rowid fields */
+
+ Fts5DlidxIter *pDlidx; /* If there is a doclist-index */
+
+ /* Variables populated based on current entry. */
+ Fts5Buffer term; /* Current term */
+ i64 iRowid; /* Current rowid */
+ int nPos; /* Number of bytes in current position list */
+ u8 bDel; /* True if the delete flag is set */
+ };
+
+/*
+** Argument is a pointer to an Fts5Data structure that contains a
+** leaf page.
+*/
+#define ASSERT_SZLEAF_OK(x) assert( \
+ (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
+)
+
+#define FTS5_SEGITER_ONETERM 0x01
+#define FTS5_SEGITER_REVERSE 0x02
+
+/*
+** Argument is a pointer to an Fts5Data structure that contains a leaf
+** page. This macro evaluates to true if the leaf contains no terms, or
+** false if it contains at least one term.
+*/
+#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
+
+#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
+
+#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
+
+/*
+** Object for iterating through the merged results of one or more segments,
+** visiting each term/rowid pair in the merged data.
+**
+** nSeg is always a power of two greater than or equal to the number of
+** segments that this object is merging data from. Both the aSeg[] and
+** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
+** with zeroed objects - these are handled as if they were iterators opened
+** on empty segments.
+**
+** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
+** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
+** comparison in this context is the index of the iterator that currently
+** points to the smaller term/rowid combination. Iterators at EOF are
+** considered to be greater than all other iterators.
+**
+** aFirst[1] contains the index in aSeg[] of the iterator that points to
+** the smallest key overall. aFirst[0] is unused.
+**
+** poslist:
+** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
+** There is no way to tell if this is populated or not.
+*/
+ struct Fts5Iter { /* Merges the term/rowid pairs of the iterators in aSeg[] */
+ Fts5IndexIter base; /* Base class containing output vars */
+
+ Fts5Index *pIndex; /* Index that owns this iterator */
+ Fts5Structure *pStruct; /* Database structure for this iterator */
+ Fts5Buffer poslist; /* Buffer containing current poslist */
+ Fts5Colset *pColset; /* Restrict matches to these columns */
+
+ /* Invoked to set output variables. */
+ void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
+
+ int nSeg; /* Size of aSeg[] array */
+ int bRev; /* True to iterate in reverse order */
+ u8 bSkipEmpty; /* True to skip deleted entries */
+
+ i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
+ Fts5CResult *aFirst; /* Current merge state (see above) */
+ Fts5SegIter aSeg[1]; /* Array of segment iterators; nSeg entries per the comment above */
+ };
+
+
+/*
+** An instance of the following type is used to iterate through the contents
+** of a doclist-index record.
+**
+** pData:
+** Record containing the doclist-index data.
+**
+** bEof:
+** Set to true once iterator has reached EOF.
+**
+** iOff:
+** Set to the current offset within record pData.
+*/
+struct Fts5DlidxLvl {
+ Fts5Data *pData; /* Data for current page of this level */
+ int iOff; /* Current offset into pData */
+ int bEof; /* At EOF already */
+ int iFirstOff; /* Used by reverse iterators */
+
+ /* Output variables */
+ int iLeafPgno; /* Page number of current leaf page */
+ i64 iRowid; /* First rowid on leaf iLeafPgno */
+};
+struct Fts5DlidxIter {
+ int nLvl;
+ int iSegid;
+ Fts5DlidxLvl aLvl[1];
+};
+
+ /*
+ ** Store iVal in aOut[0..1] as a 16-bit big-endian integer.
+ */
+ static void fts5PutU16(u8 *aOut, u16 iVal){
+   aOut[1] = (iVal&0xFF);    /* Low byte goes second */
+   aOut[0] = (iVal>>8);      /* High byte goes first */
+ }
+
+ /*
+ ** Decode and return the 16-bit big-endian integer stored in aIn[0..1].
+ */
+ static u16 fts5GetU16(const u8 *aIn){
+   u16 hi = aIn[0];          /* Most significant byte */
+   u16 lo = aIn[1];          /* Least significant byte */
+   return (hi << 8) + lo;
+ }
+
+/*
+** Allocate and return a buffer at least nByte bytes in size.
+**
+** If an OOM error is encountered, return NULL and set the error code in
+** the Fts5Index handle passed as the first argument.
+*/
+static void *fts5IdxMalloc(Fts5Index *p, int nByte){
+ return sqlite3Fts5MallocZero(&p->rc, nByte);
+}
+
+/*
+** Compare the contents of the pLeft buffer with the pRight/nRight blob.
+**
+** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
+** +ve if pRight is smaller than pLeft. In other words:
+**
+** res = *pLeft - *pRight
+*/
+#ifdef SQLITE_DEBUG
+static int fts5BufferCompareBlob(
+ Fts5Buffer *pLeft, /* Left hand side of comparison */
+ const u8 *pRight, int nRight /* Right hand side of comparison */
+){
+ int nCmp = MIN(pLeft->n, nRight);
+ int res = memcmp(pLeft->p, pRight, nCmp);
+ return (res==0 ? (pLeft->n - nRight) : res);
+}
+#endif
+
+/*
+** Compare the contents of the two buffers using memcmp(). If one buffer
+** is a prefix of the other, it is considered the lesser.
+**
+** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
+** +ve if pRight is smaller than pLeft. In other words:
+**
+** res = *pLeft - *pRight
+*/
+static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
+ int nCmp = MIN(pLeft->n, pRight->n);
+ int res = memcmp(pLeft->p, pRight->p, nCmp);
+ return (res==0 ? (pLeft->n - pRight->n) : res);
+}
+
+ /*
+ ** Decode and return the varint stored at byte offset szLeaf of the leaf
+ ** page. Per the PAGE FORMAT notes this is the first entry of the page
+ ** footer, i.e. the byte offset of the first term on the page.
+ */
+ static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
+   const u8 *aFooter = &pLeaf->p[pLeaf->szLeaf];
+   int iTermOff;
+   fts5GetVarint32(aFooter, iTermOff);
+   return iTermOff;
+ }
+
+/*
+** Close the read-only blob handle, if it is open.
+*/
+static void fts5CloseReader(Fts5Index *p){
+ if( p->pReader ){
+ sqlite3_blob *pReader = p->pReader;
+ p->pReader = 0;
+ sqlite3_blob_close(pReader);
+ }
+}
+
+
+/*
+** Retrieve a record from the %_data table.
+**
+** If an error occurs, NULL is returned and an error left in the
+** Fts5Index object.
+*/
+static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
+ Fts5Data *pRet = 0;
+ if( p->rc==SQLITE_OK ){
+ int rc = SQLITE_OK;
+
+ if( p->pReader ){
+ /* This call may return SQLITE_ABORT if there has been a savepoint
+ ** rollback since it was last used. In this case a new blob handle
+ ** is required. */
+ sqlite3_blob *pBlob = p->pReader;
+ p->pReader = 0;
+ rc = sqlite3_blob_reopen(pBlob, iRowid);
+ assert( p->pReader==0 );
+ p->pReader = pBlob;
+ if( rc!=SQLITE_OK ){
+ fts5CloseReader(p);
+ }
+ if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
+ }
+
+ /* If the blob handle is not open at this point, open it and seek
+ ** to the requested entry. */
+ if( p->pReader==0 && rc==SQLITE_OK ){
+ Fts5Config *pConfig = p->pConfig;
+ rc = sqlite3_blob_open(pConfig->db,
+ pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
+ );
+ }
+
+ /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
+ ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
+ ** All the reasons those functions might return SQLITE_ERROR - missing
+ ** table, missing row, non-blob/text in block column - indicate
+ ** backing store corruption. */
+ if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
+
+ if( rc==SQLITE_OK ){
+ u8 *aOut = 0; /* Read blob data into this buffer */
+ int nByte = sqlite3_blob_bytes(p->pReader);
+ int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
+ pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
+ if( pRet ){
+ pRet->nn = nByte;
+ aOut = pRet->p = (u8*)&pRet[1];
+ }else{
+ rc = SQLITE_NOMEM;
+ }
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
+ }
+ if( rc!=SQLITE_OK ){
+ sqlite3_free(pRet);
+ pRet = 0;
+ }else{
+ /* TODO1: Fix this */
+ pRet->szLeaf = fts5GetU16(&pRet->p[2]);
+ }
+ }
+ p->rc = rc;
+ p->nRead++;
+ }
+
+ assert( (pRet==0)==(p->rc!=SQLITE_OK) );
+ return pRet;
+}
+
+/*
+** Release a reference to data record returned by an earlier call to
+** fts5DataRead().
+*/
+static void fts5DataRelease(Fts5Data *pData){
+ sqlite3_free(pData);
+}
+
+ /*
+ ** Compile SQL text zSql into *ppStmt on behalf of index p.
+ **
+ ** Nothing is compiled if p->rc is already set. A NULL zSql is treated as
+ ** an out-of-memory condition (p->rc is set to SQLITE_NOMEM). In all cases
+ ** zSql is passed to sqlite3_free() before returning, and the return value
+ ** is the resulting p->rc.
+ */
+ static int fts5IndexPrepareStmt(
+   Fts5Index *p,
+   sqlite3_stmt **ppStmt,
+   char *zSql
+ ){
+   if( p->rc==SQLITE_OK ){
+     p->rc = (zSql==0)
+         ? SQLITE_NOMEM
+         : sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0);
+   }
+   sqlite3_free(zSql);
+   return p->rc;
+ }
+
+
+/*
+** INSERT OR REPLACE a record into the %_data table.
+*/
+static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
+ if( p->rc!=SQLITE_OK ) return;
+
+ if( p->pWriter==0 ){
+ Fts5Config *pConfig = p->pConfig;
+ fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
+ "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
+ pConfig->zDb, pConfig->zName
+ ));
+ if( p->rc ) return;
+ }
+
+ sqlite3_bind_int64(p->pWriter, 1, iRowid);
+ sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
+ sqlite3_step(p->pWriter);
+ p->rc = sqlite3_reset(p->pWriter);
+}
+
+/*
+** Execute the following SQL:
+**
+** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
+*/
+static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
+ if( p->rc!=SQLITE_OK ) return;
+
+ if( p->pDeleter==0 ){
+ int rc;
+ Fts5Config *pConfig = p->pConfig;
+ char *zSql = sqlite3_mprintf(
+ "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
+ pConfig->zDb, pConfig->zName
+ );
+ if( zSql==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0);
+ sqlite3_free(zSql);
+ }
+ if( rc!=SQLITE_OK ){
+ p->rc = rc;
+ return;
+ }
+ }
+
+ sqlite3_bind_int64(p->pDeleter, 1, iFirst);
+ sqlite3_bind_int64(p->pDeleter, 2, iLast);
+ sqlite3_step(p->pDeleter);
+ p->rc = sqlite3_reset(p->pDeleter);
+}
+
+/*
+** Remove all records associated with segment iSegid.
+*/
+static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
+ i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
+ i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
+ fts5DataDelete(p, iFirst, iLast);
+ if( p->pIdxDeleter==0 ){
+ Fts5Config *pConfig = p->pConfig;
+ fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
+ "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
+ pConfig->zDb, pConfig->zName
+ ));
+ }
+ if( p->rc==SQLITE_OK ){
+ sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
+ sqlite3_step(p->pIdxDeleter);
+ p->rc = sqlite3_reset(p->pIdxDeleter);
+ }
+}
+
+/*
+** Release a reference to an Fts5Structure object returned by an earlier
+** call to fts5StructureRead() or fts5StructureDecode().
+*/
+static void fts5StructureRelease(Fts5Structure *pStruct){
+ if( pStruct && 0>=(--pStruct->nRef) ){
+ int i;
+ assert( pStruct->nRef==0 );
+ for(i=0; i<pStruct->nLevel; i++){
+ sqlite3_free(pStruct->aLevel[i].aSeg);
+ }
+ sqlite3_free(pStruct);
+ }
+}
+
+ /*
+ ** Take an additional reference to pStruct. Each reference is dropped
+ ** again with fts5StructureRelease().
+ */
+ static void fts5StructureRef(Fts5Structure *pStruct){
+   pStruct->nRef += 1;
+ }
+
+/*
+** Deserialize and return the structure record currently stored in serialized
+** form within buffer pData/nData.
+**
+** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
+** are over-allocated by one slot. This allows the structure contents
+** to be more easily edited.
+**
+** If an error occurs, *ppOut is set to NULL and an SQLite error code
+** returned. Otherwise, *ppOut is set to point to the new object and
+** SQLITE_OK returned.
+*/
+static int fts5StructureDecode(
+ const u8 *pData, /* Buffer containing serialized structure */
+ int nData, /* Size of buffer pData in bytes */
+ int *piCookie, /* Configuration cookie value */
+ Fts5Structure **ppOut /* OUT: Deserialized object */
+){
+ int rc = SQLITE_OK;
+ int i = 0;
+ int iLvl;
+ int nLevel = 0;
+ int nSegment = 0;
+ int nByte; /* Bytes of space to allocate at pRet */
+ Fts5Structure *pRet = 0; /* Structure object to return */
+
+ /* Grab the cookie value */
+ if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
+ i = 4;
+
+ /* Read the total number of levels and segments from the start of the
+ ** structure record. */
+ i += fts5GetVarint32(&pData[i], nLevel);
+ i += fts5GetVarint32(&pData[i], nSegment);
+ nByte = (
+ sizeof(Fts5Structure) + /* Main structure */
+ sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
+ );
+ pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
+
+ if( pRet ){
+ pRet->nRef = 1;
+ pRet->nLevel = nLevel;
+ pRet->nSegment = nSegment;
+ i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
+
+ for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
+ Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
+ int nTotal = 0;
+ int iSeg;
+
+ if( i>=nData ){
+ rc = FTS5_CORRUPT;
+ }else{
+ i += fts5GetVarint32(&pData[i], pLvl->nMerge);
+ i += fts5GetVarint32(&pData[i], nTotal);
+ assert( nTotal>=pLvl->nMerge );
+ pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
+ nTotal * sizeof(Fts5StructureSegment)
+ );
+ }
+
+ if( rc==SQLITE_OK ){
+ pLvl->nSeg = nTotal;
+ for(iSeg=0; iSeg<nTotal; iSeg++){
+ if( i>=nData ){
+ rc = FTS5_CORRUPT;
+ break;
+ }
+ i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
+ i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
+ i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
+ }
+ }
+ }
+ if( rc!=SQLITE_OK ){
+ fts5StructureRelease(pRet);
+ pRet = 0;
+ }
+ }
+
+ *ppOut = pRet;
+ return rc;
+}
+
+/*
+**
+*/
+static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
+ if( *pRc==SQLITE_OK ){
+ Fts5Structure *pStruct = *ppStruct;
+ int nLevel = pStruct->nLevel;
+ int nByte = (
+ sizeof(Fts5Structure) + /* Main structure */
+ sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
+ );
+
+ pStruct = sqlite3_realloc(pStruct, nByte);
+ if( pStruct ){
+ memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
+ pStruct->nLevel++;
+ *ppStruct = pStruct;
+ }else{
+ *pRc = SQLITE_NOMEM;
+ }
+ }
+}
+
+/*
+** Extend level iLvl so that there is room for at least nExtra more
+** segments.
+*/
+static void fts5StructureExtendLevel(
+ int *pRc,
+ Fts5Structure *pStruct,
+ int iLvl,
+ int nExtra,
+ int bInsert
+){
+ if( *pRc==SQLITE_OK ){
+ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+ Fts5StructureSegment *aNew;
+ int nByte;
+
+ nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
+ aNew = sqlite3_realloc(pLvl->aSeg, nByte);
+ if( aNew ){
+ if( bInsert==0 ){
+ memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
+ }else{
+ int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
+ memmove(&aNew[nExtra], aNew, nMove);
+ memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
+ }
+ pLvl->aSeg = aNew;
+ }else{
+ *pRc = SQLITE_NOMEM;
+ }
+ }
+}
+
+/*
+** Read the structure record (rowid FTS5_STRUCTURE_ROWID), decode it with
+** fts5StructureDecode() and return the resulting object.
+**
+** If the configuration cookie stored in the record differs from the one
+** held by the Fts5Config object, the configuration is reloaded with
+** sqlite3Fts5ConfigLoad().
+**
+** If an error occurs, NULL is returned and an error code left in the
+** Fts5Index handle. If an error has already occurred when this function
+** is called, it is a no-op and NULL is returned.
+*/
+static Fts5Structure *fts5StructureRead(Fts5Index *p){
+  Fts5Config *pCfg = p->pConfig;
+  Fts5Structure *pStruct = 0;     /* Decoded structure */
+  int iStored;                    /* Cookie value found in the record */
+  Fts5Data *pRec;                 /* Raw structure record */
+
+  pRec = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
+  if( p->rc!=SQLITE_OK ) return 0;
+
+  /* TODO: Do we need this if the leaf-index is appended? Probably... */
+  memset(&pRec->p[pRec->nn], 0, FTS5_DATA_PADDING);
+
+  p->rc = fts5StructureDecode(pRec->p, pRec->nn, &iStored, &pStruct);
+  if( p->rc==SQLITE_OK && iStored!=pCfg->iCookie ){
+    p->rc = sqlite3Fts5ConfigLoad(pCfg, iStored);
+  }
+  fts5DataRelease(pRec);
+
+  if( p->rc==SQLITE_OK ) return pStruct;
+
+  /* Decoding or the configuration reload failed: discard the object */
+  fts5StructureRelease(pStruct);
+  return 0;
+}
+
+/*
+** Sum the nSeg counters of every level of pStruct and return the total.
+** A NULL pStruct yields 0. Only compiled when SQLITE_DEBUG is defined.
+*/
+#ifdef SQLITE_DEBUG
+static int fts5StructureCountSegments(Fts5Structure *pStruct){
+  int nTotal = 0;                 /* Running total of segments */
+  int i = 0;                      /* Current level */
+
+  if( pStruct==0 ) return 0;
+  while( i<pStruct->nLevel ){
+    nTotal += pStruct->aLevel[i].nSeg;
+    i++;
+  }
+  return nTotal;
+}
+#endif
+
+/*
+** Copy the nBlob byte blob pBlob to the end of buffer pBuf and advance
+** pBuf->n. The buffer is never grown here: the caller must already have
+** reserved enough space, which is checked only by the assert(). The
+** arguments are expanded more than once, so they must be free of side
+** effects.
+*/
+#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
+ assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \
+ memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \
+ (pBuf)->n += nBlob; \
+}
+
+/*
+** Encode iVal with sqlite3Fts5PutVarint() at the end of buffer pBuf and
+** advance pBuf->n by the number of bytes written. The buffer is never
+** grown here; the assert() that checks the space runs after the bytes
+** have been written, so the caller must reserve room beforehand.
+*/
+#define fts5BufferSafeAppendVarint(pBuf, iVal) { \
+ (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
+ assert( (pBuf)->nSpace>=(pBuf)->n ); \
+}
+
+
+/*
+** Serialize pStruct and write it to the record with rowid
+** FTS5_STRUCTURE_ROWID.
+**
+** The record starts with the 32-bit configuration cookie, followed by
+** varints for nLevel, nSegment and nWriteCounter. Then, for each level,
+** varints for nMerge and nSeg followed by the (iSegid, pgnoFirst,
+** pgnoLast) triple of each segment.
+**
+** If an error occurs, leave an error code in the Fts5Index object. If an
+** error has already occurred, this function is a no-op.
+*/
+static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
+  Fts5Buffer out;                 /* Serialized record accumulates here */
+  int iCookie;                    /* Cookie value to store */
+  int iLevel;                     /* Level currently being serialized */
+
+  if( p->rc!=SQLITE_OK ) return;
+
+  assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
+  memset(&out, 0, sizeof(Fts5Buffer));
+
+  /* A negative configuration cookie is stored as zero */
+  iCookie = p->pConfig->iCookie;
+  if( iCookie<0 ) iCookie = 0;
+
+  /* Fixed header: 4 byte cookie plus up to three 9 byte varints */
+  if( sqlite3Fts5BufferSize(&p->rc, &out, 4+9+9+9)==0 ){
+    sqlite3Fts5Put32(out.p, iCookie);
+    out.n = 4;
+    fts5BufferSafeAppendVarint(&out, pStruct->nLevel);
+    fts5BufferSafeAppendVarint(&out, pStruct->nSegment);
+    fts5BufferSafeAppendVarint(&out, (i64)pStruct->nWriteCounter);
+  }
+
+  iLevel = 0;
+  while( iLevel<pStruct->nLevel ){
+    Fts5StructureLevel *pLevel = &pStruct->aLevel[iLevel];
+    int i;                        /* Segment within the level */
+
+    fts5BufferAppendVarint(&p->rc, &out, pLevel->nMerge);
+    fts5BufferAppendVarint(&p->rc, &out, pLevel->nSeg);
+    assert( pLevel->nMerge<=pLevel->nSeg );
+
+    for(i=0; i<pLevel->nSeg; i++){
+      fts5BufferAppendVarint(&p->rc, &out, pLevel->aSeg[i].iSegid);
+      fts5BufferAppendVarint(&p->rc, &out, pLevel->aSeg[i].pgnoFirst);
+      fts5BufferAppendVarint(&p->rc, &out, pLevel->aSeg[i].pgnoLast);
+    }
+    iLevel++;
+  }
+
+  fts5DataWrite(p, FTS5_STRUCTURE_ROWID, out.p, out.n);
+  fts5BufferFree(&out);
+}
+
+/*
+** Debugging aid. The real implementation is inside "#if 0" and so is not
+** compiled; in that case fts5PrintStructure() is a macro that expands to
+** nothing. When enabled, it renders pStruct with fts5DebugStructure() and
+** prints "<caption>: <text>" to stdout.
+*/
+#if 0
+static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
+static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
+ int rc = SQLITE_OK;
+ Fts5Buffer buf;
+ memset(&buf, 0, sizeof(buf));
+ fts5DebugStructure(&rc, &buf, pStruct);
+ fprintf(stdout, "%s: %s\n", zCaption, buf.p);
+ fflush(stdout);
+ fts5BufferFree(&buf);
+}
+#else
+# define fts5PrintStructure(x,y)
+#endif
+
+/*
+** Return the number of pages spanned by segment pSeg, counting both
+** pgnoFirst and pgnoLast.
+*/
+static int fts5SegmentSize(Fts5StructureSegment *pSeg){
+  int iFirst = pSeg->pgnoFirst;
+  int iLast = pSeg->pgnoLast;
+  return 1 + iLast - iFirst;
+}
+
+/*
+** Modify pStruct in place, moving as many segments as possible from the
+** levels above iPromote down into level iPromote.
+**
+** Nothing is done if level iPromote has a merge in progress (nMerge!=0).
+** Otherwise levels iPromote+1 and up are visited in order. Within each
+** level segments are taken from the end of its aSeg[] array and inserted
+** at the front of level iPromote. Processing stops at the first level
+** that has a merge in progress, or at the first segment larger than
+** szPromote pages.
+**
+** If an OOM occurs, the error is left in p->rc and the function returns
+** immediately.
+*/
+static void fts5StructurePromoteTo(
+  Fts5Index *p,
+  int iPromote,
+  int szPromote,
+  Fts5Structure *pStruct
+){
+  Fts5StructureLevel *pDest = &pStruct->aLevel[iPromote];
+  int iSrc;                       /* Level segments are taken from */
+
+  if( pDest->nMerge!=0 ) return;
+
+  for(iSrc=iPromote+1; iSrc<pStruct->nLevel; iSrc++){
+    Fts5StructureLevel *pSrc = &pStruct->aLevel[iSrc];
+    if( pSrc->nMerge ) return;
+
+    while( pSrc->nSeg>0 ){
+      /* Candidate is always the last remaining segment of the level */
+      Fts5StructureSegment *pCand = &pSrc->aSeg[pSrc->nSeg-1];
+      if( fts5SegmentSize(pCand)>szPromote ) return;
+
+      /* Make room for one segment at the front of the target level */
+      fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
+      if( p->rc ) return;
+
+      memcpy(pDest->aSeg, pCand, sizeof(Fts5StructureSegment));
+      pDest->nSeg++;
+      pSrc->nSeg--;
+    }
+  }
+}
+
+/*
+** A new segment has just been written to level iLvl of index structure
+** pStruct. Decide whether any segments should be promoted as a result,
+** and if so update the structure object. There are two scenarios:
+**
+**   a) The nearest populated level below iLvl holds at least one segment
+**      that is as large as, or larger than, the segment just written. In
+**      this case segments are promoted to that level, using the size of
+**      its largest segment as the limit.
+**
+**   b) Otherwise, segments from the levels above iLvl that are no larger
+**      than the segment just written are promoted to level iLvl.
+**
+** The actual moves are made by fts5StructurePromoteTo(), which is a no-op
+** if nothing qualifies. This function is a no-op if p->rc is already set
+** or if level iLvl is empty.
+*/
+static void fts5StructurePromote(
+  Fts5Index *p,                   /* FTS5 backend object */
+  int iLvl,                       /* Index level just updated */
+  Fts5Structure *pStruct          /* Index structure */
+){
+  Fts5StructureLevel *pLevel;     /* Level iLvl */
+  Fts5StructureSegment *pNew;     /* Segment just written */
+  int szNew;                      /* Size of segment just written */
+  int iTarget;                    /* Level to promote to */
+  int szLimit;                    /* Promote anything this size or smaller */
+  int iPrev;                      /* Nearest populated level below iLvl */
+
+  if( p->rc!=SQLITE_OK ) return;
+
+  pLevel = &pStruct->aLevel[iLvl];
+  if( pLevel->nSeg==0 ) return;
+  pNew = &pLevel->aSeg[pLevel->nSeg-1];
+  szNew = (1 + pNew->pgnoLast - pNew->pgnoFirst);
+
+  /* Start by assuming (b). PromoteTo() is a no-op if it does not hold. */
+  iTarget = iLvl;
+  szLimit = szNew;
+
+  /* Check for condition (a) */
+  iPrev = iLvl-1;
+  while( iPrev>=0 && pStruct->aLevel[iPrev].nSeg==0 ) iPrev--;
+  if( iPrev>=0 ){
+    Fts5StructureLevel *pPrev = &pStruct->aLevel[iPrev];
+    int szLargest = 0;            /* Largest segment on level iPrev */
+    int k;
+
+    assert( pPrev->nMerge==0 );
+    for(k=0; k<pPrev->nSeg; k++){
+      int sz = pPrev->aSeg[k].pgnoLast - pPrev->aSeg[k].pgnoFirst + 1;
+      if( sz>szLargest ) szLargest = sz;
+    }
+    if( szLargest>=szNew ){
+      /* Condition (a) is true. Promote to level iPrev instead. */
+      iTarget = iPrev;
+      szLimit = szLargest;
+    }
+  }
+
+  fts5StructurePromoteTo(p, iTarget, szLimit, pStruct);
+}
+
+
+/*
+** Advance the iterator passed as the only argument. If the end of the
+** doclist-index page is reached, return non-zero.
+**
+** On the first call for a page (pLvl->iOff==0) the initial leaf page
+** number and rowid are read starting at byte offset 1 of the page, and
+** the offset that follows them is saved in pLvl->iFirstOff. On later
+** calls any run of 0x00 bytes is skipped, each one advancing iLeafPgno by
+** one; the varint that follows is added to iRowid. If only 0x00 bytes (or
+** nothing) remain, bEof is set.
+*/
+static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
+ Fts5Data *pData = pLvl->pData;
+
+ if( pLvl->iOff==0 ){
+ assert( pLvl->bEof==0 );
+ pLvl->iOff = 1;
+ pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
+ pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
+ pLvl->iFirstOff = pLvl->iOff; /* fts5DlidxLvlPrev() stops here */
+ }else{
+ int iOff;
+ for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
+ if( pData->p[iOff] ) break; /* First non-zero byte starts a varint */
+ }
+
+ if( iOff<pData->nn ){
+ i64 iVal;
+ pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; /* Skipped zeroes, plus one */
+ iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
+ pLvl->iRowid += iVal; /* Stored value is a delta */
+ pLvl->iOff = iOff;
+ }else{
+ pLvl->bEof = 1;
+ }
+ }
+
+ return pLvl->bEof;
+}
+
+/*
+** Advance level iLvl of the doclist-index iterator pIter.
+**
+** If level iLvl runs off the end of its page and a parent level (iLvl+1)
+** exists, the parent is advanced recursively. If the parent is not at
+** EOF afterwards, the page it now refers to (pLvl[1].iLeafPgno) is loaded
+** for level iLvl and positioned on its first entry.
+**
+** The return value is always the EOF flag of level 0. A failed page read
+** leaves pLvl->pData NULL; fts5DataRead() is expected to record the error
+** in p->rc (not visible here).
+*/
+static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
+ Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
+
+ assert( iLvl<pIter->nLvl );
+ if( fts5DlidxLvlNext(pLvl) ){
+ if( (iLvl+1) < pIter->nLvl ){
+ fts5DlidxIterNextR(p, pIter, iLvl+1);
+ if( pLvl[1].bEof==0 ){
+ fts5DataRelease(pLvl->pData);
+ memset(pLvl, 0, sizeof(Fts5DlidxLvl)); /* Resets iOff and bEof too */
+ pLvl->pData = fts5DataRead(p,
+ FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
+ );
+ if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
+ }
+ }
+ }
+
+ return pIter->aLvl[0].bEof;
+}
+/*
+** Advance doclist-index iterator pIter to its next entry, starting the
+** recursion at level 0. Returns the EOF flag of level 0.
+*/
+static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
+  int bEof = fts5DlidxIterNextR(p, pIter, 0);
+  return bEof;
+}
+
+/*
+** Position every level of doclist-index iterator pIter on its first
+** entry by calling fts5DlidxLvlNext() once for each level, from level 0
+** upwards. Each level's pData must already refer to its doclist-index
+** record.
+**
+** Returns the EOF flag of level 0.
+*/
+static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
+  int iLvl = 0;
+  while( iLvl<pIter->nLvl ){
+    fts5DlidxLvlNext(&pIter->aLvl[iLvl]);
+    iLvl++;
+  }
+  return pIter->aLvl[0].bEof;
+}
+
+
+/*
+** Return 1 if an error has occurred on the Fts5Index handle or if level 0
+** of iterator pIter is at EOF. Otherwise return 0.
+*/
+static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
+  if( p->rc!=SQLITE_OK ) return 1;
+  return pIter->aLvl[0].bEof!=0;
+}
+
+/*
+** Move doclist-index iterator pIter to its final entry.
+**
+** Levels are processed from the highest down to level 0. Each level is
+** advanced until fts5DlidxLvlNext() reports EOF, then its EOF flag is
+** cleared so that it stays on its last entry. For every level other than
+** level 0, the page that last entry refers to is then loaded into the
+** level below. The loop stops early if an error is left in p->rc.
+*/
+static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
+  int iLvl = pIter->nLvl - 1;
+
+  while( p->rc==SQLITE_OK && iLvl>=0 ){
+    Fts5DlidxLvl *pCur = &pIter->aLvl[iLvl];
+
+    /* Run to the end of this level's page, then clear the EOF flag */
+    while( fts5DlidxLvlNext(pCur)==0 ){}
+    pCur->bEof = 0;
+
+    if( iLvl>0 ){
+      Fts5DlidxLvl *pBelow = &pIter->aLvl[iLvl-1];
+      fts5DataRelease(pBelow->pData);
+      memset(pBelow, 0, sizeof(Fts5DlidxLvl));
+      pBelow->pData = fts5DataRead(p,
+          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl-1, pCur->iLeafPgno)
+      );
+    }
+    iLvl--;
+  }
+}
+
+/*
+** Move the iterator passed as the only argument to the previous entry.
+**
+** If the iterator is already at (or before) the first entry on the page
+** (iOff<=iFirstOff), bEof is set instead. Otherwise the varint that ends
+** at iOff is located by scanning backwards, its value is subtracted from
+** iRowid and iLeafPgno is decremented. Any 0x00 bytes immediately before
+** that varint are then stepped over as well, each one decrementing
+** iLeafPgno again.
+**
+** Returns the (possibly updated) bEof flag.
+*/
+static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
+ int iOff = pLvl->iOff;
+
+ assert( pLvl->bEof==0 );
+ if( iOff<=pLvl->iFirstOff ){
+ pLvl->bEof = 1;
+ }else{
+ u8 *a = pLvl->pData->p;
+ i64 iVal;
+ int iLimit;
+ int ii;
+ int nZero = 0; /* Number of 0x00 entries to step back over */
+
+ /* Currently iOff points to the first byte of a varint. This block
+ ** decrements iOff until it points to the first byte of the previous
+ ** varint. Taking care not to read any memory locations that occur
+ ** before the buffer in memory. */
+ iLimit = (iOff>9 ? iOff-9 : 0); /* Scan back at most 9 bytes */
+ for(iOff--; iOff>iLimit; iOff--){
+ if( (a[iOff-1] & 0x80)==0 ) break; /* Previous byte ends another varint */
+ }
+
+ fts5GetVarint(&a[iOff], (u64*)&iVal);
+ pLvl->iRowid -= iVal;
+ pLvl->iLeafPgno--;
+
+ /* Skip backwards past any 0x00 varints. */
+ for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
+ nZero++;
+ }
+ if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
+ /* The byte immediately before the last 0x00 byte has the 0x80 bit
+ ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
+ ** bytes before a[ii]. */
+ int bZero = 0; /* True if last 0x00 counts */
+ if( (ii-8)>=pLvl->iFirstOff ){
+ int j;
+ for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
+ bZero = (j>8);
+ }
+ if( bZero==0 ) nZero--; /* That 0x00 is the tail of a longer varint */
+ }
+ pLvl->iLeafPgno -= nZero;
+ pLvl->iOff = iOff - nZero;
+ }
+
+ return pLvl->bEof;
+}
+
+/*
+** Move level iLvl of doclist-index iterator pIter to its previous entry.
+**
+** If level iLvl is already on the first entry of its page and a parent
+** level (iLvl+1) exists, the parent is moved back recursively. If the
+** parent is not at EOF afterwards, the page it now refers to is loaded
+** for level iLvl and advanced to its last entry.
+**
+** The return value is always the EOF flag of level 0.
+*/
+static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
+ Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
+
+ assert( iLvl<pIter->nLvl );
+ if( fts5DlidxLvlPrev(pLvl) ){
+ if( (iLvl+1) < pIter->nLvl ){
+ fts5DlidxIterPrevR(p, pIter, iLvl+1);
+ if( pLvl[1].bEof==0 ){
+ fts5DataRelease(pLvl->pData);
+ memset(pLvl, 0, sizeof(Fts5DlidxLvl));
+ pLvl->pData = fts5DataRead(p,
+ FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
+ );
+ if( pLvl->pData ){
+ while( fts5DlidxLvlNext(pLvl)==0 ); /* Run to the end of the page */
+ pLvl->bEof = 0; /* Stay on the last entry */
+ }
+ }
+ }
+ }
+
+ return pIter->aLvl[0].bEof;
+}
+/*
+** Move doclist-index iterator pIter to its previous entry, starting the
+** recursion at level 0. Returns the EOF flag of level 0.
+*/
+static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
+  int bEof = fts5DlidxIterPrevR(p, pIter, 0);
+  return bEof;
+}
+
+/*
+** Release the page held by each level of a doclist-index iterator
+** allocated by fts5DlidxIterInit(), then free the iterator itself.
+** Passing NULL is a harmless no-op.
+*/
+static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
+  int iLvl;
+
+  if( pIter==0 ) return;
+  iLvl = 0;
+  while( iLvl<pIter->nLvl ){
+    fts5DataRelease(pIter->aLvl[iLvl].pData);
+    iLvl++;
+  }
+  sqlite3_free(pIter);
+}
+
+/*
+** Allocate and return a doclist-index iterator for the doclist-index
+** identified by segment iSegid and leaf page iLeafPg.
+**
+** One Fts5DlidxLvl is added per loop iteration, reading the record for
+** levels 0, 1, 2... in turn. The loop stops after reading a record whose
+** first byte has its least significant bit clear. The iterator is then
+** positioned on its first entry (bRev==0) or its last entry (bRev!=0).
+**
+** If an error occurs, or has already occurred when this function is
+** called, any partially built iterator is freed and NULL is returned with
+** the error code left in p->rc.
+*/
+static Fts5DlidxIter *fts5DlidxIterInit(
+ Fts5Index *p, /* Fts5 Backend to iterate within */
+ int bRev, /* Non-zero to start on the last entry, zero for the first */
+ int iSegid, /* Segment id */
+ int iLeafPg /* Leaf page number to load dlidx for */
+){
+ Fts5DlidxIter *pIter = 0;
+ int i;
+ int bDone = 0;
+
+ for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
+ int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
+ Fts5DlidxIter *pNew;
+
+ pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
+ if( pNew==0 ){
+ p->rc = SQLITE_NOMEM; /* pIter is still valid; freed below */
+ }else{
+ i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
+ Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
+ pIter = pNew;
+ memset(pLvl, 0, sizeof(Fts5DlidxLvl));
+ pLvl->pData = fts5DataRead(p, iRowid);
+ if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
+ bDone = 1; /* Flag bit clear: this is the top level */
+ }
+ pIter->nLvl = i+1;
+ }
+ }
+
+ if( p->rc==SQLITE_OK ){
+ pIter->iSegid = iSegid;
+ if( bRev==0 ){
+ fts5DlidxIterFirst(pIter);
+ }else{
+ fts5DlidxIterLast(p, pIter);
+ }
+ }
+
+ if( p->rc!=SQLITE_OK ){
+ fts5DlidxIterFree(pIter);
+ pIter = 0;
+ }
+
+ return pIter;
+}
+
+/*
+** Return the rowid that level 0 of iterator pIter currently points to.
+*/
+static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
+  Fts5DlidxLvl *pLeafLvl = &pIter->aLvl[0];
+  return pLeafLvl->iRowid;
+}
+/*
+** Return the leaf page number that level 0 of iterator pIter currently
+** points to.
+*/
+static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
+  Fts5DlidxLvl *pLeafLvl = &pIter->aLvl[0];
+  return pLeafLvl->iLeafPgno;
+}
+
+/*
+** Release the current leaf of segment iterator pIter and move it to the
+** next leaf page.
+**
+** If a page has been stashed in pIter->pNextLeaf it is used directly.
+** Otherwise the next page is read from the database, unless the iterator
+** has already passed the last page of the segment, in which case
+** pIter->pLeaf is left NULL.
+**
+** When a page is loaded, iPgidxOff and iEndofDoclist are initialized: for
+** a termless page iEndofDoclist is set past the end of the page,
+** otherwise it is read from the first varint found at offset szLeaf.
+*/
+static void fts5SegIterNextPage(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegIter *pIter              /* Iterator to advance to next page */
+){
+  Fts5StructureSegment *pSeg = pIter->pSeg;
+  Fts5Data *pNext = 0;            /* Page the iterator moves to, if any */
+
+  fts5DataRelease(pIter->pLeaf);
+  pIter->iLeafPgno++;
+
+  if( pIter->pNextLeaf ){
+    pNext = pIter->pNextLeaf;
+    pIter->pNextLeaf = 0;
+  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
+    pNext = fts5DataRead(p,
+        FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
+    );
+  }
+  pIter->pLeaf = pNext;
+  if( pNext==0 ) return;
+
+  if( fts5LeafIsTermless(pNext) ){
+    pIter->iPgidxOff = pNext->szLeaf;
+    pIter->iEndofDoclist = pNext->nn+1;
+  }else{
+    int iIdx = pNext->szLeaf;
+    iIdx += fts5GetVarint32(&pNext->p[iIdx], pIter->iEndofDoclist);
+    pIter->iPgidxOff = iIdx;
+  }
+}
+
+/*
+** Buffer p begins with a varint holding a position-list size field.
+** Decode it and return the number of bytes the varint occupies. The
+** field value divided by two (the size of the position list in bytes) is
+** written to *pnSz, and its least significant bit (the delete flag) is
+** written to *pbDel.
+*/
+static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
+  int nField;                     /* Raw value of the size field */
+  int nRead = 0;                  /* Bytes consumed by the varint */
+
+  fts5FastGetVarint32(p, nRead, nField);
+  assert_nc( nField>=0 );
+  *pnSz = nField/2;
+  *pbDel = nField & 0x0001;
+  return nRead;
+}
+
+/*
+** Fts5SegIter.iLeafOffset currently points to the first byte of a
+** position-list size field. Decode it into:
+**
+**   Fts5SegIter.nPos
+**   Fts5SegIter.bDel
+**
+** and leave Fts5SegIter.iLeafOffset pointing to the first byte of the
+** position list content (if any).
+**
+** For detail=none tables there is no size field. Instead, a single 0x00
+** byte at the current offset sets bDel with nPos==0, two 0x00 bytes set
+** bDel with nPos==1, and anything else leaves bDel clear with nPos==1.
+** Bytes are only examined below MIN(iEndofDoclist, szLeaf).
+**
+** This is a no-op if an error has already occurred.
+*/
+static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
+  int iPos;                       /* Offset to read at */
+
+  if( p->rc!=SQLITE_OK ) return;
+
+  iPos = pIter->iLeafOffset;
+  ASSERT_SZLEAF_OK(pIter->pLeaf);
+
+  if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
+    int nField;
+    fts5FastGetVarint32(pIter->pLeaf->p, iPos, nField);
+    pIter->bDel = (nField & 0x0001);
+    pIter->nPos = nField>>1;
+    assert_nc( pIter->nPos>=0 );
+  }else{
+    const u8 *aData = pIter->pLeaf->p;
+    int iLimit = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
+
+    pIter->bDel = 0;
+    pIter->nPos = 1;
+    if( iPos<iLimit && aData[iPos]==0 ){
+      pIter->bDel = 1;
+      iPos++;
+      if( iPos<iLimit && aData[iPos]==0 ){
+        iPos++;                   /* Second 0x00: nPos stays at 1 */
+      }else{
+        pIter->nPos = 0;
+      }
+    }
+  }
+
+  pIter->iLeafOffset = iPos;
+}
+
+/*
+** Read the rowid varint found at Fts5SegIter.iLeafOffset into
+** Fts5SegIter.iRowid and advance iLeafOffset past it.
+**
+** If iLeafOffset is at or beyond szLeaf, the rowid is instead read from
+** offset 4 of the next leaf page. If there is no next page and no other
+** error has been recorded, p->rc is set to FTS5_CORRUPT.
+*/
+static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
+  Fts5Data *pLeaf = pIter->pLeaf; /* Leaf to read the rowid from */
+  int iPos = pIter->iLeafOffset;  /* Offset of the rowid varint */
+
+  ASSERT_SZLEAF_OK(pLeaf);
+  if( iPos>=pLeaf->szLeaf ){
+    fts5SegIterNextPage(p, pIter);
+    pLeaf = pIter->pLeaf;
+    if( pLeaf==0 ){
+      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
+      return;
+    }
+    iPos = 4;
+  }
+
+  iPos += sqlite3Fts5GetVarint(&pLeaf->p[iPos], (u64*)&pIter->iRowid);
+  pIter->iLeafOffset = iPos;
+}
+
+/*
+** Fts5SegIter.iLeafOffset currently points to the first byte of the
+** "nSuffix" field of a term. Function parameter nKeep contains the value
+** of the "nPrefix" field (if there was one - it is passed 0 if this is
+** the first term in the segment).
+**
+** This function populates:
+**
+**   Fts5SegIter.term
+**   Fts5SegIter.rowid
+**
+** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
+** the first position list. The position list belonging to document
+** (Fts5SegIter.iRowid).
+**
+** The record is treated as corrupt, and FTS5_CORRUPT is stored in
+** Fts5Index.rc (unless an earlier error is already recorded there), if:
+**
+**   * the suffix length is negative or runs past the end of the leaf, or
+**   * nKeep is negative or larger than the number of bytes currently
+**     held in Fts5SegIter.term.
+**
+** In these cases the iterator is not modified.
+*/
+static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
+  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
+  int iOff = pIter->iLeafOffset;  /* Offset to read at */
+  int nNew;                       /* Bytes of new data */
+
+  iOff += fts5GetVarint32(&a[iOff], nNew);
+
+  /* Validate both length fields before trusting them. The bounds test is
+  ** written as a subtraction so that a huge nNew cannot overflow it. A
+  ** prefix longer than the previous term would expose bytes of the term
+  ** buffer that were never written. */
+  if( nNew<0
+   || nNew>(pIter->pLeaf->nn - iOff)
+   || nKeep<0
+   || nKeep>pIter->term.n
+  ){
+    if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
+    return;
+  }
+
+  /* Keep the shared prefix and append the suffix stored on the page */
+  pIter->term.n = nKeep;
+  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
+  iOff += nNew;
+  pIter->iTermLeafOffset = iOff;
+  pIter->iTermLeafPgno = pIter->iLeafPgno;
+  pIter->iLeafOffset = iOff;
+
+  /* Work out where the doclist of this term ends. If the page index has
+  ** no further entries the doclist runs past the end of the page. */
+  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
+    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
+  }else{
+    int nExtra;
+    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
+    pIter->iEndofDoclist += nExtra;
+  }
+
+  fts5SegIterLoadRowid(p, pIter);
+}
+
+static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
+static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
+static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
+
+/*
+** Set pIter->xNext to the "next" implementation that suits the iterator:
+** fts5SegIterNext_Reverse() if the FTS5_SEGITER_REVERSE flag is set,
+** otherwise fts5SegIterNext_None() for detail=none tables, otherwise
+** fts5SegIterNext().
+*/
+static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
+  void (*xStep)(Fts5Index*, Fts5SegIter*, int*) = fts5SegIterNext;
+
+  if( pIter->flags & FTS5_SEGITER_REVERSE ){
+    xStep = fts5SegIterNext_Reverse;
+  }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
+    xStep = fts5SegIterNext_None;
+  }
+  pIter->xNext = xStep;
+}
+
+/*
+** Initialize the iterator object pIter to iterate through the entries in
+** segment pSeg. The iterator is left pointing to the first entry when
+** this function returns.
+**
+** If the first leaf of the segment cannot be loaded without an error
+** being reported (for example because pgnoLast is smaller than pgnoFirst
+** in the structure record), the iterator is left with pLeaf==0, which
+** callers treat as EOF.
+**
+** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
+** an error has already occurred when this function is called, it is a no-op.
+*/
+static void fts5SegIterInit(
+  Fts5Index *p,                   /* FTS index object */
+  Fts5StructureSegment *pSeg,     /* Description of segment */
+  Fts5SegIter *pIter              /* Object to populate */
+){
+  if( pSeg->pgnoFirst==0 ){
+    /* This happens if the segment is being used as an input to an incremental
+    ** merge and all data has already been "trimmed". See function
+    ** fts5TrimSegments() for details. In this case leave the iterator empty.
+    ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
+    ** at EOF already. */
+    assert( pIter->pLeaf==0 );
+    return;
+  }
+
+  if( p->rc==SQLITE_OK ){
+    memset(pIter, 0, sizeof(*pIter));
+    fts5SegIterSetNext(p, pIter);
+    pIter->pSeg = pSeg;
+    /* NextPage() increments iLeafPgno before loading, so start one short */
+    pIter->iLeafPgno = pSeg->pgnoFirst-1;
+    fts5SegIterNextPage(p, pIter);
+  }
+
+  /* fts5SegIterNextPage() leaves pLeaf NULL without setting p->rc when the
+  ** requested page lies beyond pgnoLast. Do not dereference it then. */
+  if( p->rc==SQLITE_OK && pIter->pLeaf ){
+    pIter->iLeafOffset = 4;
+    assert_nc( pIter->pLeaf->nn>4 );
+    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
+    pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
+    fts5SegIterLoadTerm(p, pIter, 0);
+    fts5SegIterLoadNPos(p, pIter);
+  }
+}
+
+/*
+** This function is only ever called on iterators created by calls to
+** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
+**
+** The iterator is in an unusual state when this function is called: the
+** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
+** the position-list size field for the first relevant rowid on the page.
+** Fts5SegIter.rowid is set, but nPos and bDel are not.
+**
+** This function advances the iterator so that it points to the last
+** relevant rowid on the page and, if necessary, initializes the
+** aRowidOffset[] and iRowidOffset variables. At this point the iterator
+** is in its regular state - Fts5SegIter.iLeafOffset points to the first
+** byte of the position list content associated with said rowid.
+**
+** aRowidOffset[] receives, for each rowid stepped over, the offset of
+** its position-list size field, so that fts5SegIterNext_Reverse() can
+** walk back through the page. If the array cannot be grown, p->rc is set
+** to SQLITE_NOMEM.
+*/
+static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
+ int eDetail = p->pConfig->eDetail;
+ int n = pIter->pLeaf->szLeaf;
+ int i = pIter->iLeafOffset;
+ u8 *a = pIter->pLeaf->p;
+ int iRowidOffset = 0;
+
+ if( n>pIter->iEndofDoclist ){
+ n = pIter->iEndofDoclist; /* Do not scan into the next term's doclist */
+ }
+
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
+ while( 1 ){
+ i64 iDelta = 0;
+
+ if( eDetail==FTS5_DETAIL_NONE ){
+ /* todo */
+ if( i<n && a[i]==0 ){
+ i++;
+ if( i<n && a[i]==0 ) i++;
+ }
+ }else{
+ int nPos;
+ int bDummy;
+ i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
+ i += nPos; /* Skip the position list itself */
+ }
+ if( i>=n ) break; /* No further rowid on this page */
+ i += fts5GetVarint(&a[i], (u64*)&iDelta);
+ pIter->iRowid += iDelta;
+
+ /* If necessary, grow the pIter->aRowidOffset[] array. */
+ if( iRowidOffset>=pIter->nRowidOffset ){
+ int nNew = pIter->nRowidOffset + 8;
+ int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
+ if( aNew==0 ){
+ p->rc = SQLITE_NOMEM;
+ break;
+ }
+ pIter->aRowidOffset = aNew;
+ pIter->nRowidOffset = nNew;
+ }
+
+ pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; /* Previous entry */
+ pIter->iLeafOffset = i;
+ }
+ pIter->iRowidOffset = iRowidOffset;
+ fts5SegIterLoadNPos(p, pIter);
+}
+
+/*
+** Move a reverse, single-term iterator to the previous leaf page that
+** holds a rowid for the current term.
+**
+** The current leaf is released and pages are read going backwards, never
+** further than the page holding the term itself (iTermLeafPgno). Pages
+** without a usable rowid are skipped. When a page is found its first
+** rowid is read and fts5SegIterReverseInitPage() positions the iterator
+** on the last rowid of that page. If no page is found, pIter->pLeaf is
+** left NULL.
+*/
+static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
+ assert( pIter->flags & FTS5_SEGITER_REVERSE );
+ assert( pIter->flags & FTS5_SEGITER_ONETERM );
+
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+ while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
+ Fts5Data *pNew;
+ pIter->iLeafPgno--;
+ pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
+ pIter->pSeg->iSegid, pIter->iLeafPgno
+ ));
+ if( pNew ){
+ /* iTermLeafOffset may be equal to szLeaf if the term is the last
+ ** thing on the page - i.e. the first rowid is on the following page.
+ ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
+ if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
+ assert( pIter->pLeaf==0 );
+ if( pIter->iTermLeafOffset<pNew->szLeaf ){
+ pIter->pLeaf = pNew;
+ pIter->iLeafOffset = pIter->iTermLeafOffset;
+ }
+ }else{
+ int iRowidOff;
+ iRowidOff = fts5LeafFirstRowidOff(pNew);
+ if( iRowidOff ){ /* Zero means the page is skipped */
+ pIter->pLeaf = pNew;
+ pIter->iLeafOffset = iRowidOff;
+ }
+ }
+
+ if( pIter->pLeaf ){
+ u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
+ pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
+ break;
+ }else{
+ fts5DataRelease(pNew); /* Page not used: drop it and keep looking */
+ }
+ }
+ }
+
+ if( pIter->pLeaf ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
+ fts5SegIterReverseInitPage(p, pIter);
+ }
+}
+
+/*
+** Return 1 if no error has occurred and the segment iterator that
+** pIter->aFirst[1].iFirst selects is positioned on a leaf with an empty
+** (0 byte) position list, i.e. a delete marker. Otherwise return 0.
+*/
+static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
+  Fts5SegIter *pCur = &pIter->aSeg[pIter->aFirst[1].iFirst];
+
+  if( p->rc!=SQLITE_OK ) return 0;
+  if( pCur->pLeaf==0 ) return 0;
+  return pCur->nPos==0;
+}
+
+/*
+** Advance iterator pIter to the next entry.
+**
+** This version of fts5SegIterNext() is only used by reverse iterators.
+** While entries remain in aRowidOffset[] the iterator steps back to the
+** previous rowid on the current page: the saved offset is restored, the
+** position-list size is reloaded, and the rowid delta stored after that
+** entry is subtracted from iRowid. Once the array is exhausted the
+** iterator moves to an earlier page with fts5SegIterReverseNewPage().
+*/
+static void fts5SegIterNext_Reverse(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegIter *pIter,             /* Iterator to advance */
+  int *pbUnused                   /* Unused */
+){
+  u8 *aLeaf;                      /* Data of the current leaf */
+  int iPos;                       /* Offset of the rowid delta to undo */
+  i64 iUndo;                      /* Value of that delta */
+
+  assert( pIter->flags & FTS5_SEGITER_REVERSE );
+  assert( pIter->pNextLeaf==0 );
+  UNUSED_PARAM(pbUnused);
+
+  if( pIter->iRowidOffset<=0 ){
+    fts5SegIterReverseNewPage(p, pIter);
+    return;
+  }
+
+  aLeaf = pIter->pLeaf->p;
+  pIter->iRowidOffset--;
+  pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
+  fts5SegIterLoadNPos(p, pIter);
+
+  iPos = pIter->iLeafOffset;
+  if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
+    iPos += pIter->nPos;
+  }
+  fts5GetVarint(&aLeaf[iPos], (u64*)&iUndo);
+  pIter->iRowid -= iUndo;
+}
+
+/*
+** Advance iterator pIter to the next entry.
+**
+** This version of fts5SegIterNext() is only used if detail=none and the
+** iterator is not a reverse direction iterator.
+**
+** Three cases are handled after any page change:
+**
+**   * The next rowid belongs to the current term: the rowid delta is read
+**     and added to iRowid.
+**   * The current doclist has ended and the iterator is not restricted to
+**     one term: the next term is loaded, either from the segment or, when
+**     pIter->pSeg is NULL, from the hash table scan. *pbNewTerm is set if
+**     pbNewTerm is not NULL.
+**   * Otherwise the iterator is at EOF: the leaf is released and
+**     pIter->pLeaf set to NULL.
+*/
+static void fts5SegIterNext_None(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5SegIter *pIter, /* Iterator to advance */
+ int *pbNewTerm /* OUT: Set for new term */
+){
+ int iOff;
+
+ assert( p->rc==SQLITE_OK );
+ assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
+ assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
+
+ ASSERT_SZLEAF_OK(pIter->pLeaf);
+ iOff = pIter->iLeafOffset;
+
+ /* Next entry is on the next page */
+ if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
+ fts5SegIterNextPage(p, pIter);
+ if( p->rc || pIter->pLeaf==0 ) return;
+ pIter->iRowid = 0; /* First rowid on a page is not a delta */
+ iOff = 4;
+ }
+
+ if( iOff<pIter->iEndofDoclist ){
+ /* Next entry is on the current page */
+ i64 iDelta;
+ iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
+ pIter->iLeafOffset = iOff;
+ pIter->iRowid += iDelta;
+ }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
+ if( pIter->pSeg ){
+ int nKeep = 0;
+ if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
+ iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
+ }
+ pIter->iLeafOffset = iOff;
+ fts5SegIterLoadTerm(p, pIter, nKeep);
+ }else{
+ const u8 *pList = 0;
+ const char *zTerm = 0;
+ int nList;
+ sqlite3Fts5HashScanNext(p->pHash);
+ sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
+ if( pList==0 ) goto next_none_eof; /* Hash scan exhausted */
+ pIter->pLeaf->p = (u8*)pList; /* Leaf now aliases the hash entry */
+ pIter->pLeaf->nn = nList;
+ pIter->pLeaf->szLeaf = nList;
+ pIter->iEndofDoclist = nList;
+ sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
+ pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
+ }
+
+ if( pbNewTerm ) *pbNewTerm = 1;
+ }else{
+ goto next_none_eof;
+ }
+
+ fts5SegIterLoadNPos(p, pIter);
+
+ return;
+ next_none_eof:
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+}
+
+
+/*
+** Advance iterator pIter to the next entry.
+**
+** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
+** is not considered an error if the iterator reaches EOF. If an error has
+** already occurred when this function is called, it is a no-op.
+**
+** This version is not used for detail=none tables (see the assert()
+** below). The next entry is looked for in one of three places:
+**
+**   * on the current page, if the current position list ends before
+**     szLeaf,
+**   * in the next hash table entry, if the iterator has no segment
+**     (pIter->pSeg==0),
+**   * otherwise on the following leaf pages, skipping any page that has
+**     neither a rowid nor a term on it.
+**
+** If a new term is reached and the iterator is restricted to one term
+** (FTS5_SEGITER_ONETERM), the iterator is set to EOF. Otherwise, if
+** pbNewTerm is not NULL, *pbNewTerm is set to 1.
+*/
+static void fts5SegIterNext(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5SegIter *pIter, /* Iterator to advance */
+ int *pbNewTerm /* OUT: Set for new term */
+){
+ Fts5Data *pLeaf = pIter->pLeaf;
+ int iOff;
+ int bNewTerm = 0;
+ int nKeep = 0;
+ u8 *a;
+ int n;
+
+ assert( pbNewTerm==0 || *pbNewTerm==0 );
+ assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
+
+ /* Search for the end of the position list within the current page. */
+ a = pLeaf->p;
+ n = pLeaf->szLeaf;
+
+ ASSERT_SZLEAF_OK(pLeaf);
+ iOff = pIter->iLeafOffset + pIter->nPos;
+
+ if( iOff<n ){
+ /* The next entry is on the current page. */
+ assert_nc( iOff<=pIter->iEndofDoclist );
+ if( iOff>=pIter->iEndofDoclist ){
+ bNewTerm = 1;
+ if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
+ iOff += fts5GetVarint32(&a[iOff], nKeep);
+ }
+ }else{
+ u64 iDelta;
+ iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
+ pIter->iRowid += iDelta;
+ assert_nc( iDelta>0 );
+ }
+ pIter->iLeafOffset = iOff;
+
+ }else if( pIter->pSeg==0 ){
+ const u8 *pList = 0;
+ const char *zTerm = 0;
+ int nList = 0;
+ assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
+ if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
+ sqlite3Fts5HashScanNext(p->pHash);
+ sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
+ }
+ if( pList==0 ){ /* One-term iterator, or hash scan exhausted: EOF */
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+ }else{
+ pIter->pLeaf->p = (u8*)pList; /* Leaf now aliases the hash entry */
+ pIter->pLeaf->nn = nList;
+ pIter->pLeaf->szLeaf = nList;
+ pIter->iEndofDoclist = nList+1;
+ sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
+ (u8*)zTerm);
+ pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
+ *pbNewTerm = 1; /* Non-NULL here: guaranteed by the assert() above */
+ }
+ }else{
+ iOff = 0;
+ /* Next entry is not on the current page */
+ while( iOff==0 ){
+ fts5SegIterNextPage(p, pIter);
+ pLeaf = pIter->pLeaf;
+ if( pLeaf==0 ) break;
+ ASSERT_SZLEAF_OK(pLeaf);
+ if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
+ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
+ pIter->iLeafOffset = iOff;
+
+ if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
+ );
+ }
+
+ }
+ else if( pLeaf->nn>pLeaf->szLeaf ){
+ pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
+ &pLeaf->p[pLeaf->szLeaf], iOff
+ );
+ pIter->iLeafOffset = iOff;
+ pIter->iEndofDoclist = iOff;
+ bNewTerm = 1;
+ }
+ assert_nc( iOff<pLeaf->szLeaf );
+ if( iOff>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ return;
+ }
+ }
+ }
+
+ /* Check if the iterator is now at EOF. If so, return early. */
+ if( pIter->pLeaf ){
+ if( bNewTerm ){
+ if( pIter->flags & FTS5_SEGITER_ONETERM ){
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+ }else{
+ fts5SegIterLoadTerm(p, pIter, nKeep);
+ fts5SegIterLoadNPos(p, pIter);
+ if( pbNewTerm ) *pbNewTerm = 1;
+ }
+ }else{
+ /* The following could be done by calling fts5SegIterLoadNPos(). But
+ ** this block is particularly performance critical, so equivalent
+ ** code is inlined.
+ **
+ ** Later: Switched back to fts5SegIterLoadNPos() because it supports
+ ** detail=none mode. Not ideal.
+ **
+ ** NOTE(review): the "Later" remark above does not match the code,
+ ** which is the inlined form. That is consistent with the assert() at
+ ** the top of this function excluding detail=none.
+ */
+ int nSz;
+ assert( p->rc==SQLITE_OK );
+ fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
+ pIter->bDel = (nSz & 0x0001);
+ pIter->nPos = nSz>>1;
+ assert_nc( pIter->nPos>=0 );
+ }
+ }
+}
+
+/* Exchange the values of lvalues a and b, both of type T. Each argument is
+** expanded twice, and the temporary is named "tmp", so neither argument
+** may have side effects or be an expression involving a variable called
+** "tmp". */
+#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
+
+/*
+** Advance integer lvalue iOff past the varint stored at a[iOff]. Bytes
+** are consumed until one without the 0x80 bit is passed, or until 9 bytes
+** have been consumed. iOff is modified in place and no bounds check is
+** made against the size of buffer a.
+*/
+#define fts5IndexSkipVarint(a, iOff) { \
+ int iEnd = iOff+9; \
+ while( (a[iOff++] & 0x80) && iOff<iEnd ); \
+}
+
+/*
+** Iterator pIter currently points to the first rowid in a doclist. This
+** function sets the iterator up so that iterates in reverse order through
+** the doclist.
+**
+** The page holding the last rowid of the doclist is located first: from
+** the doclist-index iterator if there is one (pIter->pDlidx), otherwise
+** by scanning forward through the leaves that follow the current page.
+** The iterator is then moved to that page and
+** fts5SegIterReverseInitPage() positions it on the page's last rowid.
+*/
+static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
+ Fts5DlidxIter *pDlidx = pIter->pDlidx;
+ Fts5Data *pLast = 0; /* Page holding the last rowid, if not current */
+ int pgnoLast = 0; /* Page number of pLast */
+
+ if( pDlidx ){
+ int iSegid = pIter->pSeg->iSegid;
+ pgnoLast = fts5DlidxIterPgno(pDlidx);
+ pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
+ }else{
+ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
+
+ /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
+ ** position-list content for the current rowid. Back it up so that it
+ ** points to the start of the position-list size field. */
+ int iPoslist;
+ if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
+ iPoslist = pIter->iTermLeafOffset;
+ }else{
+ iPoslist = 4;
+ }
+ fts5IndexSkipVarint(pLeaf->p, iPoslist); /* Step over the rowid varint */
+ pIter->iLeafOffset = iPoslist;
+
+ /* If this condition is true then the largest rowid for the current
+ ** term may not be stored on the current page. So search forward to
+ ** see where said rowid really is. */
+ if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
+ int pgno;
+ Fts5StructureSegment *pSeg = pIter->pSeg;
+
+ /* The last rowid in the doclist may not be on the current page. Search
+ ** forward to find the page containing the last rowid. */
+ for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
+ i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
+ Fts5Data *pNew = fts5DataRead(p, iAbs);
+ if( pNew ){
+ int iRowid, bTermless;
+ iRowid = fts5LeafFirstRowidOff(pNew);
+ bTermless = fts5LeafIsTermless(pNew);
+ if( iRowid ){
+ SWAPVAL(Fts5Data*, pNew, pLast); /* Keep pNew; old pLast freed below */
+ pgnoLast = pgno;
+ }
+ fts5DataRelease(pNew);
+ if( bTermless==0 ) break; /* A term starts here: doclist has ended */
+ }
+ }
+ }
+ }
+
+ /* If pLast is NULL at this point, then the last rowid for this doclist
+ ** lies on the page currently indicated by the iterator. In this case
+ ** pIter->iLeafOffset is already set to point to the position-list size
+ ** field associated with the first relevant rowid on the page.
+ **
+ ** Or, if pLast is non-NULL, then it is the page that contains the last
+ ** rowid. In this case configure the iterator so that it points to the
+ ** first rowid on this page.
+ */
+ if( pLast ){
+ int iOff;
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = pLast;
+ pIter->iLeafPgno = pgnoLast;
+ iOff = fts5LeafFirstRowidOff(pLast);
+ iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
+ pIter->iLeafOffset = iOff;
+
+ if( fts5LeafIsTermless(pLast) ){
+ pIter->iEndofDoclist = pLast->nn+1;
+ }else{
+ pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
+ }
+
+ }
+
+ fts5SegIterReverseInitPage(p, pIter);
+}
+
+/*
+** Possibly attach a doclist-index iterator to pIter.
+**
+** On entry pIter points at the first rowid of a doclist and has no
+** doclist-index loaded. The doclist-index associated with a page belongs
+** to the final term on that page, so if the current doclist finishes
+** before the end of the leaf on which its term starts (i.e. the
+** doclist-index belongs to a different term), nothing is loaded.
+** Otherwise (pIter->pDlidx) is set to the value returned by
+** fts5DlidxIterInit() for leaf iTermLeafPgno.
+*/
+static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
+  int bRev;                       /* Non-zero for a reverse iterator */
+  int bEndsOnTermPage;            /* True if doclist ends on its first leaf */
+
+  assert( pIter->flags & FTS5_SEGITER_ONETERM );
+  assert( pIter->pDlidx==0 );
+
+  bEndsOnTermPage = (
+      pIter->iTermLeafPgno==pIter->iLeafPgno
+   && pIter->iEndofDoclist<pIter->pLeaf->szLeaf
+  );
+  if( bEndsOnTermPage ) return;
+
+  bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
+  pIter->pDlidx = fts5DlidxIterInit(
+      p, bRev, pIter->pSeg->iSegid, pIter->iTermLeafPgno
+  );
+}
+
+/*
+** The iterator object passed as the second argument currently contains
+** no valid values except for the Fts5SegIter.pLeaf member variable. This
+** function searches the leaf page for a term matching (pTerm/nTerm).
+**
+** If the specified term is found on the page, then the iterator is left
+** pointing to it. If argument bGe is zero and the term is not found,
+** the iterator is left pointing at EOF.
+**
+** If bGe is non-zero and the specified term is not found, then the
+** iterator is left pointing to the smallest term in the segment that
+** is larger than the specified term, even if this term is not on the
+** current page.
+**
+** EOF is represented by releasing the leaf and setting pIter->pLeaf to
+** NULL. When the iterator is left on a term, this function sets
+** iLeafOffset, iTermLeafOffset, iTermLeafPgno, the term buffer,
+** iEndofDoclist and iPgidxOff, and then calls fts5SegIterLoadRowid() and
+** fts5SegIterLoadNPos(). It must be called with p->rc==SQLITE_OK.
+*/
+static void fts5LeafSeek(
+ Fts5Index *p, /* Leave any error code here */
+ int bGe, /* True for a >= search */
+ Fts5SegIter *pIter, /* Iterator to seek */
+ const u8 *pTerm, int nTerm /* Term to search for */
+){
+ int iOff;
+ const u8 *a = pIter->pLeaf->p;
+ int szLeaf = pIter->pLeaf->szLeaf;
+ int n = pIter->pLeaf->nn;
+
+ int nMatch = 0; /* Leading bytes of pTerm matched so far */
+ int nKeep = 0; /* Prefix bytes the current term shares with the previous one */
+ int nNew = 0; /* Suffix bytes stored for the current term */
+ int iTermOff;
+ int iPgidx; /* Current offset in pgidx */
+ int bEndOfPage = 0;
+
+ assert( p->rc==SQLITE_OK );
+
+ iPgidx = szLeaf; /* the pgidx is read starting at offset szLeaf */
+ iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
+ iOff = iTermOff;
+
+ while( 1 ){
+
+ /* Figure out how many new bytes are in this term */
+ fts5FastGetVarint32(a, iOff, nNew);
+ if( nKeep<nMatch ){
+ goto search_failed;
+ }
+
+ assert( nKeep>=nMatch );
+ if( nKeep==nMatch ){
+ int nCmp;
+ int i;
+ nCmp = MIN(nNew, nTerm-nMatch);
+ for(i=0; i<nCmp; i++){
+ if( a[iOff+i]!=pTerm[nMatch+i] ) break;
+ }
+ nMatch += i;
+
+ if( nTerm==nMatch ){
+ if( i==nNew ){
+ goto search_success;
+ }else{
+ goto search_failed;
+ }
+ }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
+ goto search_failed;
+ }
+ }
+
+ if( iPgidx>=n ){ /* no further pgidx entries on this page */
+ bEndOfPage = 1;
+ break;
+ }
+
+ iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); /* nKeep temporarily holds the pgidx delta to the next term */
+ iTermOff += nKeep;
+ iOff = iTermOff;
+
+ /* Read the nKeep field of the next term. */
+ fts5FastGetVarint32(a, iOff, nKeep);
+ }
+
+ search_failed:
+ if( bGe==0 ){
+ fts5DataRelease(pIter->pLeaf);
+ pIter->pLeaf = 0;
+ return;
+ }else if( bEndOfPage ){
+ do {
+ fts5SegIterNextPage(p, pIter);
+ if( pIter->pLeaf==0 ) return;
+ a = pIter->pLeaf->p;
+ if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
+ iPgidx = pIter->pLeaf->szLeaf;
+ iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
+ if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT; /* NOTE(review): the loop continues; it appears to rely on fts5SegIterNextPage() leaving pLeaf NULL once rc is set - confirm */
+ }else{
+ nKeep = 0;
+ iTermOff = iOff;
+ n = pIter->pLeaf->nn;
+ iOff += fts5GetVarint32(&a[iOff], nNew);
+ break;
+ }
+ }
+ }while( 1 );
+ }
+
+ search_success:
+
+ pIter->iLeafOffset = iOff + nNew;
+ pIter->iTermLeafOffset = pIter->iLeafOffset;
+ pIter->iTermLeafPgno = pIter->iLeafPgno;
+
+ fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); /* first nKeep bytes come from the search term */
+ fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); /* remaining nNew bytes come from the page */
+
+ if( iPgidx>=n ){
+ pIter->iEndofDoclist = pIter->pLeaf->nn+1;
+ }else{
+ int nExtra;
+ iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
+ pIter->iEndofDoclist = iTermOff + nExtra;
+ }
+ pIter->iPgidxOff = iPgidx;
+
+ fts5SegIterLoadRowid(p, pIter);
+ fts5SegIterLoadNPos(p, pIter);
+}
+
+/*
+** Initialize the object pIter to point to term pTerm/nTerm within segment
+** pSeg. If there is no such term in the index, the iterator is set to EOF.
+**
+** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
+** an error has already occurred when this function is called, it is a no-op.
+**
+** The candidate leaf page is looked up in the %_idx table using the
+** statement cached in p->pIdxSelect (prepared on first use). When
+** FTS5INDEX_QUERY_SCAN is not set, the iterator is flagged
+** FTS5_SEGITER_ONETERM and, for FTS5INDEX_QUERY_DESC, additionally
+** FTS5_SEGITER_REVERSE and repositioned by fts5SegIterReverse().
+*/
+static void fts5SegIterSeekInit(
+ Fts5Index *p, /* FTS5 backend */
+ const u8 *pTerm, int nTerm, /* Term to seek to */
+ int flags, /* Mask of FTS5INDEX_XXX flags */
+ Fts5StructureSegment *pSeg, /* Description of segment */
+ Fts5SegIter *pIter /* Object to populate */
+){
+ int iPg = 1;
+ int bGe = (flags & FTS5INDEX_QUERY_SCAN);
+ int bDlidx = 0; /* True if there is a doclist-index */
+
+ assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
+ assert( pTerm && nTerm );
+ memset(pIter, 0, sizeof(*pIter));
+ pIter->pSeg = pSeg;
+
+ /* This block sets stack variable iPg to the leaf page number that may
+ ** contain term (pTerm/nTerm), if it is present in the segment. */
+ if( p->pIdxSelect==0 ){
+ Fts5Config *pConfig = p->pConfig;
+ fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
+ "SELECT pgno FROM '%q'.'%q_idx' WHERE "
+ "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
+ pConfig->zDb, pConfig->zName
+ ));
+ }
+ if( p->rc ) return;
+ sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid);
+ sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
+ if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){
+ i64 val = sqlite3_column_int(p->pIdxSelect, 0);
+ iPg = (int)(val>>1); /* upper bits of the stored value: page number */
+ bDlidx = (val & 0x0001); /* low bit of the stored value: doclist-index flag */
+ }
+ p->rc = sqlite3_reset(p->pIdxSelect);
+
+ if( iPg<pSeg->pgnoFirst ){
+ iPg = pSeg->pgnoFirst;
+ bDlidx = 0;
+ }
+
+ pIter->iLeafPgno = iPg - 1; /* one short of iPg; presumably fts5SegIterNextPage() steps onto page iPg - confirm */
+ fts5SegIterNextPage(p, pIter);
+
+ if( pIter->pLeaf ){
+ fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
+ }
+
+ if( p->rc==SQLITE_OK && bGe==0 ){
+ pIter->flags |= FTS5_SEGITER_ONETERM;
+ if( pIter->pLeaf ){
+ if( flags & FTS5INDEX_QUERY_DESC ){
+ pIter->flags |= FTS5_SEGITER_REVERSE; /* set before fts5SegIterLoadDlidx(), which reads this flag */
+ }
+ if( bDlidx ){
+ fts5SegIterLoadDlidx(p, pIter);
+ }
+ if( flags & FTS5INDEX_QUERY_DESC ){
+ fts5SegIterReverse(p, pIter);
+ }
+ }
+ }
+
+ fts5SegIterSetNext(p, pIter);
+
+ /* Either:
+ **
+ ** 1) an error has occurred, or
+ ** 2) the iterator points to EOF, or
+ ** 3) the iterator points to an entry with term (pTerm/nTerm), or
+ ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
+ ** to an entry with a term greater than or equal to (pTerm/nTerm).
+ */
+ assert( p->rc!=SQLITE_OK /* 1 */
+ || pIter->pLeaf==0 /* 2 */
+ || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
+ || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
+ );
+}
+
+/*
+** Initialize the object pIter to point to term pTerm/nTerm within the
+** in-memory hash table. If there is no such term in the hash-table, the
+** iterator is set to EOF.
+**
+** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
+** an error has already occurred when this function is called, it is a no-op.
+**
+** If pTerm is NULL or FTS5INDEX_QUERY_SCAN is set, a hash scan is started
+** and the iterator takes the first entry the scan reports. Otherwise a
+** single-term lookup is done and FTS5_SEGITER_ONETERM is set. When an
+** entry is found, a Fts5Data wrapper is allocated whose p pointer refers
+** directly to the list data returned by the hash table.
+*/
+static void fts5SegIterHashInit(
+ Fts5Index *p, /* FTS5 backend */
+ const u8 *pTerm, int nTerm, /* Term to seek to */
+ int flags, /* Mask of FTS5INDEX_XXX flags */
+ Fts5SegIter *pIter /* Object to populate */
+){
+ const u8 *pList = 0; /* Doclist data from the hash table, or NULL */
+ int nList = 0; /* Size of pList in bytes */
+ const u8 *z = 0; /* Term the iterator will point at */
+ int n = 0; /* Size of term z in bytes */
+
+ assert( p->pHash );
+ assert( p->rc==SQLITE_OK );
+
+ if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
+ p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
+ sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
+ n = (z ? (int)strlen((const char*)z) : 0);
+ }else{
+ pIter->flags |= FTS5_SEGITER_ONETERM;
+ sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList);
+ z = pTerm;
+ n = nTerm;
+ }
+
+ if( pList ){
+ Fts5Data *pLeaf;
+ sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
+ pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
+ if( pLeaf==0 ) return; /* allocation failed; note fts5SegIterSetNext() is skipped on this path */
+ pLeaf->p = (u8*)pList; /* points at the hash table's data; no copy is made */
+ pLeaf->nn = pLeaf->szLeaf = nList;
+ pIter->pLeaf = pLeaf;
+ pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
+ pIter->iEndofDoclist = pLeaf->nn;
+
+ if( flags & FTS5INDEX_QUERY_DESC ){
+ pIter->flags |= FTS5_SEGITER_REVERSE;
+ fts5SegIterReverseInitPage(p, pIter);
+ }else{
+ fts5SegIterLoadNPos(p, pIter);
+ }
+ }
+
+ fts5SegIterSetNext(p, pIter);
+}
+
+/*
+** Release every resource held by segment iterator pIter - the
+** aRowidOffset[] array, the doclist-index iterator, the pre-loaded and
+** current leaf pages and the term buffer - and then overwrite the whole
+** structure with zeroes.
+*/
+static void fts5SegIterClear(Fts5SegIter *pIter){
+  sqlite3_free(pIter->aRowidOffset);
+  fts5DlidxIterFree(pIter->pDlidx);
+  fts5DataRelease(pIter->pNextLeaf);
+  fts5DataRelease(pIter->pLeaf);
+  fts5BufferFree(&pIter->term);
+  memset(pIter, 0, sizeof(*pIter));
+}
+
+#ifdef SQLITE_DEBUG
+
+/*
+** Helper for fts5AssertMultiIterSetup(). Sub-iterators p1 and p2 are the
+** two inputs to the comparison whose stored result is *pRes. This function
+** asserts that pRes->iFirst and pRes->bTermEq are what a fresh comparison
+** of the current positions of p1 and p2 would produce. If both iterators
+** are at EOF nothing is checked.
+*/
+static void fts5AssertComparisonResult(
+  Fts5Iter *pIter,
+  Fts5SegIter *p1,
+  Fts5SegIter *p2,
+  Fts5CResult *pRes
+){
+  int i1 = p1 - pIter->aSeg;      /* Index of p1 within aSeg[] */
+  int i2 = p2 - pIter->aSeg;      /* Index of p2 within aSeg[] */
+  int nMin;                       /* Size of the shorter of the two terms */
+  int res;                        /* <0 if p1 sorts first, >0 if p2 does */
+
+  if( p1->pLeaf==0 && p2->pLeaf==0 ) return;
+
+  /* If exactly one side is at EOF, the other one must be the winner. */
+  if( p1->pLeaf==0 ){
+    assert( pRes->iFirst==i2 );
+    return;
+  }
+  if( p2->pLeaf==0 ){
+    assert( pRes->iFirst==i1 );
+    return;
+  }
+
+  /* Compare by term, then by term length, then by rowid. */
+  nMin = MIN(p1->term.n, p2->term.n);
+  res = memcmp(p1->term.p, p2->term.p, nMin);
+  if( res==0 ) res = p1->term.n - p2->term.n;
+
+  if( res!=0 ){
+    assert( pRes->bTermEq==0 );
+  }else{
+    assert( pRes->bTermEq==1 );
+    assert( p1->iRowid!=p2->iRowid );
+    res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
+  }
+
+  if( res<0 ){
+    assert( pRes->iFirst==i1 );
+  }else{
+    assert( pRes->iFirst==i2 );
+  }
+}
+
+/*
+** This function is a no-op unless SQLITE_DEBUG is defined when this module
+** is compiled. In that case, this function is essentially an assert()
+** statement used to verify that the contents of the pIter->aFirst[] array
+** are correct.
+**
+** Nothing is checked unless p->rc==SQLITE_OK. The checks are: that
+** base.bEof agrees with whether the iterator selected by aFirst[1] has a
+** leaf; that every other live sub-iterator positioned on the same term is
+** consistent with iSwitchRowid; and that the aFirst[] entries visited by
+** the two loops below match what fts5AssertComparisonResult() expects.
+*/
+static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
+ if( p->rc==SQLITE_OK ){
+ Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
+ int i;
+
+ assert( (pFirst->pLeaf==0)==pIter->base.bEof );
+
+ /* Check that pIter->iSwitchRowid is set correctly. */
+ for(i=0; i<pIter->nSeg; i++){
+ Fts5SegIter *p1 = &pIter->aSeg[i];
+ assert( p1==pFirst
+ || p1->pLeaf==0
+ || fts5BufferCompare(&pFirst->term, &p1->term)
+ || p1->iRowid==pIter->iSwitchRowid
+ || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
+ );
+ }
+
+ for(i=0; i<pIter->nSeg; i+=2){ /* leaf level: adjacent pairs of aSeg[] entries */
+ Fts5SegIter *p1 = &pIter->aSeg[i];
+ Fts5SegIter *p2 = &pIter->aSeg[i+1];
+ Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
+ fts5AssertComparisonResult(pIter, p1, p2, pRes);
+ }
+
+ for(i=1; i<(pIter->nSeg / 2); i+=2){ /* interior entries; NOTE(review): the step of 2 visits only odd i - confirm this is intended */
+ Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
+ Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
+ Fts5CResult *pRes = &pIter->aFirst[i];
+ fts5AssertComparisonResult(pIter, p1, p2, pRes);
+ }
+ }
+}
+#else
+# define fts5AssertMultiIterSetup(x,y)
+#endif
+
+/*
+** Do the comparison necessary to populate pIter->aFirst[iOut].
+**
+** If the returned value is non-zero, then it is the index of an entry
+** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
+** to a key that is a duplicate of another, higher priority,
+** segment-iterator in the pIter->aSeg[] array.
+**
+** In that case aFirst[iOut].iFirst is not updated (only bTermEq is set),
+** and the bDel flag of the right-hand iterator is copied to the left-hand
+** one. When zero is returned, aFirst[iOut].iFirst holds the index of the
+** sub-iterator that sorts first: an iterator at EOF always loses, terms
+** are compared with fts5BufferCompare(), and equal terms are ordered by
+** rowid according to pIter->bRev.
+*/
+static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
+ int i1; /* Index of left-hand Fts5SegIter */
+ int i2; /* Index of right-hand Fts5SegIter */
+ int iRes;
+ Fts5SegIter *p1; /* Left-hand Fts5SegIter */
+ Fts5SegIter *p2; /* Right-hand Fts5SegIter */
+ Fts5CResult *pRes = &pIter->aFirst[iOut];
+
+ assert( iOut<pIter->nSeg && iOut>0 );
+ assert( pIter->bRev==0 || pIter->bRev==1 );
+
+ if( iOut>=(pIter->nSeg/2) ){ /* entry compares two aSeg[] slots directly */
+ i1 = (iOut - pIter->nSeg/2) * 2;
+ i2 = i1 + 1;
+ }else{ /* entry compares the winners of its two child entries */
+ i1 = pIter->aFirst[iOut*2].iFirst;
+ i2 = pIter->aFirst[iOut*2+1].iFirst;
+ }
+ p1 = &pIter->aSeg[i1];
+ p2 = &pIter->aSeg[i2];
+
+ pRes->bTermEq = 0;
+ if( p1->pLeaf==0 ){ /* If p1 is at EOF */
+ iRes = i2;
+ }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
+ iRes = i1;
+ }else{
+ int res = fts5BufferCompare(&p1->term, &p2->term);
+ if( res==0 ){
+ assert( i2>i1 );
+ assert( i2!=0 );
+ pRes->bTermEq = 1;
+ if( p1->iRowid==p2->iRowid ){
+ p1->bDel = p2->bDel;
+ return i2; /* duplicate key: caller is expected to advance aSeg[i2] */
+ }
+ res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
+ }
+ assert( res!=0 );
+ if( res<0 ){
+ iRes = i1;
+ }else{
+ iRes = i2;
+ }
+ }
+
+ pRes->iFirst = (u16)iRes;
+ return 0;
+}
+
+/*
+** Reposition pIter on the first rowid stored on leaf page iLeafPgno, which
+** must lie after the page the iterator is currently on.
+**
+** Fts5Index.rc is set to FTS5_CORRUPT if iLeafPgno is beyond the last page
+** of the segment, or if the first-rowid offset read from the new page is
+** less than 4 or not smaller than the page's szLeaf value.
+*/
+static void fts5SegIterGotoPage(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegIter *pIter,             /* Iterator to advance */
+  int iLeafPgno
+){
+  int iRowidOff;                  /* Offset of first rowid on the new page */
+  Fts5Data *pLeaf;                /* The new page */
+
+  assert( iLeafPgno>pIter->iLeafPgno );
+
+  if( iLeafPgno>pIter->pSeg->pgnoLast ){
+    p->rc = FTS5_CORRUPT;
+    return;
+  }
+
+  /* Discard any pre-loaded page, then step onto page iLeafPgno. */
+  fts5DataRelease(pIter->pNextLeaf);
+  pIter->pNextLeaf = 0;
+  pIter->iLeafPgno = iLeafPgno-1;
+  fts5SegIterNextPage(p, pIter);
+  assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
+  if( p->rc!=SQLITE_OK ) return;
+
+  pLeaf = pIter->pLeaf;
+  iRowidOff = fts5LeafFirstRowidOff(pLeaf);
+  if( iRowidOff<4 || iRowidOff>=pLeaf->szLeaf ){
+    p->rc = FTS5_CORRUPT;
+    return;
+  }
+
+  iRowidOff += fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&pIter->iRowid);
+  pIter->iLeafOffset = iRowidOff;
+  fts5SegIterLoadNPos(p, pIter);
+}
+
+/*
+** Advance the iterator passed as the second argument until it is at or
+** past rowid iMatch. Regardless of the value of iMatch, the iterator is
+** always advanced at least once.
+**
+** The iterator must have the FTS5_SEGITER_ONETERM flag set, a
+** doclist-index iterator attached and a current leaf (all asserted). The
+** doclist-index is used to jump over whole leaf pages where possible; the
+** final loop then steps entry by entry using pIter->xNext().
+*/
+static void fts5SegIterNextFrom(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5SegIter *pIter, /* Iterator to advance */
+ i64 iMatch /* Advance iterator at least this far */
+){
+ int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
+ Fts5DlidxIter *pDlidx = pIter->pDlidx;
+ int iLeafPgno = pIter->iLeafPgno;
+ int bMove = 1; /* True if the final loop must still call xNext() first */
+
+ assert( pIter->flags & FTS5_SEGITER_ONETERM );
+ assert( pIter->pDlidx );
+ assert( pIter->pLeaf );
+
+ if( bRev==0 ){
+ while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
+ iLeafPgno = fts5DlidxIterPgno(pDlidx);
+ fts5DlidxIterNext(p, pDlidx);
+ }
+ assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
+ if( iLeafPgno>pIter->iLeafPgno ){
+ fts5SegIterGotoPage(p, pIter, iLeafPgno);
+ bMove = 0; /* changing page already moved the iterator */
+ }
+ }else{
+ assert( pIter->pNextLeaf==0 );
+ assert( iMatch<pIter->iRowid );
+ while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
+ fts5DlidxIterPrev(p, pDlidx);
+ }
+ iLeafPgno = fts5DlidxIterPgno(pDlidx);
+
+ assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
+
+ if( iLeafPgno<pIter->iLeafPgno ){
+ pIter->iLeafPgno = iLeafPgno+1; /* presumably fts5SegIterReverseNewPage() loads the page before this one - confirm */
+ fts5SegIterReverseNewPage(p, pIter);
+ bMove = 0;
+ }
+ }
+
+ do{
+ if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
+ if( pIter->pLeaf==0 ) break;
+ if( bRev==0 && pIter->iRowid>=iMatch ) break;
+ if( bRev!=0 && pIter->iRowid<=iMatch ) break;
+ bMove = 1;
+ }while( p->rc==SQLITE_OK );
+}
+
+
+/*
+** Free multi-iterator pIter: clear each of its nSeg segment iterators,
+** release its reference to the index structure, free the poslist buffer
+** and finally free the object itself. Passing NULL is a harmless no-op.
+*/
+static void fts5MultiIterFree(Fts5Iter *pIter){
+  int iSeg = 0;
+
+  if( pIter==0 ) return;
+
+  while( iSeg<pIter->nSeg ){
+    fts5SegIterClear(&pIter->aSeg[iSeg]);
+    iSeg++;
+  }
+  fts5StructureRelease(pIter->pStruct);
+  fts5BufferFree(&pIter->poslist);
+  sqlite3_free(pIter);
+}
+
+/*
+** Sub-iterator iChanged of pIter has just been moved. Redo the comparisons
+** stored in pIter->aFirst[], starting with the entry that covers iChanged
+** and halving the index each step until it drops below iMinset or an
+** error is recorded in p->rc.
+**
+** Whenever fts5MultiIterDoCompare() reports a duplicate key (non-zero
+** return), the reported sub-iterator is advanced with its xNext() method
+** and the walk restarts from the aFirst[] entry that covers it.
+*/
+static void fts5MultiIterAdvanced(
+  Fts5Index *p,                   /* FTS5 backend to iterate within */
+  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
+  int iChanged,                   /* Index of sub-iterator just advanced */
+  int iMinset                     /* Minimum entry in aFirst[] to set */
+){
+  int iEntry = (pIter->nSeg+iChanged)/2;
+
+  while( iEntry>=iMinset && p->rc==SQLITE_OK ){
+    int iDup = fts5MultiIterDoCompare(pIter, iEntry);
+    if( iDup!=0 ){
+      Fts5SegIter *pDup = &pIter->aSeg[iDup];
+      assert( p->rc==SQLITE_OK );
+      pDup->xNext(p, pDup, 0);
+      iEntry = pIter->nSeg + iDup;
+    }
+    iEntry = iEntry/2;
+  }
+}
+
+/*
+** Sub-iterator iChanged of iterator pIter has just been advanced. It still
+** points to the same term though - just a different rowid. This function
+** attempts to update the contents of the pIter->aFirst[] accordingly.
+** If it does so successfully, 0 is returned. Otherwise 1.
+**
+** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
+** on the iterator instead. That function does the same as this one, except
+** that it deals with more complicated cases as well.
+**
+** When 0 is returned, *ppFirst is set to the sub-iterator that now comes
+** first. When 1 is returned, *ppFirst is left unmodified. If the new rowid
+** has not reached pIter->iSwitchRowid, aFirst[] is left untouched and
+** *ppFirst is simply set to the sub-iterator that was advanced.
+*/
+static int fts5MultiIterAdvanceRowid(
+ Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
+ int iChanged, /* Index of sub-iterator just advanced */
+ Fts5SegIter **ppFirst
+){
+ Fts5SegIter *pNew = &pIter->aSeg[iChanged];
+
+ if( pNew->iRowid==pIter->iSwitchRowid
+ || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
+ ){
+ int i;
+ Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; /* sibling slot of iChanged */
+ pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
+ for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
+ Fts5CResult *pRes = &pIter->aFirst[i];
+
+ assert( pNew->pLeaf );
+ assert( pRes->bTermEq==0 || pOther->pLeaf );
+
+ if( pRes->bTermEq ){
+ if( pNew->iRowid==pOther->iRowid ){
+ return 1; /* duplicate rowid: needs fts5MultiIterAdvanced() */
+ }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
+ pIter->iSwitchRowid = pOther->iRowid;
+ pNew = pOther; /* the other side now sorts first */
+ }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
+ pIter->iSwitchRowid = pOther->iRowid;
+ }
+ }
+ pRes->iFirst = (u16)(pNew - pIter->aSeg);
+ if( i==1 ) break;
+
+ pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; /* winner of the sibling entry */
+ }
+ }
+
+ *ppFirst = pNew;
+ return 0;
+}
+
+/*
+** Refresh pIter->base.bEof and pIter->iSwitchRowid from the sub-iterator
+** currently selected by aFirst[1]: bEof becomes true if that sub-iterator
+** has no leaf, and iSwitchRowid is set to its rowid.
+*/
+static void fts5MultiIterSetEof(Fts5Iter *pIter){
+  int iFirst = pIter->aFirst[1].iFirst;
+  Fts5SegIter *pFirst = &pIter->aSeg[iFirst];
+
+  pIter->base.bEof = (pFirst->pLeaf==0);
+  pIter->iSwitchRowid = pFirst->iRowid;
+}
+
+/*
+** Move the iterator to the next entry.
+**
+** If an error occurs, an error code is left in Fts5Index.rc. It is not
+** considered an error if the iterator reaches EOF, or if it is already at
+** EOF when this function is called.
+**
+** If bFrom is true and the current sub-iterator has a doclist-index, the
+** first step uses fts5SegIterNextFrom() with iFrom; any further steps
+** (taken while skipping entries with nPos==0 when pIter->bSkipEmpty is
+** set) use the plain xNext() method. pIter->xSetOutputs() is invoked
+** before returning on a non-EOF entry.
+*/
+static void fts5MultiIterNext(
+ Fts5Index *p,
+ Fts5Iter *pIter,
+ int bFrom, /* True if argument iFrom is valid */
+ i64 iFrom /* Advance at least as far as this */
+){
+ int bUseFrom = bFrom;
+ while( p->rc==SQLITE_OK ){
+ int iFirst = pIter->aFirst[1].iFirst;
+ int bNewTerm = 0;
+ Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
+ assert( p->rc==SQLITE_OK );
+ if( bUseFrom && pSeg->pDlidx ){
+ fts5SegIterNextFrom(p, pSeg, iFrom);
+ }else{
+ pSeg->xNext(p, pSeg, &bNewTerm);
+ }
+
+ if( pSeg->pLeaf==0 || bNewTerm
+ || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
+ ){
+ fts5MultiIterAdvanced(p, pIter, iFirst, 1);
+ fts5MultiIterSetEof(pIter);
+ pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
+ if( pSeg->pLeaf==0 ) return; /* reached EOF */
+ }
+
+ fts5AssertMultiIterSetup(p, pIter);
+ assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
+ if( pIter->bSkipEmpty==0 || pSeg->nPos ){
+ pIter->xSetOutputs(pIter, pSeg);
+ return;
+ }
+ bUseFrom = 0; /* iFrom is honoured for the first step only */
+ }
+}
+
+/*
+** Variant of fts5MultiIterNext() for iterators that have bSkipEmpty set.
+** The iterator is stepped repeatedly for as long as fts5MultiIterIsEmpty()
+** reports true for the new position.
+**
+** After each step *pbNewTerm is set to 1 if the aFirst[] array had to be
+** rebuilt with fts5MultiIterAdvanced() (the new entry might belong to a
+** different term), or to 0 otherwise. If p->rc is already set on entry
+** this function does nothing and *pbNewTerm is not written.
+*/
+static void fts5MultiIterNext2(
+  Fts5Index *p,
+  Fts5Iter *pIter,
+  int *pbNewTerm                  /* OUT: True if *might* be new term */
+){
+  assert( pIter->bSkipEmpty );
+  if( p->rc!=SQLITE_OK ) return;
+
+  do{
+    int iFirst = pIter->aFirst[1].iFirst;
+    Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
+    int bNewTerm = 0;
+    int bRebuild;                 /* True if aFirst[] must be recomputed */
+
+    assert( p->rc==SQLITE_OK );
+    pSeg->xNext(p, pSeg, &bNewTerm);
+
+    bRebuild = (
+        pSeg->pLeaf==0
+     || bNewTerm
+     || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
+    );
+    if( bRebuild ){
+      fts5MultiIterAdvanced(p, pIter, iFirst, 1);
+      fts5MultiIterSetEof(pIter);
+    }
+    *pbNewTerm = bRebuild;
+
+    fts5AssertMultiIterSetup(p, pIter);
+  }while( fts5MultiIterIsEmpty(p, pIter) );
+}
+
+/*
+** xSetOutputs implementation that does nothing. fts5MultiIterAlloc()
+** installs it as the initial callback of every new iterator.
+*/
+static void fts5IterSetOutputs_Noop(
+  Fts5Iter *pUnused1,
+  Fts5SegIter *pUnused2
+){
+  UNUSED_PARAM2(pUnused1, pUnused2);
+}
+
+/*
+** Allocate a new Fts5Iter with room for at least nSeg segment iterators.
+**
+** The number of slots actually allocated is the smallest power of two
+** that is >= nSeg, with a minimum of 2. Note that the nSeg field of the
+** returned object is set to this slot count, not to the nSeg argument.
+** The aFirst[] array is placed directly after the aSeg[] array within the
+** same allocation, and xSetOutputs starts out as the no-op callback.
+**
+** Returns NULL if fts5IdxMalloc() returns NULL.
+*/
+static Fts5Iter *fts5MultiIterAlloc(
+  Fts5Index *p,                   /* FTS5 backend to iterate within */
+  int nSeg
+){
+  Fts5Iter *pNew;
+  int nSlot = 2;                  /* Power of two >= nSeg */
+
+  while( nSlot<nSeg ){
+    nSlot = nSlot*2;
+  }
+
+  pNew = fts5IdxMalloc(p,
+      sizeof(Fts5Iter) +                    /* pNew */
+      sizeof(Fts5SegIter) * (nSlot-1) +     /* pNew->aSeg[] */
+      sizeof(Fts5CResult) * nSlot           /* pNew->aFirst[] */
+  );
+  if( pNew==0 ) return 0;
+
+  pNew->nSeg = nSlot;
+  pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
+  pNew->pIndex = p;
+  pNew->xSetOutputs = fts5IterSetOutputs_Noop;
+  return pNew;
+}
+
+/*
+** xChunk callback for fts5ChunkIterate(). The context pointer is an
+** Fts5Buffer; each non-empty chunk is appended to it verbatim using
+** fts5BufferSafeAppendBlob(). fts5SegiterPoslist() grows the buffer by
+** the full position-list size before starting the iteration.
+*/
+static void fts5PoslistCallback(
+  Fts5Index *pUnused,
+  void *pContext,
+  const u8 *pChunk, int nChunk
+){
+  Fts5Buffer *pOut = (Fts5Buffer*)pContext;
+
+  UNUSED_PARAM(pUnused);
+  assert_nc( nChunk>=0 );
+  if( nChunk<=0 ) return;
+
+  fts5BufferSafeAppendBlob(pOut, pChunk, nChunk);
+}
+
+typedef struct PoslistCallbackCtx PoslistCallbackCtx;
+struct PoslistCallbackCtx {
+ Fts5Buffer *pBuf; /* Append to this buffer */
+ Fts5Colset *pColset; /* Restrict matches to this column */
+ int eState; /* 0: skipping current column, 1: copying it, 2: previous chunk ended just after a 0x01 marker, so the column number comes first in the next chunk */
+};
+
+typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
+struct PoslistOffsetsCtx {
+ Fts5Buffer *pBuf; /* Append to this buffer */
+ Fts5Colset *pColset; /* Restrict matches to this column */
+ int iRead; /* Most recent value decoded from the input */
+ int iWrite; /* Most recent value written to pBuf */
+};
+
+/*
+** Return 1 if column iCol is a member of pColset, or 0 if it is not.
+** This is a linear scan of pColset->aiCol[].
+**
+** TODO: Make this more efficient!
+*/
+static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
+  int bFound = 0;
+  int ii = 0;
+
+  while( bFound==0 && ii<pColset->nCol ){
+    bFound = (pColset->aiCol[ii]==iCol);
+    ii++;
+  }
+  return bFound;
+}
+
+/*
+** xChunk callback used by fts5SegiterPoslist() when a column filter is
+** present and the table is not detail=full. The context pointer is a
+** PoslistOffsetsCtx.
+**
+** Each varint in the chunk encodes (delta + 2) relative to the previous
+** value read (pCtx->iRead). Values that pass fts5IndexColsetTest() are
+** re-encoded as (delta + 2) relative to the previous value written
+** (pCtx->iWrite) and appended to pCtx->pBuf.
+*/
+static void fts5PoslistOffsetsCallback(
+  Fts5Index *pUnused,
+  void *pContext,
+  const u8 *pChunk, int nChunk
+){
+  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
+  int iOff = 0;                   /* Read offset within pChunk[] */
+
+  UNUSED_PARAM(pUnused);
+  assert_nc( nChunk>=0 );
+
+  while( iOff<nChunk ){
+    int iVal;
+    iOff += fts5GetVarint32(&pChunk[iOff], iVal);
+    iVal += pCtx->iRead - 2;
+    pCtx->iRead = iVal;
+    if( fts5IndexColsetTest(pCtx->pColset, iVal)==0 ) continue;
+    fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
+    pCtx->iWrite = iVal;
+  }
+}
+
+static void fts5PoslistFilterCallback( /* xChunk callback: copy only the columns in pCtx->pColset */
+ Fts5Index *pUnused,
+ void *pContext,
+ const u8 *pChunk, int nChunk
+){
+ PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
+ UNUSED_PARAM(pUnused);
+ assert_nc( nChunk>=0 );
+ if( nChunk>0 ){
+ /* Search through to find the first varint with value 1. This is the
+ ** start of the next columns hits.
+ **
+ ** pCtx->eState carries the filter state from one chunk to the next:
+ ** 0 means the current column is being skipped, 1 means it is being
+ ** copied, and 2 means the previous chunk ended immediately after a
+ ** 0x01 marker, so this chunk begins with the column number. */
+ int i = 0;
+ int iStart = 0;
+
+ if( pCtx->eState==2 ){
+ int iCol;
+ fts5FastGetVarint32(pChunk, i, iCol);
+ if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
+ pCtx->eState = 1;
+ fts5BufferSafeAppendVarint(pCtx->pBuf, 1); /* emit the 0x01 marker that ended the previous chunk */
+ }else{
+ pCtx->eState = 0;
+ }
+ }
+
+ do {
+ while( i<nChunk && pChunk[i]!=0x01 ){
+ while( pChunk[i] & 0x80 ) i++; /* NOTE(review): this inner loop does not test i against nChunk */
+ i++;
+ }
+ if( pCtx->eState ){
+ fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
+ }
+ if( i<nChunk ){
+ int iCol;
+ iStart = i;
+ i++;
+ if( i>=nChunk ){
+ pCtx->eState = 2; /* column number is in the next chunk */
+ }else{
+ fts5FastGetVarint32(pChunk, i, iCol);
+ pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
+ if( pCtx->eState ){
+ fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); /* copy the marker and column number */
+ iStart = i;
+ }
+ }
+ }
+ }while( i<nChunk );
+ }
+}
+
+/*
+** Pass the position-list data for the entry that pSeg currently points to
+** to callback xChunk, one piece at a time. The first piece is whatever
+** part of the list lies on the current leaf. If the list continues past
+** the end of that leaf, following pages are read with fts5DataRead() and
+** each contributes a further piece that starts at byte offset 4 of the
+** page.
+**
+** For an iterator that does not have FTS5_SEGITER_REVERSE set, the page
+** immediately after the current leaf, if it is read, is kept in
+** pSeg->pNextLeaf instead of being released.
+**
+** If fts5DataRead() returns NULL the iteration stops early.
+*/
+static void fts5ChunkIterate(
+  Fts5Index *p,                   /* Index object */
+  Fts5SegIter *pSeg,              /* Poslist of this iterator */
+  void *pCtx,                     /* Context pointer for xChunk callback */
+  void (*xChunk)(Fts5Index*, void*, const u8*, int)
+){
+  int nLeft = pSeg->nPos;         /* Number of bytes still to come */
+  Fts5Data *pPage = 0;            /* Continuation page currently held */
+  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
+  int nChunk = MIN(nLeft, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
+  int pgno = pSeg->iLeafPgno;
+  int pgnoSave;                   /* Page to stash in pNextLeaf, or 0 */
+
+  /* This function does not work with detail=none databases. */
+  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
+
+  pgnoSave = (pSeg->flags & FTS5_SEGITER_REVERSE) ? 0 : pgno+1;
+
+  for(;;){
+    xChunk(p, pCtx, pChunk, nChunk);
+    nLeft -= nChunk;
+    fts5DataRelease(pPage);
+    if( nLeft<=0 ) break;
+
+    pgno++;
+    pPage = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
+    if( pPage==0 ) break;
+
+    pChunk = &pPage->p[4];
+    nChunk = MIN(nLeft, pPage->szLeaf - 4);
+    if( pgno==pgnoSave ){
+      /* Hand ownership of this page to the iterator. */
+      assert( pSeg->pNextLeaf==0 );
+      pSeg->pNextLeaf = pPage;
+      pPage = 0;
+    }
+  }
+}
+
+/*
+** Iterator pSeg currently points to a valid entry (not EOF). Append the
+** position list data of that entry to buffer pBuf. The position-list size
+** field is not copied.
+**
+** If pColset is NULL the whole list is copied. Otherwise only the data
+** for columns in pColset is copied, using the filter callback for
+** detail=full tables and the offsets callback for any other detail mode.
+**
+** Nothing is appended if the buffer cannot be grown by pSeg->nPos bytes.
+*/
+static void fts5SegiterPoslist(
+  Fts5Index *p,
+  Fts5SegIter *pSeg,
+  Fts5Colset *pColset,
+  Fts5Buffer *pBuf
+){
+  if( fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ) return;
+
+  if( pColset==0 ){
+    fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
+  }else if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
+    PoslistCallbackCtx sCtx;
+    sCtx.pBuf = pBuf;
+    sCtx.pColset = pColset;
+    /* Data before the first 0x01 marker belongs to column 0. */
+    sCtx.eState = fts5IndexColsetTest(pColset, 0);
+    assert( sCtx.eState==0 || sCtx.eState==1 );
+    fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
+  }else{
+    PoslistOffsetsCtx sCtx;
+    memset(&sCtx, 0, sizeof(sCtx));
+    sCtx.pBuf = pBuf;
+    sCtx.pColset = pColset;
+    fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
+  }
+}
+
+/*
+** IN/OUT parameter (*pa) points to a position list n bytes in size. If
+** the position list contains entries for column iCol, then (*pa) is set
+** to point to the sub-position-list for that column and the number of
+** bytes in it returned. Or, if the argument position list does not
+** contain any entries for column iCol, return 0.
+**
+** For iCol>0 the returned span begins at the 0x01 marker byte that
+** introduces the column (that is where *pa is left pointing), so the
+** marker and the column-number varint are included in the byte count.
+** Note that *pa may have been modified even when 0 is returned.
+*/
+static int fts5IndexExtractCol(
+ const u8 **pa, /* IN/OUT: Pointer to poslist */
+ int n, /* IN: Size of poslist in bytes */
+ int iCol /* Column to extract from poslist */
+){
+ int iCurrent = 0; /* Anything before the first 0x01 is col 0 */
+ const u8 *p = *pa;
+ const u8 *pEnd = &p[n]; /* One byte past end of position list */
+
+ while( iCol>iCurrent ){
+ /* Advance pointer p until it points to pEnd or an 0x01 byte that is
+ ** not part of a varint. Note that it is not possible for a negative
+ ** or extremely large varint to occur within an uncorrupted position
+ ** list. So the last byte of each varint may be assumed to have a clear
+ ** 0x80 bit. */
+ while( *p!=0x01 ){
+ while( *p++ & 0x80 );
+ if( p>=pEnd ) return 0;
+ }
+ *pa = p++; /* remember the marker position, then step over it */
+ iCurrent = *p++; /* fast path: single-byte column number */
+ if( iCurrent & 0x80 ){
+ p--; /* multi-byte column number: back up and decode it in full */
+ p += fts5GetVarint32(p, iCurrent);
+ }
+ }
+ if( iCol!=iCurrent ) return 0;
+
+ /* Advance pointer p until it points to pEnd or an 0x01 byte that is
+ ** not part of a varint */
+ while( p<pEnd && *p!=0x01 ){
+ while( *p++ & 0x80 );
+ }
+
+ return p - (*pa);
+}
+
+/*
+** Reset pBuf to empty and then append to it, in the order the columns
+** appear in pColset->aiCol[], the sub-position-list that
+** fts5IndexExtractCol() finds in (pPos/nPos) for each column. Columns for
+** which fts5IndexExtractCol() returns 0 contribute nothing.
+**
+** Returns SQLITE_OK, or whatever error code fts5BufferAppendBlob() stores
+** in its rc argument.
+*/
+static int fts5IndexExtractColset (
+  Fts5Colset *pColset,            /* Colset to filter on */
+  const u8 *pPos, int nPos,       /* Position list */
+  Fts5Buffer *pBuf                /* Output buffer */
+){
+  int rc = SQLITE_OK;
+  int iEntry = 0;                 /* Current index into pColset->aiCol[] */
+
+  fts5BufferZero(pBuf);
+  while( iEntry<pColset->nCol ){
+    const u8 *pSub = pPos;
+    int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[iEntry]);
+    if( nSub!=0 ){
+      fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
+    }
+    iEntry++;
+  }
+  return rc;
+}
+
+/*
+** xSetOutputs callback used by detail=none tables. Only the rowid and
+** the nData value (copied from pSeg->nPos) are published; base.pData is
+** not touched.
+*/
+static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
+  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
+
+  pIter->base.nData = pSeg->nPos;
+  pIter->base.iRowid = pSeg->iRowid;
+}
+
+/*
+** xSetOutputs callback used by detail=full and detail=col tables when no
+** column filters are specified.
+**
+** The rowid and size are copied from pSeg. base.pData is pointed either
+** straight into the current leaf page (when the whole position list fits
+** on it) or at a copy assembled in pIter->poslist.
+*/
+static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
+  pIter->base.iRowid = pSeg->iRowid;
+  pIter->base.nData = pSeg->nPos;
+
+  assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
+  assert( pIter->pColset==0 );
+
+  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
+    /* The list spills onto later pages: gather it into the poslist
+    ** buffer and publish a pointer to that buffer. */
+    fts5BufferZero(&pIter->poslist);
+    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
+    pIter->base.pData = pIter->poslist.p;
+  }else{
+    /* Everything is on the current page: publish a pointer directly
+    ** into the page body. */
+    pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
+  }
+}
+
+/*
+** xSetOutputs callback used by detail=col when there is a column filter
+** and there are more than 100 columns. Also called as a fallback from
+** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
+**
+** The filtered list is always assembled in pIter->poslist; base.pData and
+** base.nData describe the contents of that buffer.
+*/
+static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
+ fts5BufferZero(&pIter->poslist);
+ fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
+ pIter->base.iRowid = pSeg->iRowid;
+ pIter->base.pData = pIter->poslist.p;
+ pIter->base.nData = pIter->poslist.n;
+}
+
+/*
+** xSetOutputs callback used when:
+**
+** * detail=col,
+** * there is a column filter, and
+** * the table contains 100 or fewer columns.
+**
+** The last point is to ensure all column numbers are stored as
+** single-byte varints.
+**
+** If the column list does not fit entirely on the current leaf page the
+** work is delegated to fts5IterSetOutputs_Col(). Otherwise the list is
+** filtered byte by byte, writing directly through pIter->poslist.p;
+** pIter->poslist.n is not updated on this path.
+*/
+static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
+
+ assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
+ assert( pIter->pColset );
+
+ if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
+ fts5IterSetOutputs_Col(pIter, pSeg);
+ }else{
+ u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
+ u8 *pEnd = (u8*)&a[pSeg->nPos];
+ int iPrev = 0; /* Most recent column number read */
+ int *aiCol = pIter->pColset->aiCol;
+ int *aiColEnd = &aiCol[pIter->pColset->nCol];
+
+ u8 *aOut = pIter->poslist.p; /* fts5IterSetOutputCb() appears to size this buffer to nCol bytes for this callback - confirm */
+ int iPrevOut = 0; /* Most recent column number written */
+
+ pIter->base.iRowid = pSeg->iRowid;
+
+ while( a<pEnd ){
+ iPrev += (int)a++[0] - 2; /* each input byte holds (column delta + 2) */
+ while( *aiCol<iPrev ){ /* assumes aiCol[] is sorted ascending - TODO confirm */
+ aiCol++;
+ if( aiCol==aiColEnd ) goto setoutputs_col_out;
+ }
+ if( *aiCol==iPrev ){
+ *aOut++ = (iPrev - iPrevOut) + 2;
+ iPrevOut = iPrev;
+ }
+ }
+
+setoutputs_col_out:
+ pIter->base.pData = pIter->poslist.p;
+ pIter->base.nData = aOut - pIter->poslist.p;
+ }
+}
+
+/*
+** xSetOutputs callback used by detail=full when there is a column filter.
+**
+** Three cases are handled:
+**
+**   * The position list spans more than one page: the filtered list is
+**     assembled in pIter->poslist by fts5SegiterPoslist().
+**   * The list is on the current page and the colset holds a single
+**     column: base.pData points directly into the page at the span found
+**     by fts5IndexExtractCol().
+**   * The list is on the current page and the colset holds several
+**     columns: the spans are concatenated into pIter->poslist by
+**     fts5IndexExtractColset().
+*/
+static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
+  Fts5Colset *pColset = pIter->pColset;
+  pIter->base.iRowid = pSeg->iRowid;
+
+  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
+  assert( pColset );
+
+  if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
+    /* Data is spread over two or more pages. */
+    fts5BufferZero(&pIter->poslist);
+    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
+    pIter->base.pData = pIter->poslist.p;
+    pIter->base.nData = pIter->poslist.n;
+  }else{
+    /* All data is stored on the current page. */
+    const u8 *aPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
+    if( pColset->nCol!=1 ){
+      fts5BufferZero(&pIter->poslist);
+      fts5IndexExtractColset(pColset, aPos, pSeg->nPos, &pIter->poslist);
+      pIter->base.pData = pIter->poslist.p;
+      pIter->base.nData = pIter->poslist.n;
+    }else{
+      pIter->base.nData = fts5IndexExtractCol(
+          &aPos, pSeg->nPos, pColset->aiCol[0]
+      );
+      pIter->base.pData = aPos;
+    }
+  }
+}
+
+/*
+** Choose the xSetOutputs callback for iterator pIter. This is a no-op if
+** *pRc is not SQLITE_OK on entry.
+**
+**   detail=none                          -> fts5IterSetOutputs_None
+**   no column filter                     -> fts5IterSetOutputs_Nocolset
+**   detail=full with a column filter     -> fts5IterSetOutputs_Full
+**   detail=col, filter, nCol<=100        -> fts5IterSetOutputs_Col100
+**   detail=col, filter, nCol>100         -> fts5IterSetOutputs_Col
+**
+** For the Col100 case sqlite3Fts5BufferSize() is also called on
+** pIter->poslist with the table's column count; any error it reports is
+** left in *pRc.
+*/
+static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
+  Fts5Config *pConfig;
+
+  if( *pRc!=SQLITE_OK ) return;
+  pConfig = pIter->pIndex->pConfig;
+
+  if( pConfig->eDetail==FTS5_DETAIL_NONE ){
+    pIter->xSetOutputs = fts5IterSetOutputs_None;
+    return;
+  }
+  if( pIter->pColset==0 ){
+    pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
+    return;
+  }
+  if( pConfig->eDetail==FTS5_DETAIL_FULL ){
+    pIter->xSetOutputs = fts5IterSetOutputs_Full;
+    return;
+  }
+
+  assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
+  if( pConfig->nCol>100 ){
+    pIter->xSetOutputs = fts5IterSetOutputs_Col;
+  }else{
+    pIter->xSetOutputs = fts5IterSetOutputs_Col100;
+    sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
+  }
+}
+
+
+/*
+** Allocate a new Fts5Iter object.
+**
+** The new object will be used to iterate through data in structure pStruct.
+** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
+** is zero or greater, data from the first nSegment segments on level iLevel
+** is merged.
+**
+** The iterator initially points to the first term/rowid entry in the
+** iterated data.
+**
+** When iLevel is negative and p->pHash is not NULL, one additional
+** segment iterator is set up over the in-memory hash table. A seek term
+** (pTerm/nTerm) may only be supplied when iLevel is negative.
+**
+** If the iterator cannot be allocated, *ppOut is set to NULL and the
+** function returns immediately. If p->rc is not SQLITE_OK once the
+** segment iterators have been initialized, the new iterator is freed and
+** *ppOut is set to NULL.
+*/
+static void fts5MultiIterNew(
+ Fts5Index *p, /* FTS5 backend to iterate within */
+ Fts5Structure *pStruct, /* Structure of specific index */
+ int flags, /* FTS5INDEX_QUERY_XXX flags */
+ Fts5Colset *pColset, /* Colset to filter on (or NULL) */
+ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */
+ int iLevel, /* Level to iterate (-1 for all) */
+ int nSegment, /* Number of segments to merge (iLevel>=0) */
+ Fts5Iter **ppOut /* New object */
+){
+ int nSeg = 0; /* Number of segment-iters in use */
+ int iIter = 0; /* Next pNew->aSeg[] slot to initialize; later a loop counter */
+ int iSeg; /* Used to iterate through segments */
+ Fts5StructureLevel *pLvl;
+ Fts5Iter *pNew;
+
+ assert( (pTerm==0 && nTerm==0) || iLevel<0 ); /* Seek term only when merging all levels */
+
+ /* Allocate space for the new multi-seg-iterator. */
+ if( p->rc==SQLITE_OK ){
+ if( iLevel<0 ){
+ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
+ nSeg = pStruct->nSegment;
+ nSeg += (p->pHash ? 1 : 0); /* Extra slot for the hash-table iterator */
+ }else{
+ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
+ }
+ }
+ *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
+ if( pNew==0 ) return; /* *ppOut has already been set to NULL */
+ pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
+ pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
+ pNew->pStruct = pStruct;
+ pNew->pColset = pColset;
+ fts5StructureRef(pStruct);
+ if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){ /* No xSetOutputs callback is installed for NOOUTPUT */
+ fts5IterSetOutputCb(&p->rc, pNew);
+ }
+
+ /* Initialize each of the component segment iterators. */
+ if( p->rc==SQLITE_OK ){
+ if( iLevel<0 ){
+ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
+ if( p->pHash ){
+ /* Add a segment iterator for the current contents of the hash table. */
+ Fts5SegIter *pIter = &pNew->aSeg[iIter++];
+ fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
+ }
+ for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
+ for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ /* Highest aSeg[] index first within each level */
+ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
+ Fts5SegIter *pIter = &pNew->aSeg[iIter++];
+ if( pTerm==0 ){
+ fts5SegIterInit(p, pSeg, pIter);
+ }else{
+ fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
+ }
+ }
+ }
+ }else{
+ pLvl = &pStruct->aLevel[iLevel];
+ for(iSeg=nSeg-1; iSeg>=0; iSeg--){
+ fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
+ }
+ }
+ assert( iIter==nSeg );
+ }
+
+ /* If the above was successful, each component iterators now points
+ ** to the first entry in its segment. In this case initialize the
+ ** aFirst[] array. Or, if an error has occurred, free the iterator
+ ** object and set the output variable to NULL. */
+ if( p->rc==SQLITE_OK ){
+ for(iIter=pNew->nSeg-1; iIter>0; iIter--){
+ int iEq;
+ if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
+ /* NOTE(review): a non-zero iEq presumably names a segment iterator that
+ ** duplicates another entry and must be stepped past - confirm against
+ ** fts5MultiIterDoCompare(). */
+ Fts5SegIter *pSeg = &pNew->aSeg[iEq];
+ if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
+ fts5MultiIterAdvanced(p, pNew, iEq, iIter);
+ }
+ }
+ fts5MultiIterSetEof(pNew);
+ fts5AssertMultiIterSetup(p, pNew);
+
+ if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
+ fts5MultiIterNext(p, pNew, 0, 0);
+ }else if( pNew->base.bEof==0 ){
+ Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
+ pNew->xSetOutputs(pNew, pSeg);
+ }
+
+ }else{
+ fts5MultiIterFree(pNew);
+ *ppOut = 0;
+ }
+}
+
+/*
+** Build an Fts5Iter over the single doclist pData.
+**
+** A two-slot iterator is allocated and slot 1 is configured as a
+** one-term segment iterator. If pData->szLeaf is greater than zero the
+** doclist is stored in that segment iterator and is not released here.
+** Otherwise the new iterator is flagged as being at EOF and pData is
+** passed to fts5DataRelease() before returning.
+**
+** *ppOut is written only if the iterator was allocated successfully. If
+** the allocation fails, pData is still passed to fts5DataRelease().
+*/
+static void fts5MultiIterNew2(
+  Fts5Index *p,                   /* FTS5 backend to iterate within */
+  Fts5Data *pData,                /* Doclist to iterate through */
+  int bDesc,                      /* True for descending rowid order */
+  Fts5Iter **ppOut                /* New object */
+){
+  Fts5Data *pRelease = pData;     /* Passed to fts5DataRelease() at the end */
+  Fts5Iter *pNew = fts5MultiIterAlloc(p, 2);
+
+  if( pNew!=0 ){
+    Fts5SegIter *pSegIter = &pNew->aSeg[1];
+    pSegIter->flags = FTS5_SEGITER_ONETERM;
+
+    if( pData->szLeaf<=0 ){
+      /* Nothing to iterate through. */
+      pNew->base.bEof = 1;
+    }else{
+      pSegIter->pLeaf = pData;
+      pSegIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pSegIter->iRowid);
+      pSegIter->iEndofDoclist = pData->nn;
+      pNew->aFirst[1].iFirst = 1;
+      if( !bDesc ){
+        fts5SegIterLoadNPos(p, pSegIter);
+      }else{
+        pNew->bRev = 1;
+        pSegIter->flags |= FTS5_SEGITER_REVERSE;
+        fts5SegIterReverseInitPage(p, pSegIter);
+      }
+      /* The segment iterator now refers to pData. */
+      pRelease = 0;
+    }
+    fts5SegIterSetNext(p, pSegIter);
+
+    *ppOut = pNew;
+  }
+
+  fts5DataRelease(pRelease);
+}
+
+/*
+** Return 1 if an error has occurred (p->rc is set) or if iterator pIter
+** is at EOF. Return 0 otherwise.
+*/
+static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
+  assert( p->rc
+      || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
+  );
+  if( p->rc ) return 1;
+  return pIter->base.bEof ? 1 : 0;
+}
+
+/*
+** Return the rowid of the entry the iterator currently points to, taken
+** from the segment iterator identified by aFirst[1].iFirst. The result is
+** undefined if the iterator is at EOF when this function is called.
+*/
+static i64 fts5MultiIterRowid(Fts5Iter *pIter){
+  Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
+  assert( pFirst->pLeaf );
+  return pFirst->iRowid;
+}
+
+/*
+** Advance the iterator until it points at an entry at or beyond iMatch
+** in the direction of iteration: rowid>=iMatch for a forward iterator,
+** rowid<=iMatch for a reverse (bRev) one. The iterator is always advanced
+** at least once. Stops early at EOF or on error.
+*/
+static void fts5MultiIterNextFrom(
+  Fts5Index *p,
+  Fts5Iter *pIter,
+  i64 iMatch
+){
+  for(;;){
+    i64 iCur;
+    fts5MultiIterNext(p, pIter, 1, iMatch);
+    if( fts5MultiIterEof(p, pIter) ) return;
+    iCur = fts5MultiIterRowid(pIter);
+    if( pIter->bRev ? (iCur<=iMatch) : (iCur>=iMatch) ) return;
+  }
+}
+
+/*
+** Return a pointer to the term of the entry the iterator currently points
+** to, and store the size of that term in bytes in *pn. The buffer
+** returned is the term buffer of the current segment iterator.
+*/
+static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
+  int iFirst = pIter->aFirst[1].iFirst;
+  Fts5SegIter *pSegIter = &pIter->aSeg[iFirst];
+
+  *pn = pSegIter->term.n;
+  return pSegIter->term.p;
+}
+
+/*
+** Pick a segment-id for a new segment in structure pStruct. The id is
+** drawn at random and masked to the range 0 to ((1<<FTS5_DATA_ID_B)-1).
+** A value of 0, or one already used by a segment in pStruct, is rejected
+** and another value drawn, so the result is always non-zero and unused.
+**
+** If pStruct already holds FTS5_MAX_SEGMENT or more segments, p->rc is
+** set to SQLITE_FULL and 0 is returned.
+**
+** If an error has already occurred, this function is a no-op. 0 is
+** returned in this case.
+*/
+static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
+  int iSegid = 0;
+
+  if( p->rc!=SQLITE_OK ) return 0;
+  if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
+    p->rc = SQLITE_FULL;
+    return 0;
+  }
+
+  do{
+    int iLvl;
+
+    /* Draw a candidate and mask it down to FTS5_DATA_ID_B bits. */
+    sqlite3_randomness(sizeof(u32), (void*)&iSegid);
+    iSegid &= ((1 << FTS5_DATA_ID_B)-1);
+
+    /* Zero the candidate if some existing segment already uses it. */
+    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+      int iSeg;
+      for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
+        if( iSegid==pLvl->aSeg[iSeg].iSegid ) iSegid = 0;
+      }
+    }
+  }while( iSegid==0 );
+
+  return iSegid;
+}
+
+/*
+** Throw away everything currently held in the in-memory hash table, if
+** there is one, and reset the pending-data counter to zero.
+*/
+static void fts5IndexDiscardData(Fts5Index *p){
+  assert( p->pHash || p->nPendingData==0 );
+  if( p->pHash==0 ) return;
+  sqlite3Fts5HashClear(p->pHash);
+  p->nPendingData = 0;
+}
+
+/*
+** Return the number of leading bytes that buffer (pOld/nOld) and buffer
+** pNew have in common. At most nOld bytes are compared.
+**
+** The length of pNew is not passed in. The caller guarantees that
+** buffer pNew is greater than buffer (pOld/nOld).
+*/
+static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
+  int nShared = 0;
+  while( nShared<nOld && pOld[nShared]==pNew[nShared] ){
+    nShared++;
+  }
+  return nShared;
+}
+
+/*
+** Reset the doclist-index buffers in pWriter->aDlidx[]. Levels are
+** visited from 0 upwards, stopping at the first one whose buffer is
+** empty. If bFlush is true, each visited buffer is first written out
+** with fts5DataWrite(). Each visited buffer is then zeroed and its
+** bPrevValid flag cleared.
+*/
+static void fts5WriteDlidxClear(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  int bFlush                      /* If true, write dlidx to disk */
+){
+  int iLvl;
+  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
+  for(iLvl=0;
+      iLvl<pWriter->nDlidx && pWriter->aDlidx[iLvl].buf.n!=0;
+      iLvl++
+  ){
+    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[iLvl];
+    if( bFlush ){
+      assert( pDlidx->pgno!=0 );
+      fts5DataWrite(p,
+          FTS5_DLIDX_ROWID(pWriter->iSegid, iLvl, pDlidx->pgno),
+          pDlidx->buf.p, pDlidx->buf.n
+      );
+    }
+    sqlite3Fts5BufferZero(&pDlidx->buf);
+    pDlidx->bPrevValid = 0;
+  }
+}
+
+/*
+** Ensure that the pWriter->aDlidx[] array has room for nLvl elements.
+** The array is reallocated whenever nLvl is greater than or equal to the
+** current element count; array elements added by this call are zeroed.
+**
+** Returns p->rc. If the reallocation fails, p->rc is set to SQLITE_NOMEM
+** and the existing array is left untouched. This is a no-op if p->rc is
+** already set when the function is called.
+*/
+static int fts5WriteDlidxGrow(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  int nLvl
+){
+  Fts5DlidxWriter *aNew;
+  int nZero;                      /* Bytes of new space to zero */
+
+  if( p->rc!=SQLITE_OK || nLvl<pWriter->nDlidx ) return p->rc;
+
+  aNew = (Fts5DlidxWriter*)sqlite3_realloc(
+      pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
+  );
+  if( aNew==0 ){
+    p->rc = SQLITE_NOMEM;
+    return p->rc;
+  }
+
+  nZero = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
+  memset(&aNew[pWriter->nDlidx], 0, nZero);
+  pWriter->aDlidx = aNew;
+  pWriter->nDlidx = nLvl;
+  return p->rc;
+}
+
+/*
+** Decide the fate of the doclist-index accumulated in pWriter->aDlidx[].
+** If the level-0 buffer is non-empty and FTS5_MIN_DLIDX_SIZE or more
+** empty leaf pages have been counted in pWriter->nEmpty, the
+** doclist-index is written to disk and 1 is returned. Otherwise it is
+** discarded and 0 is returned. Either way the buffers are cleared and
+** pWriter->nEmpty is reset to zero.
+*/
+static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
+  int bWrite = (
+      pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE
+  );
+
+  fts5WriteDlidxClear(p, pWriter, bWrite);
+  pWriter->nEmpty = 0;
+  return bWrite;
+}
+
+/*
+** Called when processing of the doclist for the last term on leaf page
+** pWriter->iBtPage is complete. Does nothing if pWriter->iBtPage is 0.
+**
+** The doclist-index held in Fts5SegWriter.aDlidx[] is either written to
+** disk or discarded (see fts5WriteFlushDlidx()). Then, unless an error
+** has occurred, a row is inserted via the p->pIdxWriter statement. The
+** row holds the term in Fts5SegWriter.btterm (the first term on page
+** iBtPage) and the value (iBtPage<<1) plus 1 if a doclist-index was
+** written. The segment-id parameter of the statement was bound by
+** fts5WriteInit().
+**
+** pWriter->iBtPage is reset to 0 before returning.
+*/
+static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
+  int bDlidx;                     /* True if a doclist-index was written */
+
+  assert( pWriter->iBtPage || pWriter->nEmpty==0 );
+  if( pWriter->iBtPage==0 ) return;
+  bDlidx = fts5WriteFlushDlidx(p, pWriter);
+
+  if( p->rc==SQLITE_OK ){
+    const char *zTerm = "";
+    i64 iPgnoFlag = bDlidx + ((i64)pWriter->iBtPage<<1);
+
+    /* Bind an empty string rather than a possibly-unset pointer for an
+    ** empty term. */
+    if( pWriter->btterm.n>0 ){
+      zTerm = (const char*)pWriter->btterm.p;
+    }
+    sqlite3_bind_blob(p->pIdxWriter, 2, zTerm, pWriter->btterm.n, SQLITE_STATIC);
+    sqlite3_bind_int64(p->pIdxWriter, 3, iPgnoFlag);
+    sqlite3_step(p->pIdxWriter);
+    p->rc = sqlite3_reset(p->pIdxWriter);
+  }
+  pWriter->iBtPage = 0;
+}
+
+/*
+** This is called once for each leaf page except the first that contains
+** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
+** is larger than all terms written to earlier leaves, and equal to or
+** smaller than the first term on the new leaf.
+**
+** The pending b-tree entry, if any, is flushed with fts5WriteFlushBtree().
+** The split-key is then stored in pWriter->btterm and pWriter->iBtPage is
+** set to the page number of the leaf currently being written.
+**
+** If an error occurs, an error code is left in Fts5Index.rc. If an error
+** has already occurred when this function is called, it is a no-op.
+*/
+static void fts5WriteBtreeTerm(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5SegWriter *pWriter, /* Writer object */
+ int nTerm, const u8 *pTerm /* First term on new page */
+){
+ fts5WriteFlushBtree(p, pWriter); /* Emit the entry for the previous iBtPage */
+ fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
+ pWriter->iBtPage = pWriter->writer.pgno;
+}
+
+/*
+** Called when a leaf page that contains no terms at all is flushed to
+** disk. Increments pWriter->nEmpty, the count of sequential leaves
+** without a term. If the page held no rowids either and a doclist-index
+** is already being accumulated, a 0x00 byte is appended to the level-0
+** doclist-index buffer.
+*/
+static void fts5WriteBtreeNoTerm(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5SegWriter *pWriter          /* Writer object */
+){
+  Fts5DlidxWriter *pDlidx = pWriter->aDlidx;      /* Level 0 of the dlidx */
+
+  if( pWriter->bFirstRowidInPage && pDlidx->buf.n>0 ){
+    assert( pDlidx->bPrevValid );
+    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
+  }
+
+  pWriter->nEmpty++;
+}
+
+/*
+** Return the first rowid stored in the doclist-index page held in pBuf.
+** The first byte of the page is skipped, then one varint is read and
+** discarded. The varint that follows is returned as the rowid.
+*/
+static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
+  i64 iRowid;
+  int iOff = 1;                   /* Skip the leading byte */
+
+  /* Step over the varint that follows the first byte. */
+  iOff += fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
+
+  /* The next varint is the value returned. */
+  fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
+  return iRowid;
+}
+
+/*
+** Rowid iRowid has just been appended to the current leaf page. It is the
+** first on the page. This function appends an appropriate entry to the current
+** doclist-index.
+**
+** Starting at level 0 of pWriter->aDlidx[], each level whose buffer has
+** reached p->pConfig->pgsz bytes is written out with fts5DataWrite() and
+** restarted, and iRowid is then also appended to the next level up. The
+** loop ends at the first level that is not full, or as soon as p->rc is
+** set to an error code.
+*/
+static void fts5WriteDlidxAppend(
+ Fts5Index *p,
+ Fts5SegWriter *pWriter,
+ i64 iRowid
+){
+ int i;
+ int bDone = 0;
+
+ for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
+ i64 iVal;
+ Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
+
+ if( pDlidx->buf.n>=p->pConfig->pgsz ){
+ /* The current doclist-index page is full. Write it to disk and push
+ ** a copy of iRowid (which will become the first rowid on the next
+ ** doclist-index leaf page) up into the next level of the b-tree
+ ** hierarchy. If the node being flushed is currently the root node,
+ ** also push its first rowid upwards. */
+ pDlidx->buf.p[0] = 0x01; /* Not the root node */
+ fts5DataWrite(p,
+ FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
+ pDlidx->buf.p, pDlidx->buf.n
+ );
+ fts5WriteDlidxGrow(p, pWriter, i+2);
+ pDlidx = &pWriter->aDlidx[i]; /* aDlidx[] may have been reallocated */
+ if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
+ i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
+
+ /* This was the root node. Push its first rowid up to the new root. */
+ pDlidx[1].pgno = pDlidx->pgno;
+ sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
+ sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
+ sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
+ pDlidx[1].bPrevValid = 1;
+ pDlidx[1].iPrev = iFirst;
+ }
+
+ sqlite3Fts5BufferZero(&pDlidx->buf);
+ pDlidx->bPrevValid = 0;
+ pDlidx->pgno++;
+ }else{
+ bDone = 1; /* This level has room: no need to visit the next one */
+ }
+
+ if( pDlidx->bPrevValid ){
+ iVal = iRowid - pDlidx->iPrev; /* Store the delta from the previous rowid */
+ }else{
+ /* Starting a new page at this level. Level 0 records the current leaf
+ ** page number, higher levels the page number of the level below. */
+ i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
+ assert( pDlidx->buf.n==0 );
+ sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
+ sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
+ iVal = iRowid; /* First rowid on a page is stored in full */
+ }
+
+ sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
+ pDlidx->bPrevValid = 1;
+ pDlidx->iPrev = iRowid;
+ }
+}
+
+/*
+** Write the leaf page currently being assembled by pWriter to the
+** database, then prepare the page writer for the next leaf.
+**
+** The szLeaf header field (bytes 2-3) is set to the current size of the
+** page buffer. If the page contains at least one term, the page-index
+** (pgidx) is appended to the page. Otherwise fts5WriteBtreeNoTerm() is
+** invoked. After the page has been written both buffers are emptied, a
+** fresh 4-byte zeroed header is appended, the page number and the
+** leaves-written counter are incremented and the "first term" and "first
+** rowid" flags are set.
+*/
+static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
+  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
+  Fts5PageWriter *pLeaf = &pWriter->writer;
+
+  assert( (pLeaf->pgidx.n==0)==(pWriter->bFirstTermInPage) );
+
+  /* Set the szLeaf header field. */
+  assert( 0==fts5GetU16(&pLeaf->buf.p[2]) );
+  fts5PutU16(&pLeaf->buf.p[2], (u16)pLeaf->buf.n);
+
+  if( !pWriter->bFirstTermInPage ){
+    /* Append the pgidx to the page buffer. */
+    fts5BufferAppendBlob(&p->rc, &pLeaf->buf, pLeaf->pgidx.n, pLeaf->pgidx.p);
+  }else{
+    /* No term was written to this page. */
+    assert( pLeaf->pgidx.n==0 );
+    fts5WriteBtreeNoTerm(p, pWriter);
+  }
+
+  /* Write the page out to disk */
+  fts5DataWrite(p,
+      FTS5_SEGMENT_ROWID(pWriter->iSegid, pLeaf->pgno),
+      pLeaf->buf.p, pLeaf->buf.n
+  );
+
+  /* Initialize the next page. */
+  fts5BufferZero(&pLeaf->buf);
+  fts5BufferZero(&pLeaf->pgidx);
+  fts5BufferAppendBlob(&p->rc, &pLeaf->buf, 4, zero);
+  pLeaf->iPrevPgidx = 0;
+  pLeaf->pgno++;
+
+  /* One more leaf written. The new leaf holds no terms or rowids yet. */
+  pWriter->nLeafWritten++;
+  pWriter->bFirstTermInPage = 1;
+  pWriter->bFirstRowidInPage = 1;
+}
+
+/*
+** Append term pTerm/nTerm to the segment being written by the writer passed
+** as the second argument.
+**
+** If adding the term would fill the current leaf (page buffer + pgidx +
+** nTerm + 2 bytes reaches p->pConfig->pgsz), the leaf is flushed first,
+** provided it holds more than the 4-byte header. An entry for the term is
+** added to the page-index, and the term is written prefix-compressed
+** against the previous term unless it is the first term on its page.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has
+** already occurred, this function is a no-op.
+*/
+static void fts5WriteAppendTerm(
+ Fts5Index *p,
+ Fts5SegWriter *pWriter,
+ int nTerm, const u8 *pTerm
+){
+ int nPrefix; /* Bytes of prefix compression for term */
+ Fts5PageWriter *pPage = &pWriter->writer;
+ Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
+
+ assert( p->rc==SQLITE_OK );
+ assert( pPage->buf.n>=4 );
+ assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
+
+ /* If the current leaf page is full, flush it to disk. */
+ if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
+ if( pPage->buf.n>4 ){
+ fts5WriteFlushLeaf(p, pWriter);
+ }
+ fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
+ }
+
+ /* TODO1: Updating pgidx here. */
+ /* The pgidx entry is the offset of this term within the page, stored as
+ ** a varint delta from the previous pgidx entry (iPrevPgidx). */
+ pPgidx->n += sqlite3Fts5PutVarint(
+ &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
+ );
+ pPage->iPrevPgidx = pPage->buf.n;
+#if 0
+ fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
+ pPgidx->n += 2;
+#endif
+
+ if( pWriter->bFirstTermInPage ){
+ nPrefix = 0;
+ if( pPage->pgno!=1 ){
+ /* This is the first term on a leaf that is not the leftmost leaf in
+ ** the segment b-tree. In this case it is necessary to add a term to
+ ** the b-tree hierarchy that is (a) larger than the largest term
+ ** already written to the segment and (b) smaller than or equal to
+ ** this term. In other words, a prefix of (pTerm/nTerm) that is one
+ ** byte longer than the longest prefix (pTerm/nTerm) shares with the
+ ** previous term.
+ **
+ ** Usually, the previous term is available in pPage->term. The exception
+ ** is if this is the first term written in an incremental-merge step.
+ ** In this case the previous term is not available, so just write a
+ ** copy of (pTerm/nTerm) into the parent node. This is slightly
+ ** inefficient, but still correct. */
+ int n = nTerm;
+ if( pPage->term.n ){
+ n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
+ }
+ fts5WriteBtreeTerm(p, pWriter, n, pTerm);
+ pPage = &pWriter->writer;
+ }
+ }else{
+ nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm);
+ fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
+ }
+
+ /* Append the number of bytes of new data, then the term data itself
+ ** to the page. */
+ fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
+ fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
+
+ /* Update the Fts5PageWriter.term field. */
+ fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
+ pWriter->bFirstTermInPage = 0;
+
+ pWriter->bFirstRowidInPage = 0;
+ pWriter->bFirstRowidInDoclist = 1; /* Next rowid written starts a new doclist */
+
+ assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
+ pWriter->aDlidx[0].pgno = pPage->pgno; /* Record the leaf page this term is on */
+}
+
+/*
+** Append rowid iRowid to the output of writer pWriter. The current leaf
+** is flushed first if the page buffer plus page-index have reached
+** p->pConfig->pgsz bytes.
+**
+** The first rowid of a doclist or of a page is stored in full. Any other
+** rowid is stored as the difference from the previous rowid. When the
+** rowid is the first on its page, the rowid-pointer in the page header is
+** set and an entry is appended to the doclist-index.
+**
+** This function is a no-op if p->rc is already set.
+*/
+static void fts5WriteAppendRowid(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  i64 iRowid
+){
+  Fts5PageWriter *pPage;
+  i64 iVal = iRowid;              /* Value actually written to the page */
+
+  if( p->rc!=SQLITE_OK ) return;
+  pPage = &pWriter->writer;
+
+  if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
+    fts5WriteFlushLeaf(p, pWriter);
+  }
+
+  /* If this is to be the first rowid written to the page, set the
+  ** rowid-pointer in the page-header. Also append a value to the dlidx
+  ** buffer, in case a doclist-index is required. */
+  if( pWriter->bFirstRowidInPage ){
+    fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
+    fts5WriteDlidxAppend(p, pWriter, iRowid);
+  }
+
+  /* Write the rowid, delta-encoded unless it is the first of its doclist
+  ** or page. */
+  if( !(pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage) ){
+    assert( p->rc || iRowid>pWriter->iPrevRowid );
+    iVal = iRowid - pWriter->iPrevRowid;
+  }
+  fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal);
+
+  pWriter->iPrevRowid = iRowid;
+  pWriter->bFirstRowidInDoclist = 0;
+  pWriter->bFirstRowidInPage = 0;
+}
+
+/*
+** Append the nData bytes of position-list data in aData to the output of
+** writer pWriter, spilling onto further leaf pages as required.
+**
+** While the data still to be written would bring the current page
+** (page buffer + page-index) up to p->pConfig->pgsz bytes or more, whole
+** varints are copied until the space left on the page has been used,
+** and the leaf is then flushed. Whatever is left over is appended to the
+** then-current page. Copying stops early if p->rc is set.
+*/
+static void fts5WriteAppendPoslistData(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  const u8 *aData,
+  int nData
+){
+  Fts5PageWriter *pPage = &pWriter->writer;
+  const u8 *aRem = aData;         /* Data not yet written */
+  int nRem = nData;               /* Size of aRem[] in bytes */
+
+  assert( p->pConfig->pgsz>0 );
+  while( p->rc==SQLITE_OK
+      && (pPage->buf.n + pPage->pgidx.n + nRem)>=p->pConfig->pgsz
+  ){
+    int nSpace = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
+    int nChunk;
+
+    /* Advance varint by varint so that no varint is split across pages. */
+    for(nChunk=0; nChunk<nSpace; ){
+      i64 dummy;
+      nChunk += fts5GetVarint(&aRem[nChunk], (u64*)&dummy);
+    }
+    fts5BufferAppendBlob(&p->rc, &pPage->buf, nChunk, aRem);
+    aRem += nChunk;
+    nRem -= nChunk;
+    fts5WriteFlushLeaf(p, pWriter);
+  }
+
+  if( nRem>0 ){
+    fts5BufferAppendBlob(&p->rc, &pPage->buf, nRem, aRem);
+  }
+}
+
+/*
+** Finish writing a segment. Unless an error has occurred, the final leaf
+** page is flushed if it contains anything beyond its 4-byte header,
+** *pnLeaf is set to the number of the last leaf page written and the
+** pending b-tree entry is flushed.
+**
+** All buffers and the aDlidx[] array owned by the writer are freed,
+** whether or not an error has occurred. *pnLeaf is left unmodified if
+** p->rc is set on entry.
+*/
+static void fts5WriteFinish(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,         /* Writer object */
+  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
+){
+  Fts5PageWriter *pPage = &pWriter->writer;
+  int iDlidx = 0;
+
+  if( p->rc==SQLITE_OK ){
+    assert( pPage->pgno>=1 );
+    if( pPage->buf.n>4 ){
+      fts5WriteFlushLeaf(p, pWriter);
+    }
+    *pnLeaf = pPage->pgno-1;
+    fts5WriteFlushBtree(p, pWriter);
+  }
+
+  /* Release the page-writer buffers and the b-tree term. */
+  fts5BufferFree(&pPage->term);
+  fts5BufferFree(&pPage->buf);
+  fts5BufferFree(&pPage->pgidx);
+  fts5BufferFree(&pWriter->btterm);
+
+  /* Release each doclist-index buffer, then the array itself. */
+  while( iDlidx<pWriter->nDlidx ){
+    sqlite3Fts5BufferFree(&pWriter->aDlidx[iDlidx].buf);
+    iDlidx++;
+  }
+  sqlite3_free(pWriter->aDlidx);
+}
+
+/*
+** Initialize segment writer *pWriter to write a segment with segment-id
+** iSegid, starting at leaf page 1.
+**
+** The writer is zeroed, one doclist-index level is allocated and the
+** page and page-index buffers are sized to (pgsz + FTS5_DATA_PADDING)
+** bytes. If it has not been prepared yet, the INSERT statement used to
+** add rows to the %_idx table (p->pIdxWriter) is prepared. If no error
+** has occurred, the 4-byte leaf header is zeroed and iSegid is bound to
+** parameter 1 of the statement.
+**
+** Errors are reported through p->rc.
+*/
+static void fts5WriteInit(
+  Fts5Index *p,
+  Fts5SegWriter *pWriter,
+  int iSegid
+){
+  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
+  Fts5PageWriter *pPage = &pWriter->writer;
+
+  memset(pWriter, 0, sizeof(Fts5SegWriter));
+  pWriter->iSegid = iSegid;
+
+  fts5WriteDlidxGrow(p, pWriter, 1);
+  pPage->pgno = 1;
+  pWriter->bFirstTermInPage = 1;
+  pWriter->iBtPage = 1;
+
+  assert( pPage->buf.n==0 );
+  assert( pPage->pgidx.n==0 );
+
+  /* Grow the two buffers to pgsz + padding bytes in size. */
+  sqlite3Fts5BufferSize(&p->rc, &pPage->pgidx, nBuffer);
+  sqlite3Fts5BufferSize(&p->rc, &pPage->buf, nBuffer);
+
+  if( p->pIdxWriter==0 ){
+    Fts5Config *pConfig = p->pConfig;
+    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
+        "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
+        pConfig->zDb, pConfig->zName
+    ));
+  }
+
+  if( p->rc!=SQLITE_OK ) return;
+
+  /* Initialize the 4-byte leaf-page header to 0x00. */
+  memset(pPage->buf.p, 0, 4);
+  pPage->buf.n = 4;
+
+  /* Bind the output segment id once here, rather than each time a row
+  ** is inserted into %_idx by this writer. */
+  sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
+}
+
+/*
+** Iterator pIter was used to iterate through the input segments of an
+** incremental merge operation. This function is called if the incremental
+** merge step has finished but the input has not been completely exhausted.
+**
+** Segment iterators with no associated segment (pSeg->pSeg==0) are
+** skipped. A segment that has been read to the end has its pgnoFirst and
+** pgnoLast set to 0. For any other segment, the leaf holding the current
+** term is rewritten so that it starts with that term, and pgnoFirst is
+** set to that leaf's page number.
+*/
+static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
+ int i;
+ Fts5Buffer buf;
+ memset(&buf, 0, sizeof(Fts5Buffer));
+ for(i=0; i<pIter->nSeg; i++){
+ Fts5SegIter *pSeg = &pIter->aSeg[i];
+ if( pSeg->pSeg==0 ){
+ /* no-op */
+ }else if( pSeg->pLeaf==0 ){
+ /* All keys from this input segment have been transferred to the output.
+ ** Set both the first and last page-numbers to 0 to indicate that the
+ ** segment is now empty. */
+ pSeg->pSeg->pgnoLast = 0;
+ pSeg->pSeg->pgnoFirst = 0;
+ }else{
+ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */
+ i64 iLeafRowid;
+ Fts5Data *pData;
+ int iId = pSeg->pSeg->iSegid;
+ u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
+
+ iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
+ pData = fts5DataRead(p, iLeafRowid);
+ if( pData ){
+ /* Build the replacement page: zeroed header, the full current term,
+ ** then the original page content from offset iOff to szLeaf. */
+ fts5BufferZero(&buf);
+ fts5BufferGrow(&p->rc, &buf, pData->nn);
+ fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
+ fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
+ fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
+ fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
+ if( p->rc==SQLITE_OK ){
+ /* Set the szLeaf field */
+ fts5PutU16(&buf.p[2], (u16)buf.n);
+ }
+
+ /* Set up the new page-index array */
+ fts5BufferAppendVarint(&p->rc, &buf, 4); /* The term now starts at offset 4, after the header */
+ if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
+ && pSeg->iEndofDoclist<pData->szLeaf
+ ){
+ /* The current doclist ends on this same page: carry over the
+ ** page-index entries for the terms that follow it. */
+ int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
+ fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
+ fts5BufferAppendBlob(&p->rc, &buf,
+ pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
+ );
+ }
+
+ fts5DataRelease(pData);
+ pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
+ /* NOTE(review): presumably removes the pages from page 1 of the segment
+ ** up to the rewritten leaf - confirm the range semantics of
+ ** fts5DataDelete(). */
+ fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
+ fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
+ }
+ }
+ }
+ fts5BufferFree(&buf);
+}
+
+/*
+** Chunk callback passed to fts5ChunkIterate() by fts5IndexMergeLevel().
+** The context pointer is the Fts5SegWriter for the output segment. Each
+** chunk of position-list data is appended to it.
+*/
+static void fts5MergeChunkCallback(
+  Fts5Index *p,
+  void *pCtx,
+  const u8 *pChunk, int nChunk
+){
+  fts5WriteAppendPoslistData(p, (Fts5SegWriter*)pCtx, pChunk, nChunk);
+}
+
+/*
+** Merge segments from level iLvl of the index into a single segment on
+** level iLvl+1.
+**
+** If a merge of level iLvl is already underway (pLvl->nMerge is not 0),
+** it is resumed: the first nMerge segments of the level are the input and
+** output is appended to the last segment on level iLvl+1. Otherwise a new
+** segment-id is allocated, the structure is extended as required (in
+** which case *ppStruct may be updated) and every segment on the level is
+** used as input.
+**
+** If pnRem is not NULL, the merge stops at the first new term seen after
+** more than *pnRem leaf pages have been written, and *pnRem is decreased
+** by the number of leaves written. If the input is exhausted, the input
+** segments are removed from the level and from the %_data table. If it
+** is not, they are trimmed with fts5TrimSegments() and pLvl->nMerge is
+** set so that the merge can be resumed.
+**
+** Errors are reported through p->rc.
+*/
+static void fts5IndexMergeLevel(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5Structure **ppStruct, /* IN/OUT: Structure of index */
+ int iLvl, /* Level to read input from */
+ int *pnRem /* Write up to this many output leaves */
+){
+ Fts5Structure *pStruct = *ppStruct;
+ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+ Fts5StructureLevel *pLvlOut;
+ Fts5Iter *pIter = 0; /* Iterator to read input data */
+ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */
+ int nInput; /* Number of input segments */
+ Fts5SegWriter writer; /* Writer object */
+ Fts5StructureSegment *pSeg; /* Output segment */
+ Fts5Buffer term;
+ int bOldest; /* True if the output segment is the oldest */
+ int eDetail = p->pConfig->eDetail;
+ const int flags = FTS5INDEX_QUERY_NOOUTPUT;
+
+ assert( iLvl<pStruct->nLevel );
+ assert( pLvl->nMerge<=pLvl->nSeg );
+
+ memset(&writer, 0, sizeof(Fts5SegWriter));
+ memset(&term, 0, sizeof(Fts5Buffer));
+ if( pLvl->nMerge ){
+ pLvlOut = &pStruct->aLevel[iLvl+1];
+ assert( pLvlOut->nSeg>0 );
+ nInput = pLvl->nMerge;
+ pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
+
+ fts5WriteInit(p, &writer, pSeg->iSegid);
+ writer.writer.pgno = pSeg->pgnoLast+1; /* Continue after the last page already written */
+ writer.iBtPage = 0;
+ }else{
+ int iSegid = fts5AllocateSegid(p, pStruct);
+
+ /* Extend the Fts5Structure object as required to ensure the output
+ ** segment exists. */
+ if( iLvl==pStruct->nLevel-1 ){
+ fts5StructureAddLevel(&p->rc, ppStruct);
+ pStruct = *ppStruct;
+ }
+ fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
+ if( p->rc ) return;
+ pLvl = &pStruct->aLevel[iLvl]; /* Re-fetch: pStruct may have been replaced above */
+ pLvlOut = &pStruct->aLevel[iLvl+1];
+
+ fts5WriteInit(p, &writer, iSegid);
+
+ /* Add the new segment to the output level */
+ pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
+ pLvlOut->nSeg++;
+ pSeg->pgnoFirst = 1;
+ pSeg->iSegid = iSegid;
+ pStruct->nSegment++;
+
+ /* Read input from all segments in the input level */
+ nInput = pLvl->nSeg;
+ }
+ bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); /* Only segment on the last level */
+
+ assert( iLvl>=0 );
+ for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
+ fts5MultiIterEof(p, pIter)==0;
+ fts5MultiIterNext(p, pIter, 0, 0)
+ ){
+ Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
+ int nPos; /* position-list size field value */
+ int nTerm;
+ const u8 *pTerm;
+
+ /* Check for key annihilation. */
+ if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
+
+ pTerm = fts5MultiIterTerm(pIter, &nTerm);
+ if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
+ if( pnRem && writer.nLeafWritten>nRem ){ /* Page budget used up: stop at a term boundary */
+ break;
+ }
+
+ /* This is a new term. Append a term to the output segment. */
+ fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
+ fts5BufferSet(&p->rc, &term, nTerm, pTerm);
+ }
+
+ /* Append the rowid to the output */
+ /* WRITEPOSLISTSIZE */
+ fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
+
+ if( eDetail==FTS5_DETAIL_NONE ){
+ if( pSegIter->bDel ){
+ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
+ if( pSegIter->nPos>0 ){
+ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
+ }
+ }
+ }else{
+ /* Append the position-list data to the output */
+ nPos = pSegIter->nPos*2 + pSegIter->bDel; /* Size field: nPos in the upper bits, delete flag in bit 0 */
+ fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
+ fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
+ }
+ }
+
+ /* Flush the last leaf page to disk. Set the output segment b-tree height
+ ** and last leaf page number at the same time. */
+ fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
+
+ if( fts5MultiIterEof(p, pIter) ){
+ int i;
+
+ /* Remove the redundant segments from the %_data table */
+ for(i=0; i<nInput; i++){
+ fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
+ }
+
+ /* Remove the redundant segments from the input level */
+ if( pLvl->nSeg!=nInput ){
+ int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
+ memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
+ }
+ pStruct->nSegment -= nInput;
+ pLvl->nSeg -= nInput;
+ pLvl->nMerge = 0;
+ if( pSeg->pgnoLast==0 ){ /* Nothing was written: drop the empty output segment */
+ pLvlOut->nSeg--;
+ pStruct->nSegment--;
+ }
+ }else{
+ assert( pSeg->pgnoLast>0 );
+ fts5TrimSegments(p, pIter);
+ pLvl->nMerge = nInput; /* Record that a merge of nInput segments is in progress */
+ }
+
+ fts5MultiIterFree(pIter);
+ fts5BufferFree(&term);
+ if( pnRem ) *pnRem -= writer.nLeafWritten;
+}
+
+/*
+** Do up to nPg pages of automerge work on the index.
+**
+** Each pass selects an input level. Levels are scanned from 0 upwards,
+** tracking the level with the most segments. The scan ends at the first
+** level with a merge already in progress, which is preferred if its
+** nMerge exceeds the best segment count seen so far. Work stops when the
+** page budget is spent, when an error occurs, or when the chosen level
+** has fewer than pConfig->nAutomerge input segments and no merge in
+** progress.
+**
+** *ppStruct is updated before returning, as merging may replace the
+** structure object.
+*/
+static void fts5IndexMerge(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
+  int nPg                         /* Pages of work to do */
+){
+  Fts5Structure *pStruct = *ppStruct;
+  int nRem;                       /* Pages of work still permitted */
+
+  for(nRem=nPg; nRem>0 && p->rc==SQLITE_OK; ){
+    int iLvl;                     /* To iterate through levels */
+    int iBestLvl = 0;             /* Level offering the most input segments */
+    int nBest = 0;                /* Number of input segments on best level */
+
+    /* Set iBestLvl to the level to read input segments from. */
+    assert( pStruct->nLevel>0 );
+    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
+      int bMerging = (pLvl->nMerge!=0);
+      int nCand = bMerging ? pLvl->nMerge : pLvl->nSeg;
+      if( nCand>nBest ){
+        nBest = nCand;
+        iBestLvl = iLvl;
+      }
+      if( bMerging ) break;
+    }
+
+    /* If nBest is still 0, then the index must be empty. */
+#ifdef SQLITE_DEBUG
+    for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
+      assert( pStruct->aLevel[iLvl].nSeg==0 );
+    }
+#endif
+
+    /* Too few segments to be worth merging, and nothing to resume. */
+    if( pStruct->aLevel[iBestLvl].nMerge==0
+     && nBest<p->pConfig->nAutomerge
+    ){
+      break;
+    }
+
+    fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
+    if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
+      fts5StructurePromote(p, iBestLvl+1, pStruct);
+    }
+  }
+  *ppStruct = pStruct;
+}
+
+/*
+** A total of nLeaf leaf pages of data has just been flushed to a level-0
+** segment. Add nLeaf to the write-counter of the structure and work out
+** how many work-units (of p->nWorkUnit leaves each) the counter has
+** crossed. fts5IndexMerge() is then called with a page budget of
+** nWorkUnit * work-units * number-of-levels.
+**
+** This function is a no-op if an error has already occurred or if
+** automerge is disabled (pConfig->nAutomerge is zero or less). Errors
+** are reported through Fts5Index.rc.
+*/
+static void fts5IndexAutomerge(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
+  int nLeaf                       /* Number of output leaves just written */
+){
+  Fts5Structure *pStruct;
+  u64 nBefore;                    /* Write-counter before this flush */
+  u64 nAfter;                     /* Write-counter after this flush */
+  int nWork;                      /* Number of work-quanta to perform */
+  int nRem;                       /* Number of leaf pages left to write */
+
+  if( p->rc!=SQLITE_OK || p->pConfig->nAutomerge<=0 ) return;
+  pStruct = *ppStruct;
+
+  /* Update the write-counter. While doing so, set nWork. */
+  nBefore = pStruct->nWriteCounter;
+  nAfter = nBefore + nLeaf;
+  nWork = (int)((nAfter / p->nWorkUnit) - (nBefore / p->nWorkUnit));
+  pStruct->nWriteCounter += nLeaf;
+  nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
+
+  fts5IndexMerge(p, ppStruct, nRem);
+}
+
+/*
+** Starting at level 0, merge every segment on the level into the next
+** level for as long as the current level holds pConfig->nCrisisMerge or
+** more segments, moving up one level after each merge. Stops as soon as
+** a level is below the threshold or an error occurs.
+**
+** *ppStruct is updated before returning, as merging may replace the
+** structure object.
+*/
+static void fts5IndexCrisismerge(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
+){
+  const int nCrisis = p->pConfig->nCrisisMerge;
+  Fts5Structure *pStruct = *ppStruct;
+  int iLvl;
+
+  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
+  for(iLvl=0;
+      p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis;
+      iLvl++
+  ){
+    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
+    assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
+    fts5StructurePromote(p, iLvl+1, pStruct);
+  }
+  *ppStruct = pStruct;
+}
+
+/*
+** Return the error code currently stored in p->rc and reset p->rc to
+** SQLITE_OK so that the handle is ready for the next API call.
+*/
+static int fts5IndexReturn(Fts5Index *p){
+  const int rcPending = p->rc;
+  p->rc = SQLITE_OK;
+  return rcPending;
+}
+
+/*
+** Pairs an index handle with a segment writer.
+** NOTE(review): no use of this type is visible in this part of the file;
+** confirm whether it is still referenced elsewhere.
+*/
+typedef struct Fts5FlushCtx Fts5FlushCtx;
+struct Fts5FlushCtx {
+ Fts5Index *pIdx; /* Index handle */
+ Fts5SegWriter writer; /* Segment writer object */
+};
+
+/*
+** aBuf[] holds a sequence of varints, each of which fits in 32 bits.
+** Return the byte length of the longest leading run of whole varints that
+** is no larger than nMax bytes.
+**
+** The first varint is always included in the result, even if it alone is
+** larger than nMax bytes.
+*/
+static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
+  u32 unused;                     /* Decoded values are not needed */
+  int nPrefix = fts5GetVarint32(aBuf, unused);
+
+  if( nPrefix>=nMax ) return nPrefix;
+  for(;;){
+    int nVarint = fts5GetVarint32(&aBuf[nPrefix], unused);
+    if( (nPrefix + nVarint) > nMax ) return nPrefix;
+    nPrefix += nVarint;
+  }
+}
+
+/*
+** Flush the contents of in-memory hash table iHash to a new level-0
+** segment on disk. Also update the corresponding structure record.
+**
+** If an error occurs, set the Fts5Index.rc error code. If an error has
+** already occurred, this function is a no-op.
+**
+** Outline:
+**
+**   1. Read the structure record and allocate a segment-id. If the id is
+**      zero, nothing is written and control skips to step 5.
+**   2. For each term in the hash table, append the term and then its
+**      doclist to the segment writer. A doclist that fits in the space
+**      left on the current leaf is copied in one go; otherwise it is
+**      copied rowid by rowid, flushing leaves as they fill up.
+**   3. Clear the hash table and finish the writer, which reports the last
+**      leaf page number of the new segment.
+**   4. Append the new segment to level 0 of the structure (adding a level
+**      first if there are none) and run fts5StructurePromote().
+**   5. Run automerge and crisis-merge, then write and release the
+**      structure.
+*/
+static void fts5FlushOneHash(Fts5Index *p){
+ Fts5Hash *pHash = p->pHash;
+ Fts5Structure *pStruct;
+ int iSegid;
+ int pgnoLast = 0; /* Last leaf page number in segment */
+
+ /* Obtain a reference to the index structure and allocate a new segment-id
+ ** for the new level-0 segment. */
+ pStruct = fts5StructureRead(p);
+ iSegid = fts5AllocateSegid(p, pStruct);
+
+ if( iSegid ){
+ const int pgsz = p->pConfig->pgsz;
+ int eDetail = p->pConfig->eDetail;
+ Fts5StructureSegment *pSeg; /* New segment within pStruct */
+ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */
+ Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */
+
+ Fts5SegWriter writer;
+ fts5WriteInit(p, &writer, iSegid);
+
+ pBuf = &writer.writer.buf;
+ pPgidx = &writer.writer.pgidx;
+
+ /* fts5WriteInit() should have initialized the buffers to (most likely)
+ ** the maximum space required. */
+ assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
+ assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
+
+ /* Begin scanning through hash table entries. This loop runs once for each
+ ** term/doclist currently stored within the hash table. */
+ if( p->rc==SQLITE_OK ){
+ p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
+ }
+ while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
+ const char *zTerm; /* Buffer containing term */
+ const u8 *pDoclist; /* Pointer to doclist for this term */
+ int nDoclist; /* Size of doclist in bytes */
+
+ /* Write the term for this entry to disk. */
+ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
+ fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
+
+ assert( writer.bFirstRowidInPage==0 );
+ if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
+ /* The entire doclist will fit on the current leaf. */
+ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
+ }else{
+ i64 iRowid = 0;
+ i64 iDelta = 0;
+ int iOff = 0;
+
+ /* The entire doclist will not fit on this leaf. The following
+ ** loop iterates through the poslists that make up the current
+ ** doclist. */
+ while( p->rc==SQLITE_OK && iOff<nDoclist ){
+ /* Rowids in the hash doclist are delta-encoded; accumulate the
+ ** absolute value so it can be written out if this turns out to be
+ ** the first rowid on a page. */
+ iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
+ iRowid += iDelta;
+
+ if( writer.bFirstRowidInPage ){
+ fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */
+ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
+ writer.bFirstRowidInPage = 0;
+ fts5WriteDlidxAppend(p, &writer, iRowid);
+ }else{
+ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
+ }
+ assert( pBuf->n<=pBuf->nSpace );
+
+ if( eDetail==FTS5_DETAIL_NONE ){
+ /* With detail=none there is no position list. Copy up to two
+ ** 0x00 bytes that follow the rowid, if present. */
+ if( iOff<nDoclist && pDoclist[iOff]==0 ){
+ pBuf->p[pBuf->n++] = 0;
+ iOff++;
+ if( iOff<nDoclist && pDoclist[iOff]==0 ){
+ pBuf->p[pBuf->n++] = 0;
+ iOff++;
+ }
+ }
+ if( (pBuf->n + pPgidx->n)>=pgsz ){
+ fts5WriteFlushLeaf(p, &writer);
+ }
+ }else{
+ int bDummy;
+ int nPos;
+ int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
+ /* nCopy now covers the size field plus the nPos bytes after it */
+ nCopy += nPos;
+ if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
+ /* The entire poslist will fit on the current leaf. So copy
+ ** it in one go. */
+ fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
+ }else{
+ /* The entire poslist will not fit on this leaf. So it needs
+ ** to be broken into sections. The only qualification being
+ ** that each varint must be stored contiguously. */
+ const u8 *pPoslist = &pDoclist[iOff];
+ int iPos = 0;
+ while( p->rc==SQLITE_OK ){
+ int nSpace = pgsz - pBuf->n - pPgidx->n;
+ int n = 0;
+ if( (nCopy - iPos)<=nSpace ){
+ n = nCopy - iPos;
+ }else{
+ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
+ }
+ assert( n>0 );
+ fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
+ iPos += n;
+ if( (pBuf->n + pPgidx->n)>=pgsz ){
+ fts5WriteFlushLeaf(p, &writer);
+ }
+ if( iPos>=nCopy ) break;
+ }
+ }
+ iOff += nCopy;
+ }
+ }
+ }
+
+ /* TODO2: Doclist terminator written here. */
+ /* pBuf->p[pBuf->n++] = '\0'; */
+ assert( pBuf->n<=pBuf->nSpace );
+ sqlite3Fts5HashScanNext(pHash);
+ }
+ sqlite3Fts5HashClear(pHash);
+ fts5WriteFinish(p, &writer, &pgnoLast);
+
+ /* Update the Fts5Structure. It is written back to the database by the
+ ** fts5StructureRelease() call below. */
+ if( pStruct->nLevel==0 ){
+ fts5StructureAddLevel(&p->rc, &pStruct);
+ }
+ fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
+ if( p->rc==SQLITE_OK ){
+ pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
+ pSeg->iSegid = iSegid;
+ pSeg->pgnoFirst = 1;
+ pSeg->pgnoLast = pgnoLast;
+ pStruct->nSegment++;
+ }
+ fts5StructurePromote(p, 0, pStruct);
+ }
+
+ /* These run whether or not a segment was written above. pgnoLast is
+ ** still 0 if none was. */
+ fts5IndexAutomerge(p, &pStruct, pgnoLast);
+ fts5IndexCrisismerge(p, &pStruct);
+ fts5StructureWrite(p, pStruct);
+ fts5StructureRelease(pStruct);
+}
+
+/*
+** If the in-memory hash table holds any pending data, write it to the
+** database as a new segment and reset the pending-data counter. If there
+** is no pending data this is a no-op.
+*/
+static void fts5IndexFlush(Fts5Index *p){
+  if( p->nPendingData==0 ) return;
+
+  assert( p->pHash );
+  p->nPendingData = 0;
+  fts5FlushOneHash(p);
+}
+
+
+/*
+** Merge every segment in the index into a single segment.
+**
+** Any pending hash-table data is flushed first. If the index then holds
+** more than one segment, a temporary structure is built that has one more
+** level than the current structure. All existing segments are copied into
+** that extra (topmost) level, which is then merged repeatedly, in steps of
+** FTS5_OPT_WORK_UNIT, until it contains no segments or an error occurs.
+** The resulting structure is written back to the database.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexOptimize(Fts5Index *p){
+  Fts5Structure *pStruct;
+  Fts5Structure *pNew = 0;
+  int nSeg = 0;
+
+  assert( p->rc==SQLITE_OK );
+  fts5IndexFlush(p);
+  pStruct = fts5StructureRead(p);
+
+  if( pStruct ){
+    assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
+    nSeg = pStruct->nSegment;
+    if( nSeg>1 ){
+      /* Room for the header plus one level more than pStruct has */
+      int nByte = sizeof(Fts5Structure);
+      nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
+      pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
+    }
+  }
+  if( pNew ){
+    Fts5StructureLevel *pLvl;
+    int nByte = nSeg * sizeof(Fts5StructureSegment);
+    pNew->nLevel = pStruct->nLevel+1;
+    pNew->nRef = 1;
+    pNew->nWriteCounter = pStruct->nWriteCounter;
+    pLvl = &pNew->aLevel[pStruct->nLevel];
+    pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
+    if( pLvl->aSeg ){
+      int iLvl, iSeg;
+      int iSegOut = 0;
+      /* Iterate through all segments from oldest to newest, so that
+      ** pLvl->aSeg[0] is the oldest segment in the index and
+      ** pLvl->aSeg[nSeg-1] the newest. Within a level, new segments are
+      ** appended at the end of aSeg[], and merge output moves to the next
+      ** level up, so higher levels hold older data. The levels are
+      ** therefore visited from the highest down to level 0. Visiting
+      ** them in ascending order would put the newest data first. */
+      for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
+        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
+          pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
+          iSegOut++;
+        }
+      }
+      pNew->nSegment = pLvl->nSeg = nSeg;
+    }else{
+      sqlite3_free(pNew);
+      pNew = 0;
+    }
+  }
+
+  if( pNew ){
+    int iLvl = pNew->nLevel-1;
+    while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
+      int nRem = FTS5_OPT_WORK_UNIT;
+      fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
+    }
+
+    fts5StructureWrite(p, pNew);
+    fts5StructureRelease(pNew);
+  }
+
+  fts5StructureRelease(pStruct);
+  return fts5IndexReturn(p);
+}
+
+/*
+** Read the index structure and, provided it has at least one level, pass
+** nMerge to fts5IndexMerge() and write the updated structure back to the
+** database.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
+  Fts5Structure *pStruct = fts5StructureRead(p);
+
+  if( pStruct!=0 && pStruct->nLevel!=0 ){
+    fts5IndexMerge(p, &pStruct, nMerge);
+    fts5StructureWrite(p, pStruct);
+  }
+  fts5StructureRelease(pStruct);
+  return fts5IndexReturn(p);
+}
+
+/*
+** Append the rowid delta iDelta to buffer pBuf as a varint. The iterator
+** argument is not used. It is present so that this function has the same
+** signature as fts5AppendPoslist(). Errors are reported via p->rc.
+*/
+static void fts5AppendRowid(
+  Fts5Index *p,                   /* Index object (for error code) */
+  i64 iDelta,                     /* Rowid delta to append */
+  Fts5Iter *pUnused,              /* Ignored */
+  Fts5Buffer *pBuf                /* Buffer to append to */
+){
+  UNUSED_PARAM(pUnused);
+  fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
+}
+
+/*
+** Append one doclist entry to pBuf: the rowid delta iDelta, then the
+** position-list size field (nData*2), then the nData bytes of position
+** data currently exposed by iterator pMulti.
+**
+** The buffer is grown by nData plus room for two 9-byte varints before
+** anything is written. This is a no-op if p->rc is already set or if the
+** buffer cannot be grown.
+*/
+static void fts5AppendPoslist(
+  Fts5Index *p,                   /* Index object (for error code) */
+  i64 iDelta,                     /* Rowid delta to append */
+  Fts5Iter *pMulti,               /* Iterator supplying position data */
+  Fts5Buffer *pBuf                /* Buffer to append to */
+){
+  const int nPoslist = pMulti->base.nData;
+
+  assert( nPoslist>0 );
+  if( p->rc!=SQLITE_OK ) return;
+  if( fts5BufferGrow(&p->rc, pBuf, nPoslist+9+9) ) return;
+
+  fts5BufferSafeAppendVarint(pBuf, iDelta);
+  fts5BufferSafeAppendVarint(pBuf, nPoslist*2);
+  fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nPoslist);
+}
+
+
+/*
+** Advance doclist iterator pIter to the next entry.
+**
+** The next entry starts immediately after the current size field and
+** position list. If that point is at or beyond aEof, aPoslist is set to
+** NULL to signal EOF. Otherwise the rowid delta is added to iRowid, the
+** size field is decoded (its value shifted right by one bit gives the
+** position list length in bytes) and aPoslist is left pointing at the
+** size field.
+*/
+static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
+  u8 *pNext = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
+  i64 iDelta;
+
+  assert( pIter->aPoslist );
+  if( pNext>=pIter->aEof ){
+    pIter->aPoslist = 0;
+    return;
+  }
+
+  pNext += fts5GetVarint(pNext, (u64*)&iDelta);
+  pIter->iRowid += iDelta;
+
+  /* Decode the position list size field */
+  if( (pNext[0] & 0x80)==0 ){
+    /* Single-byte size field */
+    pIter->nPoslist = ((int)(pNext[0])) >> 1;
+    pIter->nSize = 1;
+  }else{
+    int nPos;
+    pIter->nSize = fts5GetVarint32(pNext, nPos);
+    pIter->nPoslist = (nPos>>1);
+  }
+
+  pIter->aPoslist = pNext;
+}
+
+/*
+** Initialize pIter to iterate through the doclist stored in pBuf and
+** position it on the first entry (or at EOF if the buffer is empty).
+*/
+static void fts5DoclistIterInit(
+  Fts5Buffer *pBuf,               /* Buffer containing the doclist */
+  Fts5DoclistIter *pIter          /* Iterator to initialize */
+){
+  u8 *aStart = pBuf->p;
+
+  memset(pIter, 0, sizeof(*pIter));
+  pIter->aPoslist = aStart;
+  pIter->aEof = &aStart[pBuf->n];
+  fts5DoclistIterNext(pIter);
+}
+
+#if 0
+/*
+** Append a doclist to buffer pBuf.
+**
+** This function assumes that space within the buffer has already been
+** allocated.
+*/
+static void fts5MergeAppendDocid(
+ Fts5Buffer *pBuf, /* Buffer to write to */
+ i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */
+ i64 iRowid /* Rowid to append */
+){
+ assert( pBuf->n!=0 || (*piLastRowid)==0 );
+ fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
+ *piLastRowid = iRowid;
+}
+#endif
+
+#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
+ assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
+ fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \
+ (iLastRowid) = (iRowid); \
+}
+
+/*
+** Exchange the contents of the two buffer objects *p1 and *p2. Only the
+** Fts5Buffer structs themselves are copied.
+*/
+static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
+  Fts5Buffer saved;
+  saved = *p2;
+  *p2 = *p1;
+  *p1 = saved;
+}
+
+/*
+** Read the next delta-encoded rowid from pBuf.
+**
+** If offset *piOff is at or past the end of the buffer, set *piOff to -1.
+** Otherwise decode the varint at *piOff, add its value to *piRowid and
+** advance *piOff past it.
+*/
+static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
+  const int iCur = *piOff;
+  if( iCur<pBuf->n ){
+    u64 iDelta;
+    *piOff = iCur + sqlite3Fts5GetVarint(&pBuf->p[iCur], &iDelta);
+    *piRowid += iDelta;
+  }else{
+    *piOff = -1;
+  }
+}
+
+/*
+** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
+** In this case the buffers consist of a delta-encoded list of rowids only.
+**
+** The two lists are merged in ascending rowid order. A rowid present in
+** both lists is written once. On success the merged list replaces the
+** contents of p1. If the output buffer cannot be allocated, p->rc is set
+** and p1 is left untouched.
+*/
+static void fts5MergeRowidLists(
+  Fts5Index *p,                   /* FTS5 backend object */
+  Fts5Buffer *p1,                 /* First list to merge */
+  Fts5Buffer *p2                  /* Second list to merge */
+){
+  Fts5Buffer merged;              /* Output list under construction */
+  int iOff1 = 0;                  /* Read offset in p1, or -1 at EOF */
+  int iOff2 = 0;                  /* Read offset in p2, or -1 at EOF */
+  i64 iRow1 = 0;                  /* Current rowid from p1 */
+  i64 iRow2 = 0;                  /* Current rowid from p2 */
+  i64 iPrev = 0;                  /* Last rowid written to merged */
+
+  memset(&merged, 0, sizeof(merged));
+  sqlite3Fts5BufferSize(&p->rc, &merged, p1->n + p2->n);
+  if( p->rc ) return;
+
+  fts5NextRowid(p1, &iOff1, &iRow1);
+  fts5NextRowid(p2, &iOff2, &iRow2);
+  while( iOff1>=0 || iOff2>=0 ){
+    /* Take from p1 only when it is strictly smaller, or p2 is exhausted */
+    int bFromP1 = (iOff1>=0 && (iOff2<0 || iRow1<iRow2));
+    i64 iNext = bFromP1 ? iRow1 : iRow2;
+
+    assert( iPrev==0 || iNext>iPrev );
+    fts5BufferSafeAppendVarint(&merged, iNext - iPrev);
+    iPrev = iNext;
+
+    if( bFromP1 ){
+      fts5NextRowid(p1, &iOff1, &iRow1);
+    }else{
+      /* Same rowid in both lists: step past the duplicate in p1 too */
+      if( iOff1>=0 && iRow1==iRow2 ){
+        fts5NextRowid(p1, &iOff1, &iRow1);
+      }
+      fts5NextRowid(p2, &iOff2, &iRow2);
+    }
+  }
+
+  fts5BufferSwap(&merged, p1);
+  fts5BufferFree(&merged);
+}
+
+/*
+** Buffers p1 and p2 contain doclists. This function merges the content
+** of the two doclists together and sets buffer p1 to the result before
+** returning.
+**
+** If an error occurs, an error code is left in p->rc. If an error has
+** already occurred, this function is a no-op.
+**
+** If p2 is empty the function returns without touching p1. Otherwise the
+** two lists are walked in parallel in ascending rowid order. An entry
+** whose rowid appears in only one list is copied verbatim. When both lists
+** contain the same rowid, the two position lists are merged into buffer
+** tmp, skipping a position from p1 that equals the one just written, and
+** the result is written out with a fresh size field.
+*/
+static void fts5MergePrefixLists(
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5Buffer *p1, /* First list to merge */
+ Fts5Buffer *p2 /* Second list to merge */
+){
+ if( p2->n ){
+ i64 iLastRowid = 0;
+ Fts5DoclistIter i1;
+ Fts5DoclistIter i2;
+ Fts5Buffer out = {0, 0, 0};
+ Fts5Buffer tmp = {0, 0, 0};
+
+ /* The output buffer is sized to the sum of the two input sizes. */
+ if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return;
+ fts5DoclistIterInit(p1, &i1);
+ fts5DoclistIterInit(p2, &i2);
+
+ while( 1 ){
+ if( i1.iRowid<i2.iRowid ){
+ /* Copy entry from i1 */
+ fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
+ fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize);
+ fts5DoclistIterNext(&i1);
+ if( i1.aPoslist==0 ) break;
+ }
+ else if( i2.iRowid!=i1.iRowid ){
+ /* Copy entry from i2 */
+ fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
+ fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize);
+ fts5DoclistIterNext(&i2);
+ if( i2.aPoslist==0 ) break;
+ }
+ else{
+ /* Merge the two position lists. */
+ i64 iPos1 = 0;
+ i64 iPos2 = 0;
+ int iOff1 = 0;
+ int iOff2 = 0;
+ u8 *a1 = &i1.aPoslist[i1.nSize];
+ u8 *a2 = &i2.aPoslist[i2.nSize];
+
+ i64 iPrev = 0;
+ Fts5PoslistWriter writer;
+ memset(&writer, 0, sizeof(writer));
+
+ fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
+ fts5BufferZero(&tmp);
+ sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist);
+ if( p->rc ) break;
+
+ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
+ sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
+ assert( iPos1>=0 && iPos2>=0 );
+
+ /* Write the smaller of the two first positions. */
+ if( iPos1<iPos2 ){
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
+ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
+ }else{
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
+ sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
+ }
+
+ /* Merge until one of the position lists is exhausted. The code here
+ ** treats a negative iPosN as the end of that list. */
+ if( iPos1>=0 && iPos2>=0 ){
+ while( 1 ){
+ if( iPos1<iPos2 ){
+ if( iPos1!=iPrev ){
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
+ }
+ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1);
+ if( iPos1<0 ) break;
+ }else{
+ assert( iPos2!=iPrev );
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
+ sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2);
+ if( iPos2<0 ) break;
+ }
+ }
+ }
+
+ /* Write the pending position of whichever list still has data, then
+ ** copy the rest of that list verbatim. */
+ if( iPos1>=0 ){
+ if( iPos1!=iPrev ){
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1);
+ }
+ fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1);
+ }else{
+ assert( iPos2>=0 && iPos2!=iPrev );
+ sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2);
+ fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2);
+ }
+
+ /* WRITEPOSLISTSIZE */
+ fts5BufferSafeAppendVarint(&out, tmp.n * 2);
+ fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
+ fts5DoclistIterNext(&i1);
+ fts5DoclistIterNext(&i2);
+ if( i1.aPoslist==0 || i2.aPoslist==0 ) break;
+ }
+ }
+
+ /* At most one iterator still has entries. Write its current rowid and
+ ** copy everything from its current position list to the end of its
+ ** buffer. */
+ if( i1.aPoslist ){
+ fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid);
+ fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist);
+ }
+ else if( i2.aPoslist ){
+ fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid);
+ fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist);
+ }
+
+ fts5BufferSet(&p->rc, p1, out.n, out.p);
+ fts5BufferFree(&tmp);
+ fts5BufferFree(&out);
+ }
+}
+
+/*
+** Create an iterator for a prefix query that has to be answered by scanning
+** the main index (no suitable prefix index exists).
+**
+** Every term that begins with the nToken bytes at pToken is visited and its
+** entries are appended to an in-memory doclist. Whenever a rowid arrives
+** that is not larger than the previous one, the doclist gathered so far is
+** pushed into the aBuf[] array: it is swapped into the first empty slot,
+** and merged with (and empties) each occupied slot it passes on the way.
+** At the end all slots are merged into one doclist, which is copied into a
+** newly allocated Fts5Data object and passed to fts5MultiIterNew2() to
+** produce *ppIter.
+**
+** The merge and append routines are selected by the detail mode: plain
+** rowid lists for detail=none, full doclists otherwise.
+**
+** Errors are reported via p->rc. *ppIter is only written if the final
+** Fts5Data allocation succeeds.
+*/
+static void fts5SetupPrefixIter(
+  Fts5Index *p,                   /* Index to read from */
+  int bDesc,                      /* True for "ORDER BY rowid DESC" */
+  const u8 *pToken,               /* Buffer containing prefix to match */
+  int nToken,                     /* Size of buffer pToken in bytes */
+  Fts5Colset *pColset,            /* Restrict matches to these columns */
+  Fts5Iter **ppIter               /* OUT: New iterator */
+){
+  Fts5Structure *pStruct;
+  Fts5Buffer *aBuf;
+  const int nBuf = 32;
+
+  void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*);
+  void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*);
+  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
+    xMerge = fts5MergeRowidLists;
+    xAppend = fts5AppendRowid;
+  }else{
+    xMerge = fts5MergePrefixLists;
+    xAppend = fts5AppendPoslist;
+  }
+
+  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
+  pStruct = fts5StructureRead(p);
+
+  if( aBuf && pStruct ){
+    const int flags = FTS5INDEX_QUERY_SCAN
+                    | FTS5INDEX_QUERY_SKIPEMPTY
+                    | FTS5INDEX_QUERY_NOOUTPUT;
+    int i;
+    i64 iLastRowid = 0;
+    Fts5Iter *p1 = 0;             /* Iterator used to gather data from index */
+    Fts5Data *pData;
+    Fts5Buffer doclist;
+    int bNewTerm = 1;
+
+    memset(&doclist, 0, sizeof(doclist));
+    fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
+    fts5IterSetOutputCb(&p->rc, p1);
+    for( /* no-op */ ;
+        fts5MultiIterEof(p, p1)==0;
+        fts5MultiIterNext2(p, p1, &bNewTerm)
+    ){
+      Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
+      int nTerm = pSeg->term.n;
+      const u8 *pTerm = pSeg->term.p;
+      p1->xSetOutputs(p1, pSeg);
+
+      assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
+      if( bNewTerm ){
+        /* Stop at the first term that does not start with the prefix */
+        if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
+      }
+
+      if( p1->base.nData==0 ) continue;
+
+      if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
+        /* Rowids are no longer ascending: push the doclist gathered so
+        ** far into aBuf[], merging with each occupied slot on the way. */
+        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
+          assert( i<nBuf );
+          if( aBuf[i].n==0 ){
+            fts5BufferSwap(&doclist, &aBuf[i]);
+            fts5BufferZero(&doclist);
+          }else{
+            xMerge(p, &doclist, &aBuf[i]);
+            fts5BufferZero(&aBuf[i]);
+          }
+        }
+        iLastRowid = 0;
+      }
+
+      xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist);
+      iLastRowid = p1->base.iRowid;
+    }
+
+    /* Fold every slot into the final doclist and free the slots */
+    for(i=0; i<nBuf; i++){
+      if( p->rc==SQLITE_OK ){
+        xMerge(p, &doclist, &aBuf[i]);
+      }
+      fts5BufferFree(&aBuf[i]);
+    }
+    fts5MultiIterFree(p1);
+
+    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
+    if( pData ){
+      pData->p = (u8*)&pData[1];
+      pData->nn = pData->szLeaf = doclist.n;
+      /* doclist.p is NULL when no term matched. Passing a NULL pointer to
+      ** memcpy() is undefined behaviour even for a zero-byte copy, so
+      ** skip the call in that case. */
+      if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
+      fts5MultiIterNew2(p, pData, bDesc, ppIter);
+    }
+    fts5BufferFree(&doclist);
+  }
+
+  fts5StructureRelease(pStruct);
+  sqlite3_free(aBuf);
+}
+
+
+/*
+** Declare that all following calls to sqlite3Fts5IndexWrite() apply to the
+** document with rowid iRowid. bDelete is recorded as the current
+** operation type.
+**
+** The hash table is allocated on first use. Pending data is flushed to
+** disk first if any of the following holds:
+**
+**   * iRowid is smaller than the rowid of the previous write,
+**   * iRowid equals the previous rowid and the previous operation was not
+**     a delete, or
+**   * the amount of pending data exceeds the configured hash size.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
+  int bFlush;                     /* True if the hash must be flushed */
+
+  assert( p->rc==SQLITE_OK );
+
+  /* Create the hash table on first use */
+  if( p->pHash==0 ){
+    p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
+  }
+
+  bFlush = ( iRowid<p->iWriteRowid
+          || (iRowid==p->iWriteRowid && p->bDelete==0)
+          || (p->nPendingData > p->pConfig->nHashSize)
+  );
+  if( bFlush ){
+    fts5IndexFlush(p);
+  }
+
+  p->iWriteRowid = iRowid;
+  p->bDelete = bDelete;
+  return fts5IndexReturn(p);
+}
+
+/*
+** Flush pending hash-table data to disk. If bCommit is true, also close
+** the reader by calling fts5CloseReader().
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){
+  assert( p->rc==SQLITE_OK );
+  fts5IndexFlush(p);
+  if( bCommit ){
+    fts5CloseReader(p);
+  }
+  return fts5IndexReturn(p);
+}
+
+/*
+** Throw away everything held in the in-memory hash tables without writing
+** it to the database. The %_data table may have changed on disk, so the
+** reader is closed as well, invalidating any in-memory cache of %_data
+** records.
+**
+** Always returns SQLITE_OK. p->rc is neither checked nor reset here.
+*/
+static int sqlite3Fts5IndexRollback(Fts5Index *p){
+  fts5CloseReader(p);
+  fts5IndexDiscardData(p);
+  /* assert( p->rc==SQLITE_OK ); */
+  return SQLITE_OK;
+}
+
+/*
+** The %_data table is completely empty when this function is called.
+** Populate it with the initial version of the "averages" record (a
+** zero-byte blob) and a structure record built from an all-zero
+** Fts5Structure object.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexReinit(Fts5Index *p){
+  Fts5Structure sEmpty;
+
+  memset(&sEmpty, 0, sizeof(Fts5Structure));
+  fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
+  fts5StructureWrite(p, &sEmpty);
+  return fts5IndexReturn(p);
+}
+
+/*
+** Allocate and open a new Fts5Index handle for the table described by
+** pConfig. If bCreate is true, the %_data and %_idx tables are created and
+** then populated with their initial records by sqlite3Fts5IndexReinit().
+**
+** On success *pp is set to the new handle and SQLITE_OK is returned. On
+** failure the partially built handle is closed, *pp is set to NULL and an
+** SQLite error code is returned.
+*/
+static int sqlite3Fts5IndexOpen(
+  Fts5Config *pConfig,            /* Configuration of the FTS5 table */
+  int bCreate,                    /* True to create the backing tables */
+  Fts5Index **pp,                 /* OUT: New index handle */
+  char **pzErr                    /* OUT: Error message, if any */
+){
+  int rc = SQLITE_OK;
+  Fts5Index *pNew;                /* New object */
+
+  pNew = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
+  *pp = pNew;
+  if( rc==SQLITE_OK ){
+    pNew->pConfig = pConfig;
+    pNew->nWorkUnit = FTS5_WORK_UNIT;
+    pNew->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
+    if( pNew->zDataTbl && bCreate ){
+      rc = sqlite3Fts5CreateTable(
+          pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
+      );
+      if( rc==SQLITE_OK ){
+        rc = sqlite3Fts5CreateTable(pConfig, "idx",
+            "segid, term, pgno, PRIMARY KEY(segid, term)",
+            1, pzErr
+        );
+        if( rc==SQLITE_OK ){
+          rc = sqlite3Fts5IndexReinit(pNew);
+        }
+      }
+    }
+  }
+
+  assert( rc!=SQLITE_OK || pNew->rc==SQLITE_OK );
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts5IndexClose(pNew);
+    *pp = 0;
+  }
+  return rc;
+}
+
+/*
+** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
+** All prepared statements owned by the handle are finalized, and the hash
+** table, the data-table name and the handle itself are freed. Passing NULL
+** is a harmless no-op.
+**
+** Always returns SQLITE_OK.
+*/
+static int sqlite3Fts5IndexClose(Fts5Index *p){
+  if( p==0 ) return SQLITE_OK;
+
+  assert( p->pReader==0 );
+  sqlite3_finalize(p->pWriter);
+  sqlite3_finalize(p->pDeleter);
+  sqlite3_finalize(p->pIdxWriter);
+  sqlite3_finalize(p->pIdxDeleter);
+  sqlite3_finalize(p->pIdxSelect);
+  sqlite3Fts5HashFree(p->pHash);
+  sqlite3_free(p->zDataTbl);
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** Argument p points to a buffer containing utf-8 text that is nByte bytes
+** in size. Return the number of bytes in the nChar character prefix of the
+** buffer, or 0 if there are less than nChar characters in total.
+**
+** A character is one lead byte followed by any continuation bytes
+** (10xxxxxx) that lie inside the buffer. The continuation-byte scan is
+** bounded by nByte, in the same way as in fts5IndexCharlen(). The function
+** therefore never reads p[nByte] or beyond and never returns a value
+** larger than nByte.
+*/
+static int sqlite3Fts5IndexCharlenToBytelen(
+  const char *p,                  /* UTF-8 text */
+  int nByte,                      /* Size of p[] in bytes */
+  int nChar                       /* Number of characters wanted */
+){
+  int n = 0;
+  int i;
+  for(i=0; i<nChar; i++){
+    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
+    if( (unsigned char)p[n++]>=0xc0 ){
+      /* Multi-byte character: consume trailing bytes, but stop at the end
+      ** of the buffer. */
+      while( n<nByte && (p[n] & 0xc0)==0x80 ) n++;
+    }
+  }
+  return n;
+}
+
+/*
+** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
+** unicode characters in the string.
+**
+** A byte with value 0xC0 or greater starts a multi-byte character that
+** also covers the continuation bytes (10xxxxxx) directly after it. Every
+** other byte counts as one character on its own.
+*/
+static int fts5IndexCharlen(const char *pIn, int nIn){
+  int iByte;
+  int nCount = 0;
+  for(iByte=0; iByte<nIn; nCount++){
+    unsigned char c = (unsigned char)pIn[iByte];
+    iByte++;
+    if( c<0xc0 ) continue;
+    while( iByte<nIn && (pIn[iByte] & 0xc0)==0x80 ){
+      iByte++;
+    }
+  }
+  return nCount;
+}
+
+/*
+** Insert or remove data to or from the index. Each time a document is
+** added to or removed from the index, this function is called one or more
+** times.
+**
+** For an insert, it must be called once for each token in the new document.
+** If the operation is a delete, it must be called (at least) once for each
+** unique token in the document with an iCol value less than zero. The iPos
+** argument is ignored for a delete.
+**
+** The token is first written to the main terms index. Then, for each
+** configured prefix length, the leading characters of the token are
+** written to the corresponding prefix index, unless the token has fewer
+** characters than that prefix length.
+**
+** Returns SQLITE_OK, or the first error code returned by the hash table.
+*/
+static int sqlite3Fts5IndexWrite(
+  Fts5Index *p,                   /* Index to write to */
+  int iCol,                       /* Column token appears in (-ve -> delete) */
+  int iPos,                       /* Position of token within column */
+  const char *pToken, int nToken  /* Token to add or remove to or from index */
+){
+  Fts5Config *pConfig = p->pConfig;
+  int iPrefix;                    /* Used to iterate through prefix indexes */
+  int rc;                         /* Return code */
+
+  assert( p->rc==SQLITE_OK );
+  assert( (iCol<0)==p->bDelete );
+
+  /* Main terms index first */
+  rc = sqlite3Fts5HashWrite(
+      p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
+  );
+
+  for(iPrefix=0; rc==SQLITE_OK && iPrefix<pConfig->nPrefix; iPrefix++){
+    const int nChar = pConfig->aPrefix[iPrefix];
+    int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
+    if( nByte==0 ) continue;      /* Token shorter than this prefix */
+    rc = sqlite3Fts5HashWrite(p->pHash,
+        p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+iPrefix+1),
+        pToken, nByte
+    );
+  }
+
+  return rc;
+}
+
+/*
+** Open a new iterator to iterate through all rowids that match the
+** specified token or token prefix.
+**
+** The token is copied into a temporary buffer with one extra leading byte.
+** That byte is set to FTS5_MAIN_PREFIX plus the number of the index that
+** will be searched (0 for the main index).
+**
+** For a prefix query, a prefix index is used if one is configured whose
+** length equals the token length in characters. Otherwise the query is
+** answered by fts5SetupPrefixIter(), which scans the main index.
+**
+** On success *ppIter is set to the new iterator. If an error occurs after
+** the temporary buffer has been allocated, the iterator is closed and the
+** reader is closed. *ppIter is not written at all if the buffer allocation
+** itself fails.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexQuery(
+ Fts5Index *p, /* FTS index to query */
+ const char *pToken, int nToken, /* Token (or prefix) to query for */
+ int flags, /* Mask of FTS5INDEX_QUERY_X flags */
+ Fts5Colset *pColset, /* Match these columns only */
+ Fts5IndexIter **ppIter /* OUT: New iterator object */
+){
+ Fts5Config *pConfig = p->pConfig;
+ Fts5Iter *pRet = 0;
+ Fts5Buffer buf = {0, 0, 0};
+
+ /* If the QUERY_SCAN flag is set, all other flags must be clear. */
+ assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
+
+ if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
+ int iIdx = 0; /* Index to search */
+ /* buf.p[0] is reserved for the index-prefix byte, filled in below */
+ memcpy(&buf.p[1], pToken, nToken);
+
+ /* Figure out which index to search and set iIdx accordingly. If this
+ ** is a prefix query for which there is no prefix index, set iIdx to
+ ** greater than pConfig->nPrefix to indicate that the query will be
+ ** satisfied by scanning multiple terms in the main index.
+ **
+ ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
+ ** prefix-query. Instead of using a prefix-index (if one exists),
+ ** evaluate the prefix query using the main FTS index. This is used
+ ** for internal sanity checking by the integrity-check in debug
+ ** mode only. */
+#ifdef SQLITE_DEBUG
+ if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
+ assert( flags & FTS5INDEX_QUERY_PREFIX );
+ iIdx = 1+pConfig->nPrefix;
+ }else
+#endif
+ if( flags & FTS5INDEX_QUERY_PREFIX ){
+ int nChar = fts5IndexCharlen(pToken, nToken);
+ /* If no prefix index matches, the loop exits with iIdx==nPrefix+1 */
+ for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
+ if( pConfig->aPrefix[iIdx-1]==nChar ) break;
+ }
+ }
+
+ if( iIdx<=pConfig->nPrefix ){
+ /* Straight index lookup */
+ Fts5Structure *pStruct = fts5StructureRead(p);
+ buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
+ if( pStruct ){
+ fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
+ pColset, buf.p, nToken+1, -1, 0, &pRet
+ );
+ fts5StructureRelease(pStruct);
+ }
+ }else{
+ /* Scan multiple terms in the main index */
+ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
+ buf.p[0] = FTS5_MAIN_PREFIX;
+ fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet);
+ assert( p->rc!=SQLITE_OK || pRet->pColset==0 );
+ fts5IterSetOutputCb(&p->rc, pRet);
+ if( p->rc==SQLITE_OK ){
+ Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
+ if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
+ }
+ }
+
+ if( p->rc ){
+ /* NOTE(review): pRet may still be NULL here, so &pRet->base relies on
+ ** "base" being the first member of Fts5Iter - confirm. */
+ sqlite3Fts5IterClose(&pRet->base);
+ pRet = 0;
+ fts5CloseReader(p);
+ }
+
+ *ppIter = &pRet->base;
+ sqlite3Fts5BufferFree(&buf);
+ }
+ return fts5IndexReturn(p);
+}
+
+/*
+** Return true if the iterator passed as the only argument is at EOF.
+*/
+/*
+** Advance the iterator to the next matching rowid.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets the rc value of
+** the index that owns the iterator.
+*/
+static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
+  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
+  Fts5Index *pIndex = pIter->pIndex;
+
+  assert( pIndex->rc==SQLITE_OK );
+  fts5MultiIterNext(pIndex, pIter, 0, 0);
+  return fts5IndexReturn(pIndex);
+}
+
+/*
+** Move to the next matching term/rowid. Used by the fts5vocab module.
+**
+** If, after advancing, the current term does not begin with the
+** FTS5_MAIN_PREFIX byte, the scan has left the main index. In that case
+** the current leaf is released and the iterator is marked as being at EOF.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets the index rc.
+*/
+static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
+  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
+  Fts5Index *pIndex = pIter->pIndex;
+
+  assert( pIndex->rc==SQLITE_OK );
+
+  fts5MultiIterNext(pIndex, pIter, 0, 0);
+  if( pIndex->rc==SQLITE_OK ){
+    Fts5SegIter *pCur = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
+    if( pCur->pLeaf && pCur->term.p[0]!=FTS5_MAIN_PREFIX ){
+      fts5DataRelease(pCur->pLeaf);
+      pCur->pLeaf = 0;
+      pIter->base.bEof = 1;
+    }
+  }
+
+  return fts5IndexReturn(pIndex);
+}
+
+/*
+** Advance the iterator to the next matching rowid that is at or after
+** iMatch. What "at or after" means depends on whether the iterator runs
+** in ascending or descending rowid order.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets the index rc.
+*/
+static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
+  Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
+  Fts5Index *pIndex = pIter->pIndex;
+
+  fts5MultiIterNextFrom(pIndex, pIter, iMatch);
+  return fts5IndexReturn(pIndex);
+}
+
+/*
+** Return a pointer to the current term and store its size in bytes in
+** *pn. The first byte of the stored term is skipped, so both the returned
+** pointer and the reported size exclude it.
+*/
+static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
+  int nFull;                      /* Size of term including first byte */
+  const char *zFull;              /* Term including first byte */
+
+  zFull = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &nFull);
+  *pn = nFull - 1;
+  return zFull + 1;
+}
+
+/*
+** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
+** The iterator is freed and the reader of the owning index is closed.
+** Passing NULL is a harmless no-op.
+*/
+static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
+  Fts5Iter *pIter;
+  Fts5Index *pIndex;
+
+  if( pIndexIter==0 ) return;
+
+  pIter = (Fts5Iter*)pIndexIter;
+  pIndex = pIter->pIndex;         /* Fetch before pIter is freed */
+  fts5MultiIterFree(pIter);
+  fts5CloseReader(pIndex);
+}
+
+/*
+** Read and decode the "averages" record from the database.
+**
+** Parameter anSize must point to an array of size nCol, where nCol is
+** the number of user defined columns in the FTS table.
+**
+** *pnRow and every element of anSize[] are zeroed first. If the record is
+** not empty, its first varint is stored in *pnRow and the following
+** varints are stored in anSize[], stopping at the end of the record or
+** after nCol values, whichever comes first.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
+  const int nCol = p->pConfig->nCol;
+  Fts5Data *pRec;                 /* The averages record */
+
+  *pnRow = 0;
+  memset(anSize, 0, sizeof(i64) * nCol);
+
+  pRec = fts5DataRead(p, FTS5_AVERAGES_ROWID);
+  if( p->rc==SQLITE_OK && pRec->nn ){
+    int iOff = 0;                 /* Current read offset in pRec->p[] */
+    int iCol = 0;
+    iOff += fts5GetVarint(&pRec->p[iOff], (u64*)pnRow);
+    while( iOff<pRec->nn && iCol<nCol ){
+      iOff += fts5GetVarint(&pRec->p[iOff], (u64*)&anSize[iCol]);
+      iCol++;
+    }
+  }
+
+  fts5DataRelease(pRec);
+  return fts5IndexReturn(p);
+}
+
+/*
+** Overwrite the "averages" record with the nData bytes at pData.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
+  int rc;
+
+  assert( p->rc==SQLITE_OK );
+  fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
+  rc = fts5IndexReturn(p);
+  return rc;
+}
+
+/*
+** Return the value of the nRead counter, which is the total number of
+** blocks this module has read from the %_data table since the handle was
+** created.
+*/
+static int sqlite3Fts5IndexReads(Fts5Index *p){
+  const int nBlocksRead = p->nRead;
+  return nBlocksRead;
+}
+
+/*
+** Set the 32-bit cookie value stored at the start of all structure
+** records to the value passed as the second argument.
+**
+** The cookie is written over the first four bytes of the structure record
+** using the incremental blob API.
+**
+** Return SQLITE_OK if successful, or an SQLite error code if an error
+** occurs. An error from sqlite3_blob_write() is reported to the caller.
+** The blob handle is closed in every case. If both the write and the
+** close fail, the write error is the one returned.
+*/
+static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
+  int rc;                              /* Return code */
+  Fts5Config *pConfig = p->pConfig;    /* Configuration object */
+  u8 aCookie[4];                       /* Binary representation of iNew */
+  sqlite3_blob *pBlob = 0;
+
+  assert( p->rc==SQLITE_OK );
+  sqlite3Fts5Put32(aCookie, iNew);
+
+  rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
+      "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
+  );
+  if( rc==SQLITE_OK ){
+    int rcClose;
+    rc = sqlite3_blob_write(pBlob, aCookie, 4, 0);
+    /* Always close the handle, but do not let a successful close hide a
+    ** failed write. */
+    rcClose = sqlite3_blob_close(pBlob);
+    if( rc==SQLITE_OK ) rc = rcClose;
+  }
+
+  return rc;
+}
+
+/*
+** Read the structure record and release it again straight away.
+** NOTE(review): presumably the read is done for a side effect of
+** fts5StructureRead() such as reloading the configuration - confirm
+** against that function.
+**
+** Returns SQLITE_OK or an SQLite error code, and resets p->rc.
+*/
+static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
+  Fts5Structure *pLoaded = fts5StructureRead(p);
+  fts5StructureRelease(pLoaded);
+  return fts5IndexReturn(p);
+}
+
+
+/*************************************************************************
+**************************************************************************
+** Below this point is the implementation of the integrity-check
+** functionality.
+*/
+
+/*
+** Return a simple checksum value based on the arguments.
+**
+** Starting from iRowid, each further input (the column, the position, the
+** index-prefix byte if iIdx is not negative, then each byte of the term)
+** is folded in as "cksum = cksum*9 + value" using unsigned 64-bit
+** arithmetic. This is the same as "cksum += (cksum<<3) + value".
+*/
+static u64 sqlite3Fts5IndexEntryCksum(
+  i64 iRowid,                     /* Rowid of the entry */
+  int iCol,                       /* Column number */
+  int iPos,                       /* Position within the column */
+  int iIdx,                       /* Index number, or negative to omit it */
+  const char *pTerm,              /* Term */
+  int nTerm                       /* Size of pTerm in bytes */
+){
+  u64 cksum = iRowid;
+  int iByte;
+
+  cksum = cksum*9 + iCol;
+  cksum = cksum*9 + iPos;
+  if( iIdx>=0 ){
+    cksum = cksum*9 + (FTS5_MAIN_PREFIX + iIdx);
+  }
+  for(iByte=0; iByte<nTerm; iByte++){
+    cksum = cksum*9 + pTerm[iByte];
+  }
+  return cksum;
+}
+
+#ifdef SQLITE_DEBUG
+/*
+** This function is purely an internal test. It does not contribute to
+** FTS functionality, or even the integrity-check, in any way.
+**
+** It walks the doclist-index identified by iSegid/iLeaf once forwards and
+** once in reverse, summing a value derived from each pgno/rowid pair. If
+** the two sums differ and no error has occurred yet, p->rc is set to
+** FTS5_CORRUPT.
+*/
+static void fts5TestDlidxReverse(
+  Fts5Index *p,
+  int iSegid,                     /* Segment id to load from */
+  int iLeaf                       /* Load doclist-index for this leaf */
+){
+  Fts5DlidxIter *pDlidx;
+  u64 cksumFwd = 13;              /* Checksum of the forward pass */
+  u64 cksumRev = 13;              /* Checksum of the reverse pass */
+
+  /* Forward pass */
+  pDlidx = fts5DlidxIterInit(p, 0, iSegid, iLeaf);
+  while( fts5DlidxIterEof(p, pDlidx)==0 ){
+    i64 iRowid = fts5DlidxIterRowid(pDlidx);
+    int pgno = fts5DlidxIterPgno(pDlidx);
+    assert( pgno>iLeaf );
+    cksumFwd += iRowid + ((i64)pgno<<32);
+    fts5DlidxIterNext(p, pDlidx);
+  }
+  fts5DlidxIterFree(pDlidx);
+  pDlidx = 0;
+
+  /* Reverse pass */
+  pDlidx = fts5DlidxIterInit(p, 1, iSegid, iLeaf);
+  while( fts5DlidxIterEof(p, pDlidx)==0 ){
+    i64 iRowid = fts5DlidxIterRowid(pDlidx);
+    int pgno = fts5DlidxIterPgno(pDlidx);
+    assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
+    cksumRev += iRowid + ((i64)pgno<<32);
+    fts5DlidxIterPrev(p, pDlidx);
+  }
+  fts5DlidxIterFree(pDlidx);
+  pDlidx = 0;
+
+  if( p->rc==SQLITE_OK && cksumFwd!=cksumRev ) p->rc = FTS5_CORRUPT;
+}
+
+/*
+** Run a query for key z/n against the index and XOR a checksum of every
+** entry it returns into *pCksum.
+**
+** With detail=none one checksum is computed per rowid, using column 0 and
+** offset 0. Otherwise one checksum is computed for every position in each
+** row's position list.
+**
+** Returns SQLITE_OK or the first error code from opening or advancing the
+** iterator. *pCksum is updated even if an error occurs part-way through.
+*/
+static int fts5QueryCksum(
+  Fts5Index *p,                   /* Fts5 index object */
+  int iIdx,
+  const char *z,                  /* Index key to query for */
+  int n,                          /* Size of index key in bytes */
+  int flags,                      /* Flags for Fts5IndexQuery */
+  u64 *pCksum                     /* IN/OUT: Checksum value */
+){
+  const int bNoPoslist = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
+  Fts5IndexIter *pIter = 0;
+  u64 cksum = *pCksum;
+  int rc;
+
+  rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
+  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
+    const i64 rowid = pIter->iRowid;
+
+    if( bNoPoslist ){
+      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
+    }else{
+      Fts5PoslistReader sReader;
+      sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
+      while( sReader.bEof==0 ){
+        int iCol = FTS5_POS2COLUMN(sReader.iPos);
+        int iOff = FTS5_POS2OFFSET(sReader.iPos);
+        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
+        sqlite3Fts5PoslistReaderNext(&sReader);
+      }
+    }
+
+    rc = sqlite3Fts5IterNext(pIter);
+  }
+  sqlite3Fts5IterClose(pIter);
+
+  *pCksum = cksum;
+  return rc;
+}
+
+
+/*
+** This function is also purely an internal test. It does not contribute to
+** FTS functionality, or even the integrity-check, in any way.
+*/
+static void fts5TestTerm(
+ Fts5Index *p, /* Index handle; the outcome is stored in p->rc */
+ Fts5Buffer *pPrev, /* Previous term */
+ const char *z, int n, /* Possibly new term to test */
+ u64 expected, /* Value *pCksum must have once the previous term is folded in */
+ u64 *pCksum /* IN/OUT: running XOR of per-term query checksums */
+){
+ int rc = p->rc;
+ if( pPrev->n==0 ){
+ fts5BufferSet(&rc, pPrev, n, (const u8*)z); /* No previous term yet: just remember this one */
+ }else
+ if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
+ u64 cksum3 = *pCksum;
+ const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
+ int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
+ int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); /* Index number recovered from the prefix byte */
+ int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
+ u64 ck1 = 0;
+ u64 ck2 = 0;
+
+ /* Check that the results returned for ASC and DESC queries are
+ ** the same. If not, call this corruption. */
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
+ if( rc==SQLITE_OK ){
+ int f = flags|FTS5INDEX_QUERY_DESC;
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
+ }
+ if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+
+ /* If this is a prefix query, check that the results returned if
+ ** the index is disabled are the same. In both ASC and DESC order.
+ **
+ ** This check may only be performed if the hash table is empty. This
+ ** is because the hash table only supports a single scan query at
+ ** a time, and the multi-iter loop from which this function is called
+ ** is already performing such a scan. */
+ if( p->nPendingData==0 ){
+ if( iIdx>0 && rc==SQLITE_OK ){
+ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
+ ck2 = 0;
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
+ if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ }
+ if( iIdx>0 && rc==SQLITE_OK ){
+ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
+ ck2 = 0;
+ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
+ if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
+ }
+ }
+
+ cksum3 ^= ck1; /* Fold the previous term's query results into the running checksum */
+ fts5BufferSet(&rc, pPrev, n, (const u8*)z); /* The new term becomes the "previous" one */
+
+ if( rc==SQLITE_OK && cksum3!=expected ){
+ rc = FTS5_CORRUPT;
+ }
+ *pCksum = cksum3;
+ }
+ p->rc = rc;
+}
+
+#else
+# define fts5TestDlidxReverse(x,y,z)
+# define fts5TestTerm(u,v,w,x,y,z)
+#endif
+
+/*
+** Check that:
+**
+** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
+** contain zero terms.
+** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
+** contain zero rowids.
+*/
+static void fts5IndexIntegrityCheckEmpty(
+ Fts5Index *p, /* Index handle; p->rc is set to FTS5_CORRUPT on failure */
+ Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
+ int iFirst, /* First leaf page to check */
+ int iNoRowid, /* Leaves from this page onwards must also hold no rowid */
+ int iLast /* Last leaf page to check (inclusive) */
+){
+ int i;
+
+ /* Check each leaf in the range iFirst..iLast: it must contain no terms
+ ** and, from iNoRowid onwards, no rowid either. */
+ for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
+ Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
+ if( pLeaf ){
+ if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
+ if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
+ }
+ fts5DataRelease(pLeaf);
+ }
+}
+
+static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ /* Check the page-index of pLeaf: offsets in range, terms strictly ascending */
+ int iTermOff = 0; /* Running sum of the page-index deltas read so far */
+ int ii; /* Read cursor within the page-index */
+
+ Fts5Buffer buf1 = {0,0,0}; /* Term currently being reconstructed */
+ Fts5Buffer buf2 = {0,0,0}; /* Copy of the previous term */
+
+ ii = pLeaf->szLeaf; /* The page-index occupies bytes szLeaf..nn-1 of the page */
+ while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
+ int res;
+ int iOff;
+ int nIncr;
+
+ ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
+ iTermOff += nIncr;
+ iOff = iTermOff;
+
+ if( iOff>=pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT; /* Term offset points outside the leaf content */
+ }else if( iTermOff==nIncr ){ /* First entry: term stored as (size, bytes) */
+ int nByte;
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
+ if( (iOff+nByte)>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
+ }
+ }else{ /* Later entry: keep nKeep bytes of the previous term, append nByte new ones */
+ int nKeep, nByte;
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
+ if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ buf1.n = nKeep;
+ fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
+ }
+
+ if( p->rc==SQLITE_OK ){
+ res = fts5BufferCompare(&buf1, &buf2);
+ if( res<=0 ) p->rc = FTS5_CORRUPT; /* Term is not greater than its predecessor */
+ }
+ }
+ fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); /* Remember this term for the next comparison */
+ }
+
+ fts5BufferFree(&buf1);
+ fts5BufferFree(&buf2);
+}
+
+static void fts5IndexIntegrityCheckSegment( /* Check the %_idx b-tree entries of one segment against its leaf pages */
+ Fts5Index *p, /* FTS5 backend object */
+ Fts5StructureSegment *pSeg /* Segment to check internal consistency */
+){
+ Fts5Config *pConfig = p->pConfig;
+ sqlite3_stmt *pStmt = 0;
+ int rc2;
+ int iIdxPrevLeaf = pSeg->pgnoFirst-1; /* Leaf named by the previous b-tree entry */
+ int iDlidxPrevLeaf = pSeg->pgnoLast; /* Leaves after this one must hold no rowid */
+
+ if( pSeg->pgnoFirst==0 ) return; /* Nothing is checked for such a segment */
+
+ fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
+ "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
+ pConfig->zDb, pConfig->zName, pSeg->iSegid
+ ));
+
+ /* Iterate through the b-tree hierarchy. */
+ while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
+ i64 iRow; /* Rowid for this leaf */
+ Fts5Data *pLeaf; /* Data for this leaf */
+
+ int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
+ const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
+ int iIdxLeaf = sqlite3_column_int(pStmt, 2); /* Leaf page number: pgno>>1 */
+ int bIdxDlidx = sqlite3_column_int(pStmt, 3); /* Doclist-index flag: pgno&1 */
+
+ /* If the leaf in question has already been trimmed from the segment,
+ ** ignore this b-tree entry. Otherwise, load it into memory. */
+ if( iIdxLeaf<pSeg->pgnoFirst ) continue;
+ iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
+ pLeaf = fts5DataRead(p, iRow);
+ if( pLeaf==0 ) break;
+
+ /* Check that the leaf contains at least one term, and that it is equal
+ ** to or larger than the split-key in zIdxTerm. Also check that if there
+ ** is also a rowid pointer within the leaf page header, it points to a
+ ** location before the term. */
+ if( pLeaf->nn<=pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT; /* No page-index bytes, so no terms on this leaf */
+ }else{
+ int iOff; /* Offset of first term on leaf */
+ int iRowidOff; /* Offset of first rowid on leaf */
+ int nTerm; /* Size of term on leaf in bytes */
+ int res; /* Comparison of term and split-key */
+
+ iOff = fts5LeafFirstTermOff(pLeaf);
+ iRowidOff = fts5LeafFirstRowidOff(pLeaf);
+ if( iRowidOff>=iOff ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
+ res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
+ if( res==0 ) res = nTerm - nIdxTerm; /* Equal prefix: the longer one sorts later */
+ if( res<0 ) p->rc = FTS5_CORRUPT;
+ }
+
+ fts5IntegrityCheckPgidx(p, pLeaf);
+ }
+ fts5DataRelease(pLeaf);
+ if( p->rc ) break;
+
+ /* Check the leaves after the previous b-tree entry's leaf and before this
+ ** one: no terms, and no rowids past iDlidxPrevLeaf. */
+ fts5IndexIntegrityCheckEmpty(
+ p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
+ );
+ if( p->rc ) break;
+
+ /* If there is a doclist-index, check that it looks right. */
+ if( bIdxDlidx ){
+ Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
+ int iPrevLeaf = iIdxLeaf;
+ int iSegid = pSeg->iSegid;
+ int iPg = 0;
+ i64 iKey;
+
+ for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
+ fts5DlidxIterEof(p, pDlidx)==0;
+ fts5DlidxIterNext(p, pDlidx)
+ ){
+
+ /* Check any rowid-less pages that occur before the current leaf. */
+ for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
+ iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
+ pLeaf = fts5DataRead(p, iKey);
+ if( pLeaf ){
+ if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
+ fts5DataRelease(pLeaf);
+ }
+ }
+ iPrevLeaf = fts5DlidxIterPgno(pDlidx);
+
+ /* Check that the leaf page indicated by the iterator really does
+ ** contain the rowid suggested by the same. */
+ iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
+ pLeaf = fts5DataRead(p, iKey);
+ if( pLeaf ){
+ i64 iRowid;
+ int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
+ ASSERT_SZLEAF_OK(pLeaf);
+ if( iRowidOff>=pLeaf->szLeaf ){
+ p->rc = FTS5_CORRUPT;
+ }else{
+ fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
+ if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
+ }
+ fts5DataRelease(pLeaf);
+ }
+ }
+
+ iDlidxPrevLeaf = iPg;
+ fts5DlidxIterFree(pDlidx);
+ fts5TestDlidxReverse(p, iSegid, iIdxLeaf); /* Expands to nothing unless SQLITE_DEBUG is defined */
+ }else{
+ iDlidxPrevLeaf = pSeg->pgnoLast;
+ /* TODO: Check there is no doclist index */
+ }
+
+ iIdxPrevLeaf = iIdxLeaf;
+ }
+
+ rc2 = sqlite3_finalize(pStmt);
+ if( p->rc==SQLITE_OK ) p->rc = rc2; /* Report a finalize error only if nothing failed earlier */
+
+ /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
+#if 0
+ if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
+ p->rc = FTS5_CORRUPT;
+ }
+#endif
+}
+
+
+/*
+** Run internal checks to ensure that the FTS index (a) is internally
+** consistent and (b) contains entries for which the XOR of the checksums
+** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
+**
+** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
+** checksum does not match. Return SQLITE_OK if all checks pass without
+** error, or some other SQLite error code if another error (e.g. OOM)
+** occurs.
+*/
+static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
+ int eDetail = p->pConfig->eDetail;
+ u64 cksum2 = 0; /* Checksum based on contents of indexes */
+ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
+ Fts5Iter *pIter; /* Used to iterate through entire index */
+ Fts5Structure *pStruct; /* Index structure */
+
+#ifdef SQLITE_DEBUG
+ /* Used by extra internal tests only compiled if SQLITE_DEBUG is defined */
+ u64 cksum3 = 0; /* Checksum based on contents of indexes */
+ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
+#endif
+ const int flags = FTS5INDEX_QUERY_NOOUTPUT;
+
+ /* Load the FTS index structure */
+ pStruct = fts5StructureRead(p);
+
+ /* Check that the internal nodes of each segment match the leaves */
+ if( pStruct ){
+ int iLvl, iSeg;
+ for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
+ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
+ Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
+ fts5IndexIntegrityCheckSegment(p, pSeg);
+ }
+ }
+ }
+
+ /* The cksum argument passed to this function is a checksum calculated
+ ** based on all expected entries in the FTS index (including prefix index
+ ** entries). This block checks that a checksum calculated based on the
+ ** actual contents of FTS index is identical.
+ **
+ ** Two versions of the same checksum are calculated. The first (stack
+ ** variable cksum2) based on entries extracted from the full-text index
+ ** while doing a linear scan of each individual index in turn.
+ **
+ ** As each term is visited by the linear scans, a separate query for the
+ ** same term is performed. cksum3 is calculated based on the entries
+ ** extracted by these queries.
+ */
+ for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
+ fts5MultiIterEof(p, pIter)==0;
+ fts5MultiIterNext(p, pIter, 0, 0)
+ ){
+ int n; /* Size of term in bytes */
+ i64 iPos = 0; /* Position read from poslist */
+ int iOff = 0; /* Offset within poslist */
+ i64 iRowid = fts5MultiIterRowid(pIter);
+ char *z = (char*)fts5MultiIterTerm(pIter, &n);
+
+ /* If this is a new term, query for it. Update cksum3 with the results. */
+ fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
+
+ if( eDetail==FTS5_DETAIL_NONE ){
+ if( 0==fts5MultiIterIsEmpty(p, pIter) ){
+ cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); /* -1: z still carries its prefix byte */
+ }
+ }else{
+ poslist.n = 0; /* Reuse the buffer's allocation for each entry */
+ fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
+ while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
+ int iCol = FTS5_POS2COLUMN(iPos);
+ int iTokOff = FTS5_POS2OFFSET(iPos);
+ cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
+ }
+ }
+ }
+ fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); /* Empty term so the last real term is tested too */
+
+ fts5MultiIterFree(pIter);
+ if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
+
+ fts5StructureRelease(pStruct);
+#ifdef SQLITE_DEBUG
+ fts5BufferFree(&term);
+#endif
+ fts5BufferFree(&poslist);
+ return fts5IndexReturn(p);
+}
+
+/*************************************************************************
+**************************************************************************
+** Below this point is the implementation of the fts5_decode() scalar
+** function only.
+*/
+
+/*
+** Decode a segment-data rowid from the %_data table. This function is
+** the opposite of macro FTS5_SEGMENT_ROWID().
+*/
+static void fts5DecodeRowid(
+ i64 iRowid, /* Rowid from %_data table */
+ int *piSegid, /* OUT: Segment id */
+ int *pbDlidx, /* OUT: Dlidx flag */
+ int *piHeight, /* OUT: Height */
+ int *piPgno /* OUT: Page number */
+){
+ *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); /* Lowest FTS5_DATA_PAGE_B bits */
+ iRowid >>= FTS5_DATA_PAGE_B;
+
+ *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); /* Next FTS5_DATA_HEIGHT_B bits */
+ iRowid >>= FTS5_DATA_HEIGHT_B;
+
+ *pbDlidx = (int)(iRowid & 0x0001); /* Only one bit is read, then FTS5_DATA_DLI_B bits are skipped */
+ iRowid >>= FTS5_DATA_DLI_B;
+
+ *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); /* Next FTS5_DATA_ID_B bits */
+}
+
+static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ /* Append a "{...}" description of %_data rowid iKey to pBuf */
+ int iSegid, iHeight, iPgno, bDlidx; /* Rowid components */
+ fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
+
+ if( iSegid==0 ){ /* Segment id 0: either the averages or the structure record */
+ if( iKey==FTS5_AVERAGES_ROWID ){
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
+ }else{
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
+ }
+ }
+ else{
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
+ bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
+ );
+ }
+}
+
+static void fts5DebugStructure( /* Append one " {lvl=...}" group per level of structure p to pBuf */
+ int *pRc, /* IN/OUT: error code */
+ Fts5Buffer *pBuf, /* Buffer to append text to */
+ Fts5Structure *p /* Structure to describe */
+){
+ int iLvl, iSeg; /* Iterate through levels, segments */
+
+ for(iLvl=0; iLvl<p->nLevel; iLvl++){
+ Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
+ " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
+ );
+ for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
+ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
+ pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
+ );
+ }
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); /* Closes the level group opened above */
+ }
+}
+
+/*
+** This is part of the fts5_decode() debugging aid.
+**
+** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
+** function appends a human-readable representation of the same object
+** to the buffer passed as the second argument.
+*/
+static void fts5DecodeStructure(
+ int *pRc, /* IN/OUT: error code */
+ Fts5Buffer *pBuf, /* Buffer to append text to */
+ const u8 *pBlob, int nBlob /* Serialized structure record */
+){
+ int rc; /* Return code */
+ Fts5Structure *p = 0; /* Decoded structure object */
+
+ rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
+ if( rc!=SQLITE_OK ){
+ *pRc = rc; /* Decode failed: report the error and append nothing */
+ return;
+ }
+
+ fts5DebugStructure(pRc, pBuf, p);
+ fts5StructureRelease(p);
+}
+
+/*
+** This is part of the fts5_decode() debugging aid.
+**
+** Arguments pBlob/nBlob contain an "averages" record. This function
+** appends a human-readable representation of record to the buffer passed
+** as the second argument.
+*/
+static void fts5DecodeAverages(
+ int *pRc, /* IN/OUT: error code */
+ Fts5Buffer *pBuf, /* Buffer to append text to */
+ const u8 *pBlob, int nBlob /* "averages" record: a list of varints */
+){
+ int i = 0; /* Read offset within pBlob */
+ const char *zSpace = ""; /* Separator: empty before the first value */
+
+ while( i<nBlob ){
+ u64 iVal;
+ i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%lld", zSpace, (i64)iVal); /* Full 64 bits: (int) would mangle values above INT_MAX */
+ zSpace = " ";
+ }
+}
+
+/*
+** Buffer (a/n) is assumed to contain a list of serialized varints. Read
+** each varint and append its string representation to buffer pBuf. Return
+** once the input buffer is exhausted (a 0 value does not end the loop).
+**
+** The return value is the number of bytes read from the input buffer.
+*/
+static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
+ int iOff = 0;
+ while( iOff<n ){ /* Not entered at all when n<=0 */
+ int iVal;
+ iOff += fts5GetVarint32(&a[iOff], iVal);
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
+ }
+ return iOff;
+}
+
+/*
+** The start of buffer (a/n) contains the start of a doclist. The doclist
+** may or may not finish within the buffer. This function appends a text
+** representation of the part of the doclist that is present to buffer
+** pBuf.
+**
+** The return value is the number of bytes read from the input buffer.
+*/
+static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
+ i64 iDocid = 0; /* Current docid: first value, then running sum of deltas */
+ int iOff = 0; /* Read offset within a[] */
+
+ if( n>0 ){
+ iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); /* The first docid is stored in full */
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
+ }
+ while( iOff<n ){
+ int nPos;
+ int bDel;
+ iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); /* "*" is appended when bDel is set */
+ iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); /* Clamped to what is left in the buffer */
+ if( iOff<n ){
+ i64 iDelta;
+ iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
+ iDocid += iDelta;
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
+ }
+ }
+
+ return iOff;
+}
+
+/*
+** This function is part of the fts5_decode() debugging function. It is
+** only ever used with detail=none tables.
+**
+** Buffer (pData/nData) contains a doclist in the format used by detail=none
+** tables. This function appends a human-readable version of that list to
+** buffer pBuf.
+**
+** If *pRc is other than SQLITE_OK when this function is called, it is a
+** no-op. If an OOM or other error occurs within this function, *pRc is
+** set to an SQLite error code before returning. The final state of buffer
+** pBuf is undefined in this case.
+*/
+static void fts5DecodeRowidList(
+ int *pRc, /* IN/OUT: Error code */
+ Fts5Buffer *pBuf, /* Buffer to append text to */
+ const u8 *pData, int nData /* Data to decode list-of-rowids from */
+){
+ int i = 0; /* Read offset within pData */
+ i64 iRowid = 0; /* Running sum of the varint deltas */
+
+ while( i<nData ){ /* Not entered at all when nData<=0 */
+ const char *zApp = ""; /* Suffix printed after the rowid */
+ u64 iVal;
+ i += sqlite3Fts5GetVarint(&pData[i], &iVal);
+ iRowid += iVal;
+
+ if( i<nData && pData[i]==0x00 ){ /* One 0x00 after the rowid: suffix "*" */
+ i++;
+ if( i<nData && pData[i]==0x00 ){ /* Two 0x00 bytes: suffix "+" instead */
+ i++;
+ zApp = "+";
+ }else{
+ zApp = "*";
+ }
+ }
+
+ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
+ }
+}
+
+/*
+** The implementation of user-defined scalar function fts5_decode().
+*/
+static void fts5DecodeFunction(
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args (always 2) */
+ sqlite3_value **apVal /* Function arguments: (rowid, blob) */
+){
+ i64 iRowid; /* Rowid for record being decoded */
+ int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
+ const u8 *aBlob; int n; /* Record to decode */
+ u8 *a = 0; /* Zero-padded private copy of aBlob */
+ Fts5Buffer s; /* Build up text to return here */
+ int rc = SQLITE_OK; /* Return code */
+ int nSpace = 0; /* Size of allocation a[] in bytes */
+ int eDetailNone = (sqlite3_user_data(pCtx)!=0); /* True when registered with non-NULL user data */
+
+ assert( nArg==2 );
+ UNUSED_PARAM(nArg);
+ memset(&s, 0, sizeof(Fts5Buffer));
+ iRowid = sqlite3_value_int64(apVal[0]);
+
+ /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
+ ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
+ ** buffer overreads even if the record is corrupt. */
+ n = sqlite3_value_bytes(apVal[1]);
+ aBlob = sqlite3_value_blob(apVal[1]);
+ nSpace = n + FTS5_DATA_ZERO_PADDING;
+ a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
+ if( a==0 ) goto decode_out;
+ if( n>0 ) memcpy(a, aBlob, n); /* aBlob may be NULL for an empty blob; never hand NULL to memcpy() */
+
+
+ fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
+
+ fts5DebugRowid(&rc, &s, iRowid); /* Output always starts with the "{...}" rowid description */
+ if( bDlidx ){ /* Doclist-index record: list "pgno(rowid)" pairs */
+ Fts5Data dlidx;
+ Fts5DlidxLvl lvl;
+
+ dlidx.p = a;
+ dlidx.nn = n;
+
+ memset(&lvl, 0, sizeof(Fts5DlidxLvl));
+ lvl.pData = &dlidx;
+ lvl.iLeafPgno = iPgno;
+
+ for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
+ sqlite3Fts5BufferAppendPrintf(&rc, &s,
+ " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
+ );
+ }
+ }else if( iSegid==0 ){ /* Segment id 0: averages or structure record */
+ if( iRowid==FTS5_AVERAGES_ROWID ){
+ fts5DecodeAverages(&rc, &s, a, n);
+ }else{
+ fts5DecodeStructure(&rc, &s, a, n);
+ }
+ }else if( eDetailNone ){ /* Leaf page of a detail=none table */
+ Fts5Buffer term; /* Current term read from page */
+ int szLeaf;
+ int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
+ int iTermOff;
+ int nKeep = 0;
+ int iOff;
+
+ memset(&term, 0, sizeof(Fts5Buffer));
+
+ /* Decode any entries that occur before the first term. */
+ if( szLeaf<n ){
+ iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
+ }else{
+ iTermOff = szLeaf;
+ }
+ fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
+
+ iOff = iTermOff;
+ while( iOff<szLeaf ){
+ int nAppend;
+
+ /* Read the term data for the next term*/
+ iOff += fts5GetVarint32(&a[iOff], nAppend);
+ term.n = nKeep; /* Keep the prefix shared with the previous term */
+ fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
+ sqlite3Fts5BufferAppendPrintf(
+ &rc, &s, " term=%.*s", term.n, (const char*)term.p
+ );
+ iOff += nAppend;
+
+ /* Figure out where the doclist for this term ends */
+ if( iPgidxOff<n ){
+ int nIncr;
+ iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
+ iTermOff += nIncr;
+ }else{
+ iTermOff = szLeaf;
+ }
+
+ fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
+ iOff = iTermOff;
+ if( iOff<szLeaf ){
+ iOff += fts5GetVarint32(&a[iOff], nKeep);
+ }
+ }
+
+ fts5BufferFree(&term);
+ }else{ /* Leaf page of any other table */
+ Fts5Buffer term; /* Current term read from page */
+ int szLeaf; /* Offset of pgidx in a[] */
+ int iPgidxOff;
+ int iPgidxPrev = 0; /* Previous value read from pgidx */
+ int iTermOff = 0;
+ int iRowidOff = 0;
+ int iOff;
+ int nDoclist;
+
+ memset(&term, 0, sizeof(Fts5Buffer));
+
+ if( n<4 ){ /* Too short to hold the two 16-bit header fields */
+ sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
+ goto decode_out;
+ }else{
+ iRowidOff = fts5GetU16(&a[0]);
+ iPgidxOff = szLeaf = fts5GetU16(&a[2]);
+ if( iPgidxOff<n ){
+ fts5GetVarint32(&a[iPgidxOff], iTermOff);
+ }
+ }
+
+ /* Decode the position list tail at the start of the page */
+ if( iRowidOff!=0 ){
+ iOff = iRowidOff;
+ }else if( iTermOff!=0 ){
+ iOff = iTermOff;
+ }else{
+ iOff = szLeaf;
+ }
+ fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
+
+ /* Decode any more doclist data that appears on the page before the
+ ** first term. */
+ nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
+ fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
+
+ while( iPgidxOff<n ){
+ int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
+ int nByte; /* Bytes of data */
+ int iEnd; /* Offset at which this term's doclist ends */
+
+ iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
+ iPgidxPrev += nByte;
+ iOff = iPgidxPrev;
+
+ if( iPgidxOff<n ){
+ fts5GetVarint32(&a[iPgidxOff], nByte); /* Peek only: iPgidxOff is not advanced */
+ iEnd = iPgidxPrev + nByte;
+ }else{
+ iEnd = szLeaf;
+ }
+
+ if( bFirst==0 ){
+ iOff += fts5GetVarint32(&a[iOff], nByte);
+ term.n = nByte; /* Bytes kept from the previous term */
+ }
+ iOff += fts5GetVarint32(&a[iOff], nByte);
+ fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
+ iOff += nByte;
+
+ sqlite3Fts5BufferAppendPrintf(
+ &rc, &s, " term=%.*s", term.n, (const char*)term.p
+ );
+ iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
+ }
+
+ fts5BufferFree(&term);
+ }
+
+ decode_out:
+ sqlite3_free(a);
+ if( rc==SQLITE_OK ){
+ sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
+ }else{
+ sqlite3_result_error_code(pCtx, rc);
+ }
+ fts5BufferFree(&s);
+}
+
+/*
+** The implementation of user-defined scalar function fts5_rowid().
+*/
+static void fts5RowidFunction(
+ sqlite3_context *pCtx, /* Function call context */
+ int nArg, /* Number of args (at least 1; 3 for 'segment') */
+ sqlite3_value **apVal /* Function arguments */
+){
+ const char *zArg; /* First argument: the subject keyword */
+ if( nArg==0 ){
+ sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
+ }else{
+ zArg = (const char*)sqlite3_value_text(apVal[0]);
+ if( 0==sqlite3_stricmp(zArg, "segment") ){ /* Keyword match ignores case */
+ i64 iRowid;
+ int segid, pgno;
+ if( nArg!=3 ){
+ sqlite3_result_error(pCtx,
+ "should be: fts5_rowid('segment', segid, pgno)", -1
+ );
+ }else{
+ segid = sqlite3_value_int(apVal[1]);
+ pgno = sqlite3_value_int(apVal[2]);
+ iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
+ sqlite3_result_int64(pCtx, iRowid);
+ }
+ }else{
+ sqlite3_result_error(pCtx,
+ "first arg to fts5_rowid() must be 'segment'" , -1
+ );
+ }
+ }
+}
+
+/*
+** This is called as part of registering the FTS5 module with database
+** connection db. It registers several user-defined scalar functions useful
+** with FTS5.
+**
+** If successful, SQLITE_OK is returned. If an error occurs, some other
+** SQLite error code is returned instead.
+*/
+static int sqlite3Fts5IndexInit(sqlite3 *db){
+ int rc = sqlite3_create_function(
+ db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
+ );
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function( /* Same decoder; non-NULL user data selects its detail=none path */
+ db, "fts5_decode_none", 2,
+ SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
+ );
+ }
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function( /* -1: fts5RowidFunction checks its own argument count */
+ db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
+ );
+ }
+ return rc; /* First failure wins; later registrations are skipped */
+}
+
+#line 1 "fts5_main.c"
+/*
+** 2014 Jun 09
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This is an SQLite module implementing full-text search.
+*/
+
+
+/* #include "fts5Int.h" */
+
+/*
+** This variable is set to false when running tests for which the on disk
+** structures should not be corrupt. Otherwise, true. If it is false, extra
+** assert() conditions in the fts5 code are activated - conditions that are
+** only true if it is guaranteed that the fts5 database is not corrupt.
+*/
+int sqlite3_fts5_may_be_corrupt = 1;
+
+
+typedef struct Fts5Auxdata Fts5Auxdata;
+typedef struct Fts5Auxiliary Fts5Auxiliary;
+typedef struct Fts5Cursor Fts5Cursor;
+typedef struct Fts5Sorter Fts5Sorter;
+typedef struct Fts5Table Fts5Table;
+typedef struct Fts5TokenizerModule Fts5TokenizerModule;
+
+/*
+** NOTES ON TRANSACTIONS:
+**
+** SQLite invokes the following virtual table methods as transactions are
+** opened and closed by the user:
+**
+** xBegin(): Start of a new transaction.
+** xSync(): Initial part of two-phase commit.
+** xCommit(): Final part of two-phase commit.
+** xRollback(): Rollback the transaction.
+**
+** Anything that is required as part of a commit that may fail is performed
+** in the xSync() callback. Current versions of SQLite ignore any errors
+** returned by xCommit().
+**
+** And as sub-transactions are opened/closed:
+**
+** xSavepoint(int S): Open savepoint S.
+** xRelease(int S): Commit and close savepoint S.
+** xRollbackTo(int S): Rollback to start of savepoint S.
+**
+** During a write-transaction the fts5_index.c module may cache some data
+** in-memory. It is flushed to disk whenever xSync(), xRelease() or
+** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo()
+** is called.
+**
+** Additionally, if SQLITE_DEBUG is defined, an instance of the following
+** structure is used to record the current transaction state. This information
+** is not required, but it is used in the assert() statements executed by
+** function fts5CheckTransactionState() (see below).
+*/
+struct Fts5TransactionState { /* Embedded in Fts5Table only when SQLITE_DEBUG is defined */
+ int eState; /* 0==closed, 1==open, 2==synced */
+ int iSavepoint; /* Number of open savepoints (0 -> none) */
+};
+
+/*
+** A single object of this type is allocated when the FTS5 module is
+** registered with a database handle. It is used to store pointers to
+** all registered FTS5 extensions - tokenizers and auxiliary functions.
+*/
+struct Fts5Global {
+ fts5_api api; /* User visible part of object (see fts5.h) */
+ sqlite3 *db; /* Associated database connection */
+ i64 iNextId; /* Used to allocate unique cursor ids */
+ Fts5Auxiliary *pAux; /* First in list of all aux. functions (linked by Fts5Auxiliary.pNext) */
+ Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules (linked by Fts5TokenizerModule.pNext) */
+ Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */
+ Fts5Cursor *pCsr; /* First in list of all open cursors (linked by Fts5Cursor.pNext) */
+};
+
+/*
+** Each auxiliary function registered with the FTS5 module is represented
+** by an object of the following type. All such objects are stored as part
+** of the Fts5Global.pAux list.
+*/
+struct Fts5Auxiliary {
+ Fts5Global *pGlobal; /* Global context for this function */
+ char *zFunc; /* Function name (nul-terminated) */
+ void *pUserData; /* User-data pointer */
+ fts5_extension_function xFunc; /* Callback function */
+ void (*xDestroy)(void*); /* Destructor function */
+ Fts5Auxiliary *pNext; /* Next registered auxiliary function (list head: Fts5Global.pAux) */
+};
+
+/*
+** Each tokenizer module registered with the FTS5 module is represented
+** by an object of the following type. All such objects are stored as part
+** of the Fts5Global.pTok list.
+*/
+struct Fts5TokenizerModule {
+ char *zName; /* Name of tokenizer */
+ void *pUserData; /* User pointer passed to xCreate() */
+ fts5_tokenizer x; /* Tokenizer functions */
+ void (*xDestroy)(void*); /* Destructor function */
+ Fts5TokenizerModule *pNext; /* Next registered tokenizer module (list head: Fts5Global.pTok) */
+};
+
+/*
+** Virtual-table object.
+*/
+struct Fts5Table {
+ sqlite3_vtab base; /* Base class used by SQLite core */
+ Fts5Config *pConfig; /* Virtual table configuration */
+ Fts5Index *pIndex; /* Full-text index */
+ Fts5Storage *pStorage; /* Document store */
+ Fts5Global *pGlobal; /* Global (connection wide) data */
+ Fts5Cursor *pSortCsr; /* Sort data from this cursor */
+#ifdef SQLITE_DEBUG
+ struct Fts5TransactionState ts; /* Transaction state, consulted only by assert() statements */
+#endif
+};
+
+struct Fts5MatchPhrase { /* NOTE(review): no user of this struct is visible in this part of the file */
+ Fts5Buffer *pPoslist; /* Pointer to current poslist */
+ int nTerm; /* Size of phrase in terms */
+};
+
+/*
+** pStmt:
+** SELECT rowid, <fts> FROM <fts> ORDER BY +rank;
+**
+** aIdx[]:
+** There is one entry in the aIdx[] array for each phrase in the query,
+** the value of which is the offset within aPoslist[] following the last
+** byte of the position list for the corresponding phrase.
+*/
+struct Fts5Sorter {
+ sqlite3_stmt *pStmt; /* Statement of the form shown under "pStmt" above */
+ i64 iRowid; /* Current rowid */
+ const u8 *aPoslist; /* Position lists for current row */
+ int nIdx; /* Number of entries in aIdx[] */
+ int aIdx[1]; /* Offsets into aPoslist for current row (presumably over-allocated to nIdx entries; confirm at the allocation site) */
+};
+
+
+/*
+** Virtual-table cursor object.
+**
+** iSpecial:
+** If this is a 'special' query (refer to function fts5SpecialMatch()),
+** then this variable contains the result of the query.
+**
+** iFirstRowid, iLastRowid:
+** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
+** cursor iterates in ascending order of rowids, iFirstRowid is the lower
+** limit of rowids to return, and iLastRowid the upper. In other words, the
+** WHERE clause in the user's query might have been:
+**
+** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
+**
+** If the cursor iterates in descending order of rowid, iFirstRowid
+** is the upper limit (i.e. the "first" rowid visited) and iLastRowid
+** the lower.
+*/
+struct Fts5Cursor {
+ sqlite3_vtab_cursor base; /* Base class used by SQLite core */
+ Fts5Cursor *pNext; /* Next cursor in Fts5Global.pCsr list */
+ int *aColumnSize; /* Values for xColumnSize() */
+ i64 iCsrId; /* Cursor id */
+
+ /* Zero from this point onwards on cursor reset */
+ int ePlan; /* FTS5_PLAN_XXX value */
+ int bDesc; /* True for "ORDER BY rowid DESC" queries */
+ i64 iFirstRowid; /* Return no rowids earlier than this */
+ i64 iLastRowid; /* Return no rowids later than this */
+ sqlite3_stmt *pStmt; /* Statement used to read %_content */
+ Fts5Expr *pExpr; /* Expression for MATCH queries */
+ Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */
+ int csrflags; /* Mask of cursor flags (FTS5CSR_XXX values, see below) */
+ i64 iSpecial; /* Result of special query */
+
+ /* "rank" function. Populated on demand from vtab.xColumn(). */
+ char *zRank; /* Custom rank function */
+ char *zRankArgs; /* Custom rank function args */
+ Fts5Auxiliary *pRank; /* Rank callback (or NULL) */
+ int nRankArg; /* Number of trailing arguments for rank() */
+ sqlite3_value **apRankArg; /* Array of trailing arguments */
+ sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */
+
+ /* Auxiliary data storage */
+ Fts5Auxiliary *pAux; /* Currently executing extension function */
+ Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
+
+ /* Cache used by auxiliary functions xInst() and xInstCount() */
+ Fts5PoslistReader *aInstIter; /* One for each phrase */
+ int nInstAlloc; /* Size of aInst[] array (entries / 3) */
+ int nInstCount; /* Number of phrase instances */
+ int *aInst; /* 3 integers per phrase instance */
+};
+
+/*
+** Bits that make up the "idxNum" parameter passed indirectly by
+** xBestIndex() to xFilter().
+*/
+#define FTS5_BI_MATCH 0x0001 /* <tbl> MATCH ? */
+#define FTS5_BI_RANK 0x0002 /* rank MATCH ? */
+#define FTS5_BI_ROWID_EQ 0x0004 /* rowid == ? */
+#define FTS5_BI_ROWID_LE 0x0008 /* rowid <= ? */
+#define FTS5_BI_ROWID_GE 0x0010 /* rowid >= ? */
+
+#define FTS5_BI_ORDER_RANK 0x0020 /* ORDER BY rank (only set with MATCH) */
+#define FTS5_BI_ORDER_ROWID 0x0040 /* ORDER BY rowid */
+#define FTS5_BI_ORDER_DESC 0x0080 /* The ORDER BY term is descending */
+
+/*
+** Values for Fts5Cursor.csrflags
+*/
+#define FTS5CSR_EOF 0x01 /* Cursor is at EOF (see fts5EofMethod()) */
+#define FTS5CSR_REQUIRE_CONTENT 0x02 /* pStmt not yet seeked to current row */
+#define FTS5CSR_REQUIRE_DOCSIZE 0x04 /* Set for each new row by fts5CsrNewrow() */
+#define FTS5CSR_REQUIRE_INST 0x08 /* Set for each new row by fts5CsrNewrow() */
+#define FTS5CSR_FREE_ZRANK 0x10 /* zRank/zRankArgs are freed with cursor */
+#define FTS5CSR_REQUIRE_RESEEK 0x20 /* Reseek needed (see fts5TripCursors()) */
+#define FTS5CSR_REQUIRE_POSLIST 0x40 /* Set for each new row by fts5CsrNewrow() */
+
+/* BitFlagAllTest() is true only if every bit of y is set in x;
+** BitFlagTest() is true if at least one bit of y is set in x. */
+#define BitFlagAllTest(x,y) (((x) & (y))==(y))
+#define BitFlagTest(x,y) (((x) & (y))!=0)
+
+
+/*
+** Macros to Set(), Clear() and Test() cursor flags.
+**
+** CsrFlagTest() evaluates to the masked bits themselves, not to a
+** normalized 0 or 1.
+*/
+#define CsrFlagSet(pCsr, flag) ((pCsr)->csrflags |= (flag))
+#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
+#define CsrFlagTest(pCsr, flag) ((pCsr)->csrflags & (flag))
+
+/*
+** One entry in the linked list of auxiliary data saved on a cursor
+** (Fts5Cursor.pAuxdata). The list is released by
+** fts5FreeCursorComponents(), which invokes xDelete (if it is not NULL)
+** on pPtr before freeing the entry itself.
+*/
+struct Fts5Auxdata {
+ Fts5Auxiliary *pAux; /* Extension to which this belongs */
+ void *pPtr; /* Pointer value */
+ void(*xDelete)(void*); /* Destructor */
+ Fts5Auxdata *pNext; /* Next object in linked list */
+};
+
+#ifdef SQLITE_DEBUG
+#define FTS5_BEGIN 1
+#define FTS5_SYNC 2
+#define FTS5_COMMIT 3
+#define FTS5_ROLLBACK 4
+#define FTS5_SAVEPOINT 5
+#define FTS5_RELEASE 6
+#define FTS5_ROLLBACKTO 7
+/*
+** Debug-only sanity check of the order in which transaction callbacks
+** arrive. p->ts.eState tracks the transaction: 0 (none), 1 (after BEGIN)
+** or 2 (after SYNC). p->ts.iSavepoint tracks the most recent savepoint,
+** with -1 meaning none. Argument op is one of the FTS5_XXX values above
+** and iSavepoint is only examined for the three savepoint operations.
+**
+** When SQLITE_DEBUG is not defined this is a macro that expands to
+** nothing.
+*/
+static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){
+ switch( op ){
+ case FTS5_BEGIN:
+ assert( p->ts.eState==0 );
+ p->ts.eState = 1;
+ p->ts.iSavepoint = -1;
+ break;
+
+ case FTS5_SYNC:
+ assert( p->ts.eState==1 );
+ p->ts.eState = 2;
+ break;
+
+ case FTS5_COMMIT:
+ assert( p->ts.eState==2 );
+ p->ts.eState = 0;
+ break;
+
+ case FTS5_ROLLBACK:
+ /* A rollback is accepted in any of the three states. */
+ assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 );
+ p->ts.eState = 0;
+ break;
+
+ case FTS5_SAVEPOINT:
+ /* A new savepoint must be numbered above the current one. */
+ assert( p->ts.eState==1 );
+ assert( iSavepoint>=0 );
+ assert( iSavepoint>p->ts.iSavepoint );
+ p->ts.iSavepoint = iSavepoint;
+ break;
+
+ case FTS5_RELEASE:
+ /* Releasing savepoint N leaves N-1 as the current savepoint. */
+ assert( p->ts.eState==1 );
+ assert( iSavepoint>=0 );
+ assert( iSavepoint<=p->ts.iSavepoint );
+ p->ts.iSavepoint = iSavepoint-1;
+ break;
+
+ case FTS5_ROLLBACKTO:
+ /* Rolling back to savepoint N leaves N itself as the current one. */
+ assert( p->ts.eState==1 );
+ assert( iSavepoint>=0 );
+ assert( iSavepoint<=p->ts.iSavepoint );
+ p->ts.iSavepoint = iSavepoint;
+ break;
+ }
+}
+#else
+# define fts5CheckTransactionState(x,y,z)
+#endif
+
+/*
+** Return true if pTab is a contentless table.
+*/
+static int fts5IsContentless(Fts5Table *pTab){
+  /* A table is contentless when its configured content mode is "none". */
+  Fts5Config *pCfg = pTab->pConfig;
+  return (FTS5_CONTENT_NONE==pCfg->eContent);
+}
+
+/*
+** Delete a virtual table handle allocated by fts5InitVtab().
+*/
+static void fts5FreeVtab(Fts5Table *pTab){
+  /* A NULL handle is accepted and ignored. */
+  if( pTab==0 ) return;
+
+  /* Close the index and storage sub-systems, release the parsed
+  ** configuration and finally the handle itself. */
+  sqlite3Fts5IndexClose(pTab->pIndex);
+  sqlite3Fts5StorageClose(pTab->pStorage);
+  sqlite3Fts5ConfigFree(pTab->pConfig);
+  sqlite3_free(pTab);
+}
+
+/*
+** The xDisconnect() virtual table method.
+*/
+static int fts5DisconnectMethod(sqlite3_vtab *pVtab){
+  /* Disconnecting only releases the in-memory handle. Always succeeds. */
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  fts5FreeVtab(pTab);
+  return SQLITE_OK;
+}
+
+/*
+** The xDestroy() virtual table method.
+*/
+static int fts5DestroyMethod(sqlite3_vtab *pVtab){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  int rc;
+
+  /* Drop everything first. The handle is only released if that worked;
+  ** on failure it is left intact and the error code is returned. */
+  rc = sqlite3Fts5DropAll(pTab->pConfig);
+  if( rc!=SQLITE_OK ) return rc;
+
+  fts5FreeVtab(pTab);
+  return SQLITE_OK;
+}
+
+/*
+** This function is the implementation of both the xConnect and xCreate
+** methods of the FTS5 virtual table.
+**
+** The argv[] array contains the following:
+**
+** argv[0] -> module name ("fts5")
+** argv[1] -> database name
+** argv[2] -> table name
+** argv[...] -> "column name" and other module argument fields.
+*/
+static int fts5InitVtab(
+ int bCreate, /* True for xCreate, false for xConnect */
+ sqlite3 *db, /* The SQLite database connection */
+ void *pAux, /* Hash table containing tokenizers */
+ int argc, /* Number of elements in argv array */
+ const char * const *argv, /* xCreate/xConnect argument array */
+ sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
+ char **pzErr /* Write any error message here */
+){
+ /* pAux is used here as the Fts5Global object for this connection. */
+ Fts5Global *pGlobal = (Fts5Global*)pAux;
+ const char **azConfig = (const char**)argv;
+ int rc = SQLITE_OK; /* Return code */
+ Fts5Config *pConfig = 0; /* Results of parsing argc/argv */
+ Fts5Table *pTab = 0; /* New virtual table object */
+
+ /* Each step below only runs if every earlier step succeeded, so the
+ ** first error code encountered is the one that is returned. */
+
+ /* Allocate the new vtab object and parse the configuration */
+ pTab = (Fts5Table*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Table));
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
+ assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
+ }
+ if( rc==SQLITE_OK ){
+ pTab->pConfig = pConfig;
+ pTab->pGlobal = pGlobal;
+ }
+
+ /* Open the index sub-system */
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->pIndex, pzErr);
+ }
+
+ /* Open the storage sub-system */
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5StorageOpen(
+ pConfig, pTab->pIndex, bCreate, &pTab->pStorage, pzErr
+ );
+ }
+
+ /* Call sqlite3_declare_vtab() */
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5ConfigDeclareVtab(pConfig);
+ }
+
+ /* Load the initial configuration */
+ if( rc==SQLITE_OK ){
+ /* Point the config's error-message slot at the caller's pzErr for the
+ ** duration of the load only, then detach it again. */
+ assert( pConfig->pzErrmsg==0 );
+ pConfig->pzErrmsg = pzErr;
+ rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
+ /* NOTE(review): presumably this discards index state touched while
+ ** loading the config - confirm against sqlite3Fts5IndexRollback(). */
+ sqlite3Fts5IndexRollback(pTab->pIndex);
+ pConfig->pzErrmsg = 0;
+ }
+
+ /* On failure release whatever was built so far, so that NULL is written
+ ** to *ppVTab. On a successful xCreate, record the debug-only
+ ** transaction state as "begun". */
+ if( rc!=SQLITE_OK ){
+ fts5FreeVtab(pTab);
+ pTab = 0;
+ }else if( bCreate ){
+ fts5CheckTransactionState(pTab, FTS5_BEGIN, 0);
+ }
+ *ppVTab = (sqlite3_vtab*)pTab;
+ return rc;
+}
+
+/*
+** The xConnect() and xCreate() methods for the virtual table. All the
+** work is done in function fts5InitVtab().
+*/
+static int fts5ConnectMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pAux,                     /* Pointer to tokenizer hash table */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  /* xConnect: attach to an existing table, so bCreate is false. */
+  const int bCreate = 0;
+  int rc = fts5InitVtab(bCreate, db, pAux, argc, argv, ppVtab, pzErr);
+  return rc;
+}
+static int fts5CreateMethod(
+  sqlite3 *db,                    /* Database connection */
+  void *pAux,                     /* Pointer to tokenizer hash table */
+  int argc,                       /* Number of elements in argv array */
+  const char * const *argv,       /* xCreate/xConnect argument array */
+  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
+  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
+){
+  /* xCreate: build a brand new table, so bCreate is true. */
+  const int bCreate = 1;
+  int rc = fts5InitVtab(bCreate, db, pAux, argc, argv, ppVtab, pzErr);
+  return rc;
+}
+
+/*
+** The different query plans.
+*/
+#define FTS5_PLAN_MATCH 1 /* (<tbl> MATCH ?) */
+#define FTS5_PLAN_SOURCE 2 /* A source cursor for SORTED_MATCH */
+#define FTS5_PLAN_SPECIAL 3 /* An internal query */
+#define FTS5_PLAN_SORTED_MATCH 4 /* (<tbl> MATCH ? ORDER BY rank) */
+#define FTS5_PLAN_SCAN 5 /* No usable constraint */
+#define FTS5_PLAN_ROWID 6 /* (rowid = ?) */
+/* The numbering matters: fts5NextMethod() tests (ePlan<3) to select the
+** MATCH and SOURCE plans, and asserts that they are the only two plans
+** numbered below 3. */
+
+/*
+** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->idxFlags, unless this
+** extension is currently being used by a version of SQLite too old to
+** support index-info flags. In that case this function is a no-op.
+*/
+static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){
+ /* Compile-time check: the headers must be new enough to declare the
+ ** idxFlags field and the SQLITE_INDEX_SCAN_UNIQUE constant. */
+#if SQLITE_VERSION_NUMBER>=3008012
+ /* Run-time check, compiled in only when SQLITE_CORE is not defined: the
+ ** library actually in use must be new enough as well. When SQLITE_CORE
+ ** is defined the block below runs unconditionally. */
+#ifndef SQLITE_CORE
+ if( sqlite3_libversion_number()>=3008012 )
+#endif
+ {
+ pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE;
+ }
+#endif
+}
+
+/*
+** Implementation of the xBestIndex method for FTS5 tables. Within the
+** WHERE constraint, it searches for the following:
+**
+** 1. A MATCH constraint against the special column.
+** 2. A MATCH constraint against the "rank" column.
+** 3. An == constraint against the rowid column.
+** 4. A < or <= constraint against the rowid column.
+** 5. A > or >= constraint against the rowid column.
+**
+** Within the ORDER BY, either:
+**
+** 6. ORDER BY rank [ASC|DESC]
+** 7. ORDER BY rowid [ASC|DESC]
+**
+** Costs are assigned as follows:
+**
+** a) If an unusable MATCH operator is present in the WHERE clause, the
+** cost is unconditionally set to 1e50 (a really big number).
+**
+** b) If a MATCH operator is present, the cost depends on the other
+** constraints also present. As follows:
+**
+** * No other constraints: cost=1000.0
+** * One rowid range constraint: cost=750.0
+** * Both rowid range constraints: cost=500.0
+** * An == rowid constraint: cost=100.0
+**
+** c) Otherwise, if there is no MATCH:
+**
+** * No other constraints: cost=1000000.0
+** * One rowid range constraint: cost=750000.0
+** * Both rowid range constraints: cost=250000.0
+** * An == rowid constraint: cost=10.0
+**
+** Costs are not modified by the ORDER BY clause.
+*/
+static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
+ Fts5Table *pTab = (Fts5Table*)pVTab;
+ Fts5Config *pConfig = pTab->pConfig;
+ int idxFlags = 0; /* Parameter passed through to xFilter() */
+ int bHasMatch;
+ int iNext;
+ int i;
+
+ /* Table of the constraint kinds this method recognizes. The order of
+ ** the entries is significant: argvIndex values are handed out in this
+ ** order further down, and fts5FilterMethod() decodes its apVal[] array
+ ** in the same order. Entry 0 must remain the <tbl> MATCH entry, as the
+ ** loop below tests (j==0) to detect an unusable MATCH. */
+ struct Constraint {
+ int op; /* Mask against sqlite3_index_constraint.op */
+ int fts5op; /* FTS5 mask for idxFlags */
+ int iCol; /* 0==rowid, 1==tbl, 2==rank */
+ int omit; /* True to omit this if found */
+ int iConsIndex; /* Index in pInfo->aConstraint[] */
+ } aConstraint[] = {
+ {SQLITE_INDEX_CONSTRAINT_MATCH|SQLITE_INDEX_CONSTRAINT_EQ,
+ FTS5_BI_MATCH, 1, 1, -1},
+ {SQLITE_INDEX_CONSTRAINT_MATCH|SQLITE_INDEX_CONSTRAINT_EQ,
+ FTS5_BI_RANK, 2, 1, -1},
+ {SQLITE_INDEX_CONSTRAINT_EQ, FTS5_BI_ROWID_EQ, 0, 0, -1},
+ {SQLITE_INDEX_CONSTRAINT_LT|SQLITE_INDEX_CONSTRAINT_LE,
+ FTS5_BI_ROWID_LE, 0, 0, -1},
+ {SQLITE_INDEX_CONSTRAINT_GT|SQLITE_INDEX_CONSTRAINT_GE,
+ FTS5_BI_ROWID_GE, 0, 0, -1},
+ };
+
+ /* Translate Constraint.iCol (0, 1 or 2) into the column number SQLite
+ ** reports: -1 for the rowid, nCol for the column named after the table
+ ** and nCol+1 for the rank column. */
+ int aColMap[3];
+ aColMap[0] = -1;
+ aColMap[1] = pConfig->nCol;
+ aColMap[2] = pConfig->nCol+1;
+
+ /* Set idxFlags flags for all WHERE clause terms that will be used. */
+ for(i=0; i<pInfo->nConstraint; i++){
+ struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
+ int j;
+ for(j=0; j<ArraySize(aConstraint); j++){
+ struct Constraint *pC = &aConstraint[j];
+ /* NOTE(review): (p->op & pC->op) is a bitwise test, so it is only exact
+ ** if every operator code SQLite can pass here occupies bits of its
+ ** own - confirm against the SQLITE_INDEX_CONSTRAINT_* values of the
+ ** SQLite version in use. */
+ if( p->iColumn==aColMap[pC->iCol] && p->op & pC->op ){
+ if( p->usable ){
+ /* If several constraints match the same entry, the last one wins. */
+ pC->iConsIndex = i;
+ idxFlags |= pC->fts5op;
+ }else if( j==0 ){
+ /* As there exists an unusable MATCH constraint this is an
+ ** unusable plan. Set a prohibitively high cost. */
+ pInfo->estimatedCost = 1e50;
+ return SQLITE_OK;
+ }
+ }
+ }
+ }
+
+ /* Set idxFlags flags for the ORDER BY clause */
+ /* Only a single-term ORDER BY is ever consumed, and ORDER BY rank only
+ ** when a usable <tbl> MATCH constraint is also present. */
+ if( pInfo->nOrderBy==1 ){
+ int iSort = pInfo->aOrderBy[0].iColumn;
+ if( iSort==(pConfig->nCol+1) && BitFlagTest(idxFlags, FTS5_BI_MATCH) ){
+ idxFlags |= FTS5_BI_ORDER_RANK;
+ }else if( iSort==-1 ){
+ idxFlags |= FTS5_BI_ORDER_ROWID;
+ }
+ if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
+ pInfo->orderByConsumed = 1;
+ if( pInfo->aOrderBy[0].desc ){
+ idxFlags |= FTS5_BI_ORDER_DESC;
+ }
+ }
+ }
+
+ /* Calculate the estimated cost based on the flags set in idxFlags. */
+ bHasMatch = BitFlagTest(idxFlags, FTS5_BI_MATCH);
+ if( BitFlagTest(idxFlags, FTS5_BI_ROWID_EQ) ){
+ pInfo->estimatedCost = bHasMatch ? 100.0 : 10.0;
+ /* A rowid lookup with no MATCH is flagged as a unique scan. */
+ if( bHasMatch==0 ) fts5SetUniqueFlag(pInfo);
+ }else if( BitFlagAllTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
+ pInfo->estimatedCost = bHasMatch ? 500.0 : 250000.0;
+ }else if( BitFlagTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
+ pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0;
+ }else{
+ pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0;
+ }
+
+ /* Assign argvIndex values to each constraint in use. */
+ /* Values are numbered from 1 in aConstraint[] order. Only the two MATCH
+ ** entries ask SQLite to omit its own re-check of the constraint. */
+ iNext = 1;
+ for(i=0; i<ArraySize(aConstraint); i++){
+ struct Constraint *pC = &aConstraint[i];
+ if( pC->iConsIndex>=0 ){
+ pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++;
+ pInfo->aConstraintUsage[pC->iConsIndex].omit = (unsigned char)pC->omit;
+ }
+ }
+
+ pInfo->idxNum = idxFlags;
+ return SQLITE_OK;
+}
+
+/*
+** Implementation of xOpen method.
+*/
+static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
+  Fts5Table *pTab = (Fts5Table*)pVTab;
+  Fts5Global *pGlobal;            /* Owner of the list of open cursors */
+  Fts5Cursor *pNew;               /* New cursor object */
+  int nAlloc;                     /* Bytes of space to allocate */
+
+  /* The cursor and its aColumnSize[] array (one int per column) share a
+  ** single allocation, with the array placed directly after the struct. */
+  nAlloc = sizeof(Fts5Cursor) + pTab->pConfig->nCol * sizeof(int);
+  pNew = (Fts5Cursor*)sqlite3_malloc(nAlloc);
+  if( pNew==0 ){
+    *ppCsr = 0;
+    return SQLITE_NOMEM;
+  }
+  memset(pNew, 0, nAlloc);
+  pNew->aColumnSize = (int*)&pNew[1];
+
+  /* Push the cursor onto the front of the global cursor list and give it
+  ** the next cursor id. */
+  pGlobal = pTab->pGlobal;
+  pNew->pNext = pGlobal->pCsr;
+  pGlobal->pCsr = pNew;
+  pNew->iCsrId = ++pGlobal->iNextId;
+
+  *ppCsr = (sqlite3_vtab_cursor*)pNew;
+  return SQLITE_OK;
+}
+
+/*
+** Return the FTS5_STMT_XXX identifier of the storage statement that suits
+** the cursor's plan: one of the two scan statements (chosen by sort
+** direction) for FTS5_PLAN_SCAN, or the lookup statement for every other
+** plan.
+*/
+static int fts5StmtType(Fts5Cursor *pCsr){
+  if( pCsr->ePlan!=FTS5_PLAN_SCAN ) return FTS5_STMT_LOOKUP;
+  if( pCsr->bDesc ) return FTS5_STMT_SCAN_DESC;
+  return FTS5_STMT_SCAN_ASC;
+}
+
+/*
+** This function is called after the cursor passed as the only argument
+** is moved to point at a different row. It clears all cached data
+** specific to the previous row stored by the cursor object.
+*/
+static void fts5CsrNewrow(Fts5Cursor *pCsr){
+  /* Every per-row cache is marked stale by raising its REQUIRE flag. */
+  const int mNewrow = FTS5CSR_REQUIRE_CONTENT
+                    | FTS5CSR_REQUIRE_DOCSIZE
+                    | FTS5CSR_REQUIRE_INST
+                    | FTS5CSR_REQUIRE_POSLIST;
+  CsrFlagSet(pCsr, mNewrow);
+}
+
+/*
+** Release every resource held by the cursor for its current query and
+** zero all fields from Fts5Cursor.ePlan to the end of the structure. The
+** cursor object itself is not freed and stays on the global cursor list.
+*/
+static void fts5FreeCursorComponents(Fts5Cursor *pCsr){
+  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+  Fts5Sorter *pSorter = pCsr->pSorter;
+  Fts5Auxdata *pAuxdata = pCsr->pAuxdata;
+  u8 *pZeroFrom = (u8*)&pCsr->ePlan;
+
+  /* Caches used by xInst() and xInstCount(). */
+  sqlite3_free(pCsr->aInstIter);
+  sqlite3_free(pCsr->aInst);
+
+  /* The content statement goes back to the storage layer. */
+  if( pCsr->pStmt ){
+    sqlite3Fts5StorageStmtRelease(
+        pTab->pStorage, fts5StmtType(pCsr), pCsr->pStmt
+    );
+  }
+
+  if( pSorter ){
+    sqlite3_finalize(pSorter->pStmt);
+    sqlite3_free(pSorter);
+  }
+
+  /* A FTS5_PLAN_SOURCE cursor borrows its expression from the sorting
+  ** cursor (see fts5FilterMethod()), so it must not free it. */
+  if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){
+    sqlite3Fts5ExprFree(pCsr->pExpr);
+  }
+
+  /* Run the destructor of each saved aux-data entry, then free the entry. */
+  while( pAuxdata ){
+    Fts5Auxdata *pDel = pAuxdata;
+    pAuxdata = pDel->pNext;
+    if( pDel->xDelete ) pDel->xDelete(pDel->pPtr);
+    sqlite3_free(pDel);
+  }
+
+  sqlite3_finalize(pCsr->pRankArgStmt);
+  sqlite3_free(pCsr->apRankArg);
+
+  /* The rank strings are only owned by the cursor if flagged as such. */
+  if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
+    sqlite3_free(pCsr->zRank);
+    sqlite3_free(pCsr->zRankArgs);
+  }
+
+  memset(pZeroFrom, 0, sizeof(Fts5Cursor) - (pZeroFrom - (u8*)pCsr));
+}
+
+
+/*
+** Close the cursor. For additional information see the documentation
+** on the xClose method of the virtual table interface.
+*/
+static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
+  Fts5Table *pTab;
+  Fts5Cursor **ppLink;
+
+  /* Closing a NULL cursor is a no-op. */
+  if( pCsr==0 ) return SQLITE_OK;
+
+  pTab = (Fts5Table*)(pCursor->pVtab);
+  fts5FreeCursorComponents(pCsr);
+
+  /* Unlink the cursor from the Fts5Global.pCsr list. The cursor is
+  ** expected to be on the list; the walk does not check for its end. */
+  ppLink = &pTab->pGlobal->pCsr;
+  while( (*ppLink)!=pCsr ){
+    ppLink = &(*ppLink)->pNext;
+  }
+  *ppLink = pCsr->pNext;
+
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Advance the sorter of an "ORDER BY rank" cursor to its next row by
+** stepping the sorter statement. When the statement is exhausted the
+** cursor is flagged FTS5CSR_EOF and SQLITE_OK is returned. Any other
+** result from sqlite3_step() is returned to the caller unchanged.
+*/
+static int fts5SorterNext(Fts5Cursor *pCsr){
+ Fts5Sorter *pSorter = pCsr->pSorter;
+ int rc;
+
+ rc = sqlite3_step(pSorter->pStmt);
+ if( rc==SQLITE_DONE ){
+ rc = SQLITE_OK;
+ CsrFlagSet(pCsr, FTS5CSR_EOF);
+ }else if( rc==SQLITE_ROW ){
+ const u8 *a;
+ const u8 *aBlob;
+ int nBlob;
+ int i;
+ int iOff = 0;
+ rc = SQLITE_OK;
+
+ /* Column 0 of the statement is the rowid, column 1 a blob. */
+ pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0);
+ nBlob = sqlite3_column_bytes(pSorter->pStmt, 1);
+ aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1);
+
+ /* nBlob==0 in detail=none mode. */
+ if( nBlob>0 ){
+ /* The blob starts with (nIdx-1) varints. Their running total is
+ ** stored as the first (nIdx-1) entries of aIdx[]. */
+ for(i=0; i<(pSorter->nIdx-1); i++){
+ int iVal;
+ a += fts5GetVarint32(a, iVal);
+ iOff += iVal;
+ pSorter->aIdx[i] = iOff;
+ }
+ /* The final entry is the number of bytes remaining in the blob after
+ ** the varints, and aPoslist points at the first of those bytes. */
+ pSorter->aIdx[i] = &aBlob[nBlob] - a;
+ pSorter->aPoslist = a;
+ }
+
+ fts5CsrNewrow(pCsr);
+ }
+
+ return rc;
+}
+
+
+/*
+** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors
+** open on table pTab.
+*/
+static void fts5TripCursors(Fts5Table *pTab){
+  Fts5Cursor *pIter = pTab->pGlobal->pCsr;
+
+  /* The cursor list is global, so cursors that belong to other tables or
+  ** that use a plan other than FTS5_PLAN_MATCH are skipped. */
+  while( pIter ){
+    int bMatchPlan = (pIter->ePlan==FTS5_PLAN_MATCH);
+    if( bMatchPlan && pIter->base.pVtab==(sqlite3_vtab*)pTab ){
+      CsrFlagSet(pIter, FTS5CSR_REQUIRE_RESEEK);
+    }
+    pIter = pIter->pNext;
+  }
+}
+
+/*
+** If the REQUIRE_RESEEK flag is set on the cursor passed as the first
+** argument, close and reopen all Fts5IndexIter iterators that the cursor
+** is using. Then attempt to move the cursor to a rowid equal to or later
+** (in the cursors sort order - ASC or DESC) than the current rowid.
+**
+** If the new rowid is not equal to the old, set output parameter *pbSkip
+** to 1 before returning. Otherwise, leave it unchanged.
+**
+** Return SQLITE_OK if successful or if no reseek was required, or an
+** error code if an error occurred.
+*/
+static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){
+ int rc = SQLITE_OK;
+ /* The caller must initialize *pbSkip to 0. */
+ assert( *pbSkip==0 );
+ if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){
+ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+ int bDesc = pCsr->bDesc;
+ /* Remember the current rowid, then restart the expression from it. */
+ i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr);
+
+ rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, iRowid, bDesc);
+ /* Landing on a different rowid means the cursor has already moved to
+ ** a new row, so the caller must not advance it again. */
+ if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){
+ *pbSkip = 1;
+ }
+
+ /* The flag is cleared and the per-row caches invalidated even if the
+ ** reseek returned an error. */
+ CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK);
+ fts5CsrNewrow(pCsr);
+ if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
+ CsrFlagSet(pCsr, FTS5CSR_EOF);
+ *pbSkip = 1;
+ }
+ }
+ return rc;
+}
+
+
+/*
+** Advance the cursor to the next row in the table that matches the
+** search criteria.
+**
+** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
+** even if we reach end-of-file. The fts5EofMethod() will be called
+** subsequently to determine whether or not an EOF was hit.
+*/
+static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
+  int rc;
+
+  /* FTS5_PLAN_MATCH and FTS5_PLAN_SOURCE are the only plans numbered
+  ** below 3. Both are advanced through the expression object. */
+  assert( (pCsr->ePlan<3)==
+          (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)
+  );
+  assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) );
+
+  if( pCsr->ePlan<3 ){
+    int bSkip = 0;
+
+    /* If a reseek was pending and it either failed or already moved the
+    ** cursor to a new row (or to EOF), there is nothing more to do. */
+    if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
+
+    rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid);
+
+    /* Set the EOF flag by name rather than passing the return value of
+    ** sqlite3Fts5ExprEof() as the flag mask. This does not depend on
+    ** that value being numerically equal to FTS5CSR_EOF, and matches
+    ** fts5CursorFirst() and fts5CursorReseek(). */
+    if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
+      CsrFlagSet(pCsr, FTS5CSR_EOF);
+    }
+    fts5CsrNewrow(pCsr);
+  }else{
+    switch( pCsr->ePlan ){
+      case FTS5_PLAN_SPECIAL: {
+        /* A special query returns exactly one row. */
+        CsrFlagSet(pCsr, FTS5CSR_EOF);
+        rc = SQLITE_OK;
+        break;
+      }
+
+      case FTS5_PLAN_SORTED_MATCH: {
+        rc = fts5SorterNext(pCsr);
+        break;
+      }
+
+      default:
+        /* FTS5_PLAN_SCAN or FTS5_PLAN_ROWID: step the content statement.
+        ** Anything other than SQLITE_ROW ends the scan, and the result
+        ** of sqlite3_reset() says whether it ended with an error. */
+        rc = sqlite3_step(pCsr->pStmt);
+        if( rc!=SQLITE_ROW ){
+          CsrFlagSet(pCsr, FTS5CSR_EOF);
+          rc = sqlite3_reset(pCsr->pStmt);
+        }else{
+          rc = SQLITE_OK;
+        }
+        break;
+    }
+  }
+
+  return rc;
+}
+
+
+/*
+** Format an SQL statement from the printf-style arguments and prepare it
+** against pConfig->db. The new statement handle (NULL on failure) is
+** written to *ppStmt. If preparing fails, a copy of the database error
+** message is stored in *pConfig->pzErrmsg. Returns SQLITE_NOMEM if the
+** SQL text could not be allocated, or the result of sqlite3_prepare_v2().
+*/
+static int fts5PrepareStatement(
+  sqlite3_stmt **ppStmt,
+  Fts5Config *pConfig,
+  const char *zFmt,
+  ...
+){
+  sqlite3_stmt *pNew = 0;
+  sqlite3 *db = pConfig->db;
+  char *zText;
+  int rc = SQLITE_NOMEM;
+  va_list ap;
+
+  va_start(ap, zFmt);
+  zText = sqlite3_vmprintf(zFmt, ap);
+  va_end(ap);
+
+  if( zText ){
+    rc = sqlite3_prepare_v2(db, zText, -1, &pNew, 0);
+    if( rc!=SQLITE_OK ){
+      *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db));
+    }
+    sqlite3_free(zText);
+  }
+
+  *ppStmt = pNew;
+  return rc;
+}
+
+/*
+** Set up cursor pCsr for a "MATCH ... ORDER BY rank" query. A sorter
+** object is allocated, a statement that reads (rowid, rank) from this
+** same virtual table in rank order is prepared, and the sorter is moved
+** to its first row.
+**
+** While that first step runs, pTab->pSortCsr points at pCsr so that the
+** nested xFilter() call can recognize itself as the source of the sort
+** (see fts5FilterMethod()).
+**
+** Returns SQLITE_OK on success. On error the sorter is released,
+** pCsr->pSorter is left NULL and an SQLite error code is returned.
+*/
+static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
+  Fts5Config *pConfig = pTab->pConfig;
+  Fts5Sorter *pSorter;
+  int nPhrase;
+  int nExtra;                     /* aIdx[] slots beyond the declared one */
+  int nByte;
+  int rc;
+  const char *zRank = pCsr->zRank;
+  const char *zRankArgs = pCsr->zRankArgs;
+
+  /* Fts5Sorter.aIdx[] is declared with one entry and needs one entry per
+  ** phrase. Never allocate less than sizeof(Fts5Sorter): an expression
+  ** with zero phrases used to produce an allocation that left aIdx[0]
+  ** outside it. */
+  nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
+  nExtra = (nPhrase>1 ? nPhrase-1 : 0);
+  nByte = sizeof(Fts5Sorter) + sizeof(int) * nExtra;
+  pSorter = (Fts5Sorter*)sqlite3_malloc(nByte);
+  if( pSorter==0 ) return SQLITE_NOMEM;
+  memset(pSorter, 0, nByte);
+  pSorter->nIdx = nPhrase;
+
+  /* TODO: It would be better to have some system for reusing statement
+  ** handles here, rather than preparing a new one for each query. But that
+  ** is not possible as SQLite reference counts the virtual table objects.
+  ** And since the statement required here reads from this very virtual
+  ** table, saving it creates a circular reference.
+  **
+  ** If SQLite had a built-in statement cache, this wouldn't be a problem. */
+  rc = fts5PrepareStatement(&pSorter->pStmt, pConfig,
+      "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s",
+      pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
+      (zRankArgs ? ", " : ""),
+      (zRankArgs ? zRankArgs : ""),
+      bDesc ? "DESC" : "ASC"
+  );
+
+  /* fts5SorterNext() reads the sorter through pCsr->pSorter, so it has to
+  ** be attached before the first step. */
+  pCsr->pSorter = pSorter;
+  if( rc==SQLITE_OK ){
+    assert( pTab->pSortCsr==0 );
+    pTab->pSortCsr = pCsr;
+    rc = fts5SorterNext(pCsr);
+    pTab->pSortCsr = 0;
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3_finalize(pSorter->pStmt);
+    sqlite3_free(pSorter);
+    pCsr->pSorter = 0;
+  }
+
+  return rc;
+}
+
+/*
+** Position the cursor's expression on its first match, starting from
+** pCsr->iFirstRowid and iterating in descending order if bDesc is true.
+** The cursor is flagged FTS5CSR_EOF if there is no such match. Returns
+** the result of sqlite3Fts5ExprFirst().
+*/
+static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
+  Fts5Expr *p = pCsr->pExpr;
+  int rc = sqlite3Fts5ExprFirst(p, pTab->pIndex, pCsr->iFirstRowid, bDesc);
+
+  /* The EOF test and the per-row reset happen even if rc is an error. */
+  if( sqlite3Fts5ExprEof(p) ){
+    CsrFlagSet(pCsr, FTS5CSR_EOF);
+  }
+  fts5CsrNewrow(pCsr);
+
+  return rc;
+}
+
+/*
+** Process a "special" query. A special query is identified as one with a
+** MATCH expression that begins with a '*' character. The remainder of
+** the text passed to the MATCH operator is used as the special query
+** parameters.
+*/
+static int fts5SpecialMatch(
+  Fts5Table *pTab,
+  Fts5Cursor *pCsr,
+  const char *zQuery
+){
+  int rc = SQLITE_OK;             /* Return code */
+  const char *z = zQuery;         /* Special query text */
+  int n;                          /* Number of bytes in text at z */
+
+  /* Skip leading spaces. The directive is the run of characters up to
+  ** the next space or the end of the string. */
+  while( z[0]==' ' ) z++;
+  for(n=0; z[n] && z[n]!=' '; n++);
+
+  assert( pTab->base.zErrMsg==0 );
+  pCsr->ePlan = FTS5_PLAN_SPECIAL;
+
+  /* sqlite3_strnicmp() compares only the first n bytes. Without the
+  ** explicit length checks an empty directive (n==0) or any prefix such
+  ** as "r" or "i" would be accepted as "reads" or "id". */
+  if( n==5 && 0==sqlite3_strnicmp("reads", z, n) ){
+    pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->pIndex);
+  }
+  else if( n==2 && 0==sqlite3_strnicmp("id", z, n) ){
+    pCsr->iSpecial = pCsr->iCsrId;
+  }
+  else{
+    /* An unrecognized directive. Return an error message. */
+    pTab->base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z);
+    rc = SQLITE_ERROR;
+  }
+
+  return rc;
+}
+
+/*
+** Search for an auxiliary function named zName that can be used with table
+** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
+** structure. Otherwise, if no such function exists, return NULL.
+*/
+static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){
+  Fts5Auxiliary *p = pTab->pGlobal->pAux;
+
+  /* Walk the list of registered functions until the name matches, using
+  ** a case-insensitive comparison, or the list runs out. */
+  while( p && sqlite3_stricmp(zName, p->zFunc)!=0 ){
+    p = p->pNext;
+  }
+
+  /* p is NULL here if no function of the specified name was found. */
+  return p;
+}
+
+
+/*
+** Resolve the rank function of cursor pCsr. If the cursor has a rank
+** argument string (pCsr->zRankArgs), it is evaluated by running
+** "SELECT <args>" and the resulting column values are stored in
+** pCsr->apRankArg[]. The auxiliary function named pCsr->zRank is then
+** looked up and stored in pCsr->pRank.
+**
+** Returns SQLITE_OK on success. If no function with that name exists, an
+** error message is left in the vtab object and SQLITE_ERROR is returned.
+** pCsr->pRank is NULL after any failure.
+*/
+static int fts5FindRankFunction(Fts5Cursor *pCsr){
+ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+ Fts5Config *pConfig = pTab->pConfig;
+ int rc = SQLITE_OK;
+ Fts5Auxiliary *pAux = 0;
+ const char *zRank = pCsr->zRank;
+ const char *zRankArgs = pCsr->zRankArgs;
+
+ if( zRankArgs ){
+ char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs);
+ if( zSql ){
+ sqlite3_stmt *pStmt = 0;
+ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0);
+ sqlite3_free(zSql);
+ assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 );
+ if( rc==SQLITE_OK ){
+ if( SQLITE_ROW==sqlite3_step(pStmt) ){
+ int nByte;
+ pCsr->nRankArg = sqlite3_column_count(pStmt);
+ nByte = sizeof(sqlite3_value*)*pCsr->nRankArg;
+ pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte);
+ if( rc==SQLITE_OK ){
+ int i;
+ for(i=0; i<pCsr->nRankArg; i++){
+ pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i);
+ }
+ }
+ /* The statement is kept on the cursor, still positioned on its row,
+ ** because the values in apRankArg[] come from it. It is finalized by
+ ** fts5FreeCursorComponents(). This also happens if the allocation
+ ** above failed. */
+ pCsr->pRankArgStmt = pStmt;
+ }else{
+ /* The step produced no row. The assert documents the expectation
+ ** that this only happens when the statement failed. */
+ rc = sqlite3_finalize(pStmt);
+ assert( rc!=SQLITE_OK );
+ }
+ }
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ pAux = fts5FindAuxiliary(pTab, zRank);
+ if( pAux==0 ){
+ assert( pTab->base.zErrMsg==0 );
+ pTab->base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank);
+ rc = SQLITE_ERROR;
+ }
+ }
+
+ pCsr->pRank = pAux;
+ return rc;
+}
+
+
+/*
+** Set pCsr->zRank and pCsr->zRankArgs for the current query.
+**
+** If pRank is NULL (the query has no "rank MATCH ?" term), the values
+** come from the table configuration or, failing that, from
+** FTS5_DEFAULT_RANK. They are not owned by the cursor in that case.
+**
+** Otherwise the text of pRank is parsed. On success the parsed strings
+** are stored on the cursor, which is flagged FTS5CSR_FREE_ZRANK so that
+** they are freed with it. If the result is SQLITE_ERROR, an error message
+** is left in the vtab object.
+*/
+static int fts5CursorParseRank(
+  Fts5Config *pConfig,
+  Fts5Cursor *pCsr,
+  sqlite3_value *pRank
+){
+  const char *zText;              /* Text of the rank specification */
+  char *zFunc = 0;                /* Parsed function name */
+  char *zArgs = 0;                /* Parsed argument list */
+  int rc = SQLITE_OK;
+
+  /* No explicit rank: use the configured one or the built-in default. */
+  if( pRank==0 ){
+    if( pConfig->zRank ){
+      pCsr->zRank = (char*)pConfig->zRank;
+      pCsr->zRankArgs = (char*)pConfig->zRankArgs;
+    }else{
+      pCsr->zRank = (char*)FTS5_DEFAULT_RANK;
+      pCsr->zRankArgs = 0;
+    }
+    return SQLITE_OK;
+  }
+
+  zText = (const char*)sqlite3_value_text(pRank);
+  if( zText ){
+    rc = sqlite3Fts5ConfigParseRank(zText, &zFunc, &zArgs);
+  }else if( sqlite3_value_type(pRank)==SQLITE_NULL ){
+    rc = SQLITE_ERROR;
+  }
+
+  if( rc==SQLITE_ERROR ){
+    pCsr->base.pVtab->zErrMsg = sqlite3_mprintf(
+        "parse error in rank function: %s", zText
+    );
+  }else if( rc==SQLITE_OK ){
+    pCsr->zRank = zFunc;
+    pCsr->zRankArgs = zArgs;
+    CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK);
+  }
+
+  return rc;
+}
+
+/*
+** Return the value of pVal as a rowid limit. If pVal is NULL, or its
+** numeric type is anything other than SQLITE_INTEGER, return iDefault.
+*/
+static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){
+  if( pVal==0 ) return iDefault;
+  if( sqlite3_value_numeric_type(pVal)!=SQLITE_INTEGER ) return iDefault;
+  return sqlite3_value_int64(pVal);
+}
+
+/*
+** This is the xFilter interface for the virtual table. See
+** the virtual table xFilter method documentation for additional
+** information.
+**
+** There are three possible query strategies:
+**
+** 1. Full-text search using a MATCH operator.
+** 2. A by-rowid lookup.
+** 3. A full-table scan.
+*/
+static int fts5FilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *zUnused,            /* Unused */
+  int nVal,                       /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
+  Fts5Config *pConfig = pTab->pConfig;
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
+  int rc = SQLITE_OK;             /* Error code */
+  int iVal = 0;                   /* Counter for apVal[] */
+  int bDesc;                      /* True if ORDER BY [rank|rowid] DESC */
+  int bOrderByRank;               /* True if ORDER BY rank */
+  sqlite3_value *pMatch = 0;      /* <tbl> MATCH ? expression (or NULL) */
+  sqlite3_value *pRank = 0;       /* rank MATCH ? expression (or NULL) */
+  sqlite3_value *pRowidEq = 0;    /* rowid = ? expression (or NULL) */
+  sqlite3_value *pRowidLe = 0;    /* rowid <= ? expression (or NULL) */
+  sqlite3_value *pRowidGe = 0;    /* rowid >= ? expression (or NULL) */
+  char **pzErrmsg = pConfig->pzErrmsg;
+
+  UNUSED_PARAM(zUnused);
+  UNUSED_PARAM(nVal);
+
+  /* A non-zero ePlan means the cursor is being reused for a new query.
+  ** Release everything left over from the previous one. */
+  if( pCsr->ePlan ){
+    fts5FreeCursorComponents(pCsr);
+    memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr));
+  }
+
+  assert( pCsr->pStmt==0 );
+  assert( pCsr->pExpr==0 );
+  assert( pCsr->csrflags==0 );
+  assert( pCsr->pRank==0 );
+  assert( pCsr->zRank==0 );
+  assert( pCsr->zRankArgs==0 );
+
+  /* Route error messages to the vtab object for the duration of this
+  ** call. The previous setting is restored before returning. */
+  assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg );
+  pConfig->pzErrmsg = &pTab->base.zErrMsg;
+
+  /* Decode the arguments passed through to this function.
+  **
+  ** Note: The following set of if(...) statements must be in the same
+  ** order as the corresponding entries in the struct at the top of
+  ** fts5BestIndexMethod().  */
+  if( BitFlagTest(idxNum, FTS5_BI_MATCH) ) pMatch = apVal[iVal++];
+  if( BitFlagTest(idxNum, FTS5_BI_RANK) ) pRank = apVal[iVal++];
+  if( BitFlagTest(idxNum, FTS5_BI_ROWID_EQ) ) pRowidEq = apVal[iVal++];
+  if( BitFlagTest(idxNum, FTS5_BI_ROWID_LE) ) pRowidLe = apVal[iVal++];
+  if( BitFlagTest(idxNum, FTS5_BI_ROWID_GE) ) pRowidGe = apVal[iVal++];
+  assert( iVal==nVal );
+  bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0);
+  pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0);
+
+  /* Set the cursor upper and lower rowid limits. Only some strategies
+  ** actually use them. This is ok, as the xBestIndex() method leaves the
+  ** sqlite3_index_constraint.omit flag clear for range constraints
+  ** on the rowid field.  */
+  if( pRowidEq ){
+    pRowidLe = pRowidGe = pRowidEq;
+  }
+  if( bDesc ){
+    pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
+    pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
+  }else{
+    pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
+    pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
+  }
+
+  if( pTab->pSortCsr ){
+    /* If pSortCsr is non-NULL, then this call is being made as part of
+    ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
+    ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
+    ** return results to the user for this query. The current cursor
+    ** (pCursor) is used to execute the query issued by function
+    ** fts5CursorFirstSorted() above.  */
+    assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 );
+    assert( nVal==0 && pMatch==0 && bOrderByRank==0 && bDesc==0 );
+    assert( pCsr->iLastRowid==LARGEST_INT64 );
+    assert( pCsr->iFirstRowid==SMALLEST_INT64 );
+    pCsr->ePlan = FTS5_PLAN_SOURCE;
+    pCsr->pExpr = pTab->pSortCsr->pExpr;
+    rc = fts5CursorFirst(pTab, pCsr, bDesc);
+    sqlite3Fts5ExprClearEof(pCsr->pExpr);
+  }else if( pMatch ){
+    /* Read the expression text from pMatch itself rather than from
+    ** apVal[0]. The two are the same value here, but naming it keeps the
+    ** code independent of the argument order. */
+    const char *zExpr = (const char*)sqlite3_value_text(pMatch);
+    if( zExpr==0 ) zExpr = "";
+
+    rc = fts5CursorParseRank(pConfig, pCsr, pRank);
+    if( rc==SQLITE_OK ){
+      if( zExpr[0]=='*' ){
+        /* The user has issued a query of the form "MATCH '*...'". This
+        ** indicates that the MATCH expression is not a full text query,
+        ** but a request for an internal parameter.  */
+        rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]);
+      }else{
+        char **pzErr = &pTab->base.zErrMsg;
+        rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pCsr->pExpr, pzErr);
+        if( rc==SQLITE_OK ){
+          if( bOrderByRank ){
+            pCsr->ePlan = FTS5_PLAN_SORTED_MATCH;
+            rc = fts5CursorFirstSorted(pTab, pCsr, bDesc);
+          }else{
+            pCsr->ePlan = FTS5_PLAN_MATCH;
+            rc = fts5CursorFirst(pTab, pCsr, bDesc);
+          }
+        }
+      }
+    }
+  }else if( pConfig->zContent==0 ){
+    *pConfig->pzErrmsg = sqlite3_mprintf(
+        "%s: table does not support scanning", pConfig->zName
+    );
+    rc = SQLITE_ERROR;
+  }else{
+    /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
+    ** by rowid (ePlan==FTS5_PLAN_ROWID). */
+    pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN);
+    rc = sqlite3Fts5StorageStmt(
+        pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->base.zErrMsg
+    );
+    if( rc==SQLITE_OK ){
+      if( pCsr->ePlan==FTS5_PLAN_ROWID ){
+        /* Bind the rowid value itself. apVal[0] is not necessarily the
+        ** rowid: for "rank MATCH ? AND rowid = ?" with no <tbl> MATCH
+        ** term, apVal[0] holds the rank argument and the rowid comes
+        ** second. */
+        sqlite3_bind_value(pCsr->pStmt, 1, pRowidEq);
+      }else{
+        sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid);
+        sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid);
+      }
+      rc = fts5NextMethod(pCursor);
+    }
+  }
+
+  pConfig->pzErrmsg = pzErrmsg;
+  return rc;
+}
+
+/*
+** This is the xEof method of the virtual table. SQLite calls this
+** routine to find out if it has reached the end of a result set.
+*/
+static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
+
+  /* Normalize the flag test to exactly 1 or 0. */
+  if( CsrFlagTest(pCsr, FTS5CSR_EOF) ) return 1;
+  return 0;
+}
+
+/*
+** Return the rowid that the cursor currently points to.
+*/
+static i64 fts5CursorRowid(Fts5Cursor *pCsr){
+  Fts5Sorter *pSorter = pCsr->pSorter;
+
+  assert( pCsr->ePlan==FTS5_PLAN_MATCH
+       || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
+       || pCsr->ePlan==FTS5_PLAN_SOURCE
+  );
+
+  /* With a sorter the rowid comes from the sorter's current row;
+  ** otherwise it comes from the expression. */
+  return pSorter ? pSorter->iRowid : sqlite3Fts5ExprRowid(pCsr->pExpr);
+}
+
+/*
+** This is the xRowid method. The SQLite core calls this routine to
+** retrieve the rowid for the current row of the result set. fts5
+** exposes %_content.rowid as the rowid for the virtual table. The
+** rowid should be written to *pRowid.
+*/
+static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
+  const int ePlan = pCsr->ePlan;
+
+  assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
+
+  if( ePlan==FTS5_PLAN_SPECIAL ){
+    /* The single row of a special query is reported with rowid 0. */
+    *pRowid = 0;
+  }else if( ePlan==FTS5_PLAN_SOURCE
+         || ePlan==FTS5_PLAN_MATCH
+         || ePlan==FTS5_PLAN_SORTED_MATCH
+  ){
+    *pRowid = fts5CursorRowid(pCsr);
+  }else{
+    /* Scan and rowid-lookup plans read column 0 of the content
+    ** statement. */
+    *pRowid = sqlite3_column_int64(pCsr->pStmt, 0);
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Make sure the cursor's content statement is positioned on the current
+** row. A statement handle is obtained first if the cursor has none. Then,
+** if the FTS5CSR_REQUIRE_CONTENT flag is set, the statement is reset,
+** bound to the current rowid and stepped.
+**
+** Returns SQLITE_OK on success (clearing FTS5CSR_REQUIRE_CONTENT after a
+** successful step). If the step yields no row, the error from
+** sqlite3_reset() is returned, or FTS5_CORRUPT if the reset itself
+** reports SQLITE_OK.
+**
+** If bErrormsg is true, sqlite3Fts5StorageStmt() is given the address of
+** sqlite3_vtab.zErrMsg so that it may leave an error message there.
+*/
+static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){
+  int rc = SQLITE_OK;
+
+  /* Lazily obtain the statement handle. */
+  if( pCsr->pStmt==0 ){
+    Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+    char **pzErr = (bErrormsg ? &pTab->base.zErrMsg : 0);
+    rc = sqlite3Fts5StorageStmt(
+        pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, pzErr
+    );
+    assert( rc!=SQLITE_OK || pTab->base.zErrMsg==0 );
+    assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) );
+  }
+
+  /* Nothing more to do on error, or if the row is already loaded. */
+  if( rc!=SQLITE_OK || !CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){
+    return rc;
+  }
+
+  assert( pCsr->pExpr );
+  sqlite3_reset(pCsr->pStmt);
+  sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr));
+  if( sqlite3_step(pCsr->pStmt)==SQLITE_ROW ){
+    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT);
+    return SQLITE_OK;
+  }
+
+  /* No row was returned: report the reset error, or corruption. */
+  rc = sqlite3_reset(pCsr->pStmt);
+  return (rc==SQLITE_OK) ? FTS5_CORRUPT : rc;
+}
+
+/*
+** Format a printf-style message with sqlite3_vmprintf() and store the
+** result in the virtual table's zErrMsg slot. The slot must be empty
+** when this is called (asserted).
+*/
+static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){
+  va_list args;                   /* ... printf arguments */
+  assert( p->base.zErrMsg==0 );
+  va_start(args, zFormat);
+  p->base.zErrMsg = sqlite3_vmprintf(zFormat, args);
+  va_end(args);
+}
+
+/*
+** Handle an FTS "special" INSERT, i.e. a statement of the form:
+**
+**     INSERT INTO fts(fts) VALUES($pCmd)
+**     INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal)
+**
+** zCmd is the text assigned to the table-name column and pVal the value
+** assigned to the rank column. Returns SQLITE_OK on success or an SQLite
+** error code.
+**
+** Recognised commands (compared case-insensitively):
+**
+**   'delete-all'       rejected for FTS5_CONTENT_NORMAL tables
+**   'rebuild'          rejected for FTS5_CONTENT_NONE tables
+**   'optimize'
+**   'merge'            pVal is read as an integer argument
+**   'integrity-check'
+**   'prefix-index'     SQLITE_DEBUG builds only
+**
+** Anything else is treated as a configuration option: the stored config
+** is loaded, the option applied to the in-memory config and, unless it
+** was flagged as invalid, written to storage.
+**
+** These commands are documented in the "Special INSERT Directives"
+** section of the documentation, which should be updated if more commands
+** are added here.
+*/
+static int fts5SpecialInsert(
+  Fts5Table *pTab,                /* Fts5 table object */
+  const char *zCmd,               /* Text inserted into table-name column */
+  sqlite3_value *pVal             /* Value inserted into rank column */
+){
+  Fts5Config *pConfig = pTab->pConfig;
+  int rc;
+  int bError = 0;
+
+  if( 0==sqlite3_stricmp("delete-all", zCmd) ){
+    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
+      return sqlite3Fts5StorageDeleteAll(pTab->pStorage);
+    }
+    fts5SetVtabError(pTab,
+        "'delete-all' may only be used with a "
+        "contentless or external content fts5 table"
+    );
+    return SQLITE_ERROR;
+  }
+
+  if( 0==sqlite3_stricmp("rebuild", zCmd) ){
+    if( pConfig->eContent!=FTS5_CONTENT_NONE ){
+      return sqlite3Fts5StorageRebuild(pTab->pStorage);
+    }
+    fts5SetVtabError(pTab,
+        "'rebuild' may not be used with a contentless fts5 table"
+    );
+    return SQLITE_ERROR;
+  }
+
+  if( 0==sqlite3_stricmp("optimize", zCmd) ){
+    return sqlite3Fts5StorageOptimize(pTab->pStorage);
+  }
+
+  if( 0==sqlite3_stricmp("merge", zCmd) ){
+    return sqlite3Fts5StorageMerge(pTab->pStorage, sqlite3_value_int(pVal));
+  }
+
+  if( 0==sqlite3_stricmp("integrity-check", zCmd) ){
+    return sqlite3Fts5StorageIntegrity(pTab->pStorage);
+  }
+
+#ifdef SQLITE_DEBUG
+  if( 0==sqlite3_stricmp("prefix-index", zCmd) ){
+    pConfig->bPrefixIndex = sqlite3_value_int(pVal);
+    return SQLITE_OK;
+  }
+#endif
+
+  /* Not a built-in command: treat it as a configuration option. */
+  rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, zCmd, pVal, &bError);
+  }
+  if( rc==SQLITE_OK ){
+    rc = bError
+       ? SQLITE_ERROR
+       : sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0);
+  }
+  return rc;
+}
+
+/*
+** Handle the special 'delete' command, which fts5UpdateMethod() routes
+** here for tables whose content mode is not FTS5_CONTENT_NORMAL.
+**
+** apVal[] is the xUpdate argument array. apVal[1] holds the rowid to
+** delete and &apVal[2] is handed to sqlite3Fts5StorageDelete() as the
+** column values. If apVal[1] is not an integer nothing is done and
+** SQLITE_OK is returned.
+*/
+static int fts5SpecialDelete(
+  Fts5Table *pTab,
+  sqlite3_value **apVal
+){
+  if( sqlite3_value_type(apVal[1])!=SQLITE_INTEGER ){
+    return SQLITE_OK;
+  }
+  return sqlite3Fts5StorageDelete(
+      pTab->pStorage, sqlite3_value_int64(apVal[1]), &apVal[2]
+  );
+}
+
+/*
+** Insert a row via the storage layer: first the content insert, which
+** writes the rowid to *piRowid, then the index insert using that rowid.
+**
+** This is a no-op if *pRc is not SQLITE_OK on entry. Otherwise *pRc is
+** set to the result of the first step that fails, or to SQLITE_OK.
+*/
+static void fts5StorageInsert(
+  int *pRc,
+  Fts5Table *pTab,
+  sqlite3_value **apVal,
+  i64 *piRowid
+){
+  if( *pRc!=SQLITE_OK ) return;
+  *pRc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid);
+  if( *pRc==SQLITE_OK ){
+    *pRc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid);
+  }
+}
+
+/*
+** This function is the implementation of the xUpdate callback used by
+** FTS5 virtual tables. It is invoked by SQLite each time a row is to be
+** inserted, updated or deleted.
+**
+** A delete specifies a single argument - the rowid of the row to remove.
+**
+** Update and insert operations pass:
+**
+** 1. The "old" rowid, or NULL.
+** 2. The "new" rowid.
+** 3. Values for each of the nCol matchable columns.
+** 4. Values for the two hidden columns (<tablename> and "rank").
+**
+** An INSERT whose <tablename> column is not NULL is a "special" command
+** and is dispatched to fts5SpecialDelete() or fts5SpecialInsert().
+**
+** Config.pzErrmsg points at the vtab error-message slot for the duration
+** of the call and is cleared again before returning.
+*/
+static int fts5UpdateMethod(
+ sqlite3_vtab *pVtab, /* Virtual table handle */
+ int nArg, /* Size of argument array */
+ sqlite3_value **apVal, /* Array of arguments */
+ sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
+){
+ Fts5Table *pTab = (Fts5Table*)pVtab;
+ Fts5Config *pConfig = pTab->pConfig;
+ int eType0; /* value_type() of apVal[0] */
+ int rc = SQLITE_OK; /* Return code */
+
+ /* A transaction must be open when this is called. */
+ assert( pTab->ts.eState==1 );
+
+ assert( pVtab->zErrMsg==0 );
+ assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
+ assert( nArg==1
+ || sqlite3_value_type(apVal[1])==SQLITE_INTEGER
+ || sqlite3_value_type(apVal[1])==SQLITE_NULL
+ );
+ assert( pTab->pConfig->pzErrmsg==0 );
+ pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg;
+
+ /* Put any active cursors into REQUIRE_SEEK state. */
+ fts5TripCursors(pTab);
+
+ eType0 = sqlite3_value_type(apVal[0]);
+ if( eType0==SQLITE_NULL
+ && sqlite3_value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL
+ ){
+ /* A "special" INSERT op. These are handled separately. */
+ const char *z = (const char*)sqlite3_value_text(apVal[2+pConfig->nCol]);
+ if( pConfig->eContent!=FTS5_CONTENT_NORMAL
+ && 0==sqlite3_stricmp("delete", z)
+ ){
+ rc = fts5SpecialDelete(pTab, apVal);
+ }else{
+ rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]);
+ }
+ }else{
+ /* A regular INSERT, UPDATE or DELETE statement. The trick here is that
+ ** any conflict on the rowid value must be detected before any
+ ** modifications are made to the database file. There are 4 cases:
+ **
+ ** 1) DELETE
+ ** 2) UPDATE (rowid not modified)
+ ** 3) UPDATE (rowid modified)
+ ** 4) INSERT
+ **
+ ** Cases 3 and 4 may violate the rowid constraint.
+ */
+ int eConflict = SQLITE_ABORT;
+ if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
+ eConflict = sqlite3_vtab_on_conflict(pConfig->db);
+ }
+
+ assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL );
+ assert( nArg!=1 || eType0==SQLITE_INTEGER );
+
+ /* Filter out attempts to run UPDATE or DELETE on contentless tables.
+ ** This is not supported. */
+ if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){
+ pTab->base.zErrMsg = sqlite3_mprintf(
+ "cannot %s contentless fts5 table: %s",
+ (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName
+ );
+ rc = SQLITE_ERROR;
+ }
+
+ /* Case 1: DELETE */
+ else if( nArg==1 ){
+ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0);
+ }
+
+ /* Case 4: INSERT */
+ else if( eType0!=SQLITE_INTEGER ){
+ /* If this is a REPLACE, first remove the current entry (if any) */
+ if( eConflict==SQLITE_REPLACE
+ && sqlite3_value_type(apVal[1])==SQLITE_INTEGER
+ ){
+ i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
+ }
+ fts5StorageInsert(&rc, pTab, apVal, pRowid);
+ }
+
+ /* Cases 2 and 3: UPDATE */
+ else{
+ i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */
+ i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */
+ if( iOld!=iNew ){
+ /* Case 3: the rowid is being changed. */
+ if( eConflict==SQLITE_REPLACE ){
+ /* REPLACE: drop both the old row and any row already at iNew. */
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
+ }
+ fts5StorageInsert(&rc, pTab, apVal, pRowid);
+ }else{
+ /* Attempt the content insert first; the old row is only deleted
+ ** once that insert has succeeded. */
+ rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *pRowid);
+ }
+ }
+ }else{
+ /* Case 2: rowid unchanged - delete then re-insert the row. */
+ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
+ fts5StorageInsert(&rc, pTab, apVal, pRowid);
+ }
+ }
+ }
+
+ pTab->pConfig->pzErrmsg = 0;
+ return rc;
+}
+
+/*
+** xSync method. Trips all cursors open on the table and then calls
+** sqlite3Fts5StorageSync() with a second argument of 1. While the sync
+** runs, Config.pzErrmsg points at the vtab's zErrMsg slot; it is cleared
+** again before returning the sync result.
+*/
+static int fts5SyncMethod(sqlite3_vtab *pVtab){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  int rcSync;
+
+  fts5CheckTransactionState(pTab, FTS5_SYNC, 0);
+
+  pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg;
+  fts5TripCursors(pTab);
+  rcSync = sqlite3Fts5StorageSync(pTab->pStorage, 1);
+  pTab->pConfig->pzErrmsg = 0;
+
+  return rcSync;
+}
+
+/*
+** xBegin method. Apart from the transaction-state check (a no-op for
+** NDEBUG builds) this does nothing and always returns SQLITE_OK.
+*/
+static int fts5BeginMethod(sqlite3_vtab *pVtab){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  UNUSED_PARAM(pTab);  /* Call below is a no-op for NDEBUG builds */
+  fts5CheckTransactionState(pTab, FTS5_BEGIN, 0);
+  return SQLITE_OK;
+}
+
+/*
+** xCommit method. Nothing needs to be written here: the contents of the
+** pending-terms hash-table have already been flushed into the database
+** by fts5SyncMethod(). Always returns SQLITE_OK.
+*/
+static int fts5CommitMethod(sqlite3_vtab *pVtab){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  UNUSED_PARAM(pTab);  /* Call below is a no-op for NDEBUG builds */
+  fts5CheckTransactionState(pTab, FTS5_COMMIT, 0);
+  return SQLITE_OK;
+}
+
+/*
+** xRollback method. Discards the contents of the pending-terms
+** hash-table via sqlite3Fts5StorageRollback() and returns its result.
+** Changes already made to the database are reverted by SQLite itself.
+*/
+static int fts5RollbackMethod(sqlite3_vtab *pVtab){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0);
+  return sqlite3Fts5StorageRollback(pTab->pStorage);
+}
+
+/* Forward declaration; fts5CsrPoslist() is defined further down. */
+static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*);
+
+/*
+** Extension API: return the pUserData pointer of the auxiliary function
+** currently recorded in the cursor's pAux field.
+*/
+static void *fts5ApiUserData(Fts5Context *pCtx){
+  return ((Fts5Cursor*)pCtx)->pAux->pUserData;
+}
+
+/*
+** Extension API: return the nCol value from the configuration of the
+** table that the cursor belongs to.
+*/
+static int fts5ApiColumnCount(Fts5Context *pCtx){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+  return pTab->pConfig->nCol;
+}
+
+/*
+** Extension API: forward to sqlite3Fts5StorageSize() for column iCol of
+** the cursor's table. The size is written to *pnToken and the storage
+** layer's return code is passed back unchanged.
+*/
+static int fts5ApiColumnTotalSize(
+  Fts5Context *pCtx,
+  int iCol,
+  sqlite3_int64 *pnToken
+){
+  Fts5Table *pTab = (Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab);
+  return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken);
+}
+
+/*
+** Extension API: forward to sqlite3Fts5StorageRowCount() for the
+** cursor's table. The count is written to *pnRow and the storage layer's
+** return code is passed back unchanged.
+*/
+static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){
+  Fts5Table *pTab = (Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab);
+  return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
+}
+
+/*
+** Extension API: tokenize the nText bytes at pText using the
+** configuration of the cursor's table. The text is passed to
+** sqlite3Fts5Tokenize() with the FTS5_TOKENIZE_AUX flag; pUserData and
+** xToken are forwarded as the callback context and callback.
+*/
+static int fts5ApiTokenize(
+  Fts5Context *pCtx,
+  const char *pText, int nText,
+  void *pUserData,
+  int (*xToken)(void*, int, const char*, int, int, int)
+){
+  Fts5Table *pTab = (Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab);
+  Fts5Config *pConfig = pTab->pConfig;
+  return sqlite3Fts5Tokenize(
+      pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
+  );
+}
+
+/*
+** Extension API: return the number of phrases in the cursor's
+** expression, as reported by sqlite3Fts5ExprPhraseCount().
+*/
+static int fts5ApiPhraseCount(Fts5Context *pCtx){
+  return sqlite3Fts5ExprPhraseCount(((Fts5Cursor*)pCtx)->pExpr);
+}
+
+/*
+** Extension API: return the size of phrase iPhrase of the cursor's
+** expression, as reported by sqlite3Fts5ExprPhraseSize().
+*/
+static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
+  return sqlite3Fts5ExprPhraseSize(((Fts5Cursor*)pCtx)->pExpr, iPhrase);
+}
+
+/*
+** Extension API: fetch the text of column iCol for the current row.
+**
+** For a contentless table *pz is set to NULL and *pn to 0. Otherwise the
+** cursor is seeked with fts5SeekCursor() and, on success, *pz and *pn
+** receive the text pointer and byte count of statement column iCol+1.
+** If the seek fails its error code is returned and the outputs are left
+** untouched.
+*/
+static int fts5ApiColumnText(
+  Fts5Context *pCtx,
+  int iCol,
+  const char **pz,
+  int *pn
+){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  int rc;
+
+  if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){
+    *pz = 0;
+    *pn = 0;
+    return SQLITE_OK;
+  }
+
+  rc = fts5SeekCursor(pCsr, 0);
+  if( rc==SQLITE_OK ){
+    const int iStmtCol = iCol+1;
+    *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iStmtCol);
+    *pn = sqlite3_column_bytes(pCsr->pStmt, iStmtCol);
+  }
+  return rc;
+}
+
+/*
+** Obtain the position list for phrase iPhrase of the cursor's current
+** row. A pointer to the list is written to *pa and its size in bytes
+** to *pn.
+**
+** If the FTS5CSR_REQUIRE_POSLIST flag is set and the table does not use
+** detail=full, the expression's position lists are first cleared and
+** repopulated by passing the text of each column to
+** sqlite3Fts5ExprPopulatePoslists(). The flag is then cleared.
+**
+** Returns SQLITE_OK, SQLITE_NOMEM if the populator array could not be
+** allocated, or an error from fetching/populating column text. Note that
+** *pa and *pn are assigned even when an error code is returned.
+*/
+static int fts5CsrPoslist(
+ Fts5Cursor *pCsr,
+ int iPhrase,
+ const u8 **pa,
+ int *pn
+){
+ Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
+ int rc = SQLITE_OK;
+ int bLive = (pCsr->pSorter==0);
+
+ if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){
+
+ if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
+ Fts5PoslistPopulator *aPopulator;
+ int i;
+ aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive);
+ if( aPopulator==0 ) rc = SQLITE_NOMEM;
+ /* Feed the text of every column to the populator, stopping at the
+ ** first error. */
+ for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){
+ int n; const char *z;
+ rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5ExprPopulatePoslists(
+ pConfig, pCsr->pExpr, aPopulator, i, z, n
+ );
+ }
+ }
+ sqlite3_free(aPopulator);
+
+ if( pCsr->pSorter ){
+ sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid);
+ }
+ }
+ CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST);
+ }
+
+ if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){
+ /* Sorted detail=full cursor: slice the list out of the sorter's
+ ** aPoslist buffer. aIdx[i] is used as the end offset of phrase i,
+ ** so phrase 0 starts at offset 0. */
+ Fts5Sorter *pSorter = pCsr->pSorter;
+ int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
+ *pn = pSorter->aIdx[iPhrase] - i1;
+ *pa = &pSorter->aPoslist[i1];
+ }else{
+ *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
+ }
+
+ return rc;
+}
+
+/*
+** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
+** correctly for the current row. Return SQLITE_OK if successful, or an
+** SQLite error code otherwise.
+**
+** aInst[] holds three integers per phrase instance: the phrase index,
+** the column and the offset. Instances are produced by merging the
+** position lists of all phrases, always taking the iterator with the
+** smallest current position next.
+**
+** If growing aInst[] fails, SQLITE_NOMEM is returned. In that case
+** nInstCount only counts the instances actually stored and nInstAlloc
+** still describes the real size of the buffer, so the cursor is left in
+** a consistent state.
+*/
+static int fts5CacheInstArray(Fts5Cursor *pCsr){
+  int rc = SQLITE_OK;
+  Fts5PoslistReader *aIter;       /* One iterator for each phrase */
+  int nIter;                      /* Number of iterators/phrases */
+
+  nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
+  if( pCsr->aInstIter==0 ){
+    int nByte = sizeof(Fts5PoslistReader) * nIter;
+    pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
+  }
+  aIter = pCsr->aInstIter;
+
+  if( aIter ){
+    int nInst = 0;                /* Number instances seen so far */
+    int i;
+
+    /* Initialize all iterators */
+    for(i=0; i<nIter && rc==SQLITE_OK; i++){
+      const u8 *a;
+      int n;
+      rc = fts5CsrPoslist(pCsr, i, &a, &n);
+      if( rc==SQLITE_OK ){
+        sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
+      }
+    }
+
+    if( rc==SQLITE_OK ){
+      while( 1 ){
+        int *aInst;
+        int iBest = -1;
+
+        /* Find the non-EOF iterator with the smallest position. */
+        for(i=0; i<nIter; i++){
+          if( (aIter[i].bEof==0)
+           && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos)
+          ){
+            iBest = i;
+          }
+        }
+        if( iBest<0 ) break;
+
+        nInst++;
+        if( nInst>=pCsr->nInstAlloc ){
+          /* Only commit the new capacity once the realloc has succeeded.
+          ** On failure, undo the nInst++ above so that nInstCount never
+          ** covers an entry that was not written. */
+          int nNew = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32;
+          aInst = (int*)sqlite3_realloc(pCsr->aInst, nNew*sizeof(int)*3);
+          if( aInst==0 ){
+            nInst--;
+            rc = SQLITE_NOMEM;
+            break;
+          }
+          pCsr->aInst = aInst;
+          pCsr->nInstAlloc = nNew;
+        }
+
+        aInst = &pCsr->aInst[3 * (nInst-1)];
+        aInst[0] = iBest;
+        aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
+        aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
+        sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
+      }
+    }
+
+    pCsr->nInstCount = nInst;
+    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST);
+  }
+  return rc;
+}
+
+/*
+** Extension API: write the number of phrase instances in the current
+** row to *pnInst. The instance array is rebuilt first if the
+** FTS5CSR_REQUIRE_INST flag is set; if that fails the error code is
+** returned and *pnInst is not written.
+*/
+static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+
+  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST) ){
+    int rc = fts5CacheInstArray(pCsr);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  *pnInst = pCsr->nInstCount;
+  return SQLITE_OK;
+}
+
+/*
+** Extension API: return phrase instance iIdx of the current row. On
+** success the phrase number, column and offset of the instance are
+** written to *piPhrase, *piCol and *piOff.
+**
+** The instance array is rebuilt first if the FTS5CSR_REQUIRE_INST flag
+** is set; an error from that step is returned as-is. SQLITE_RANGE is
+** returned if iIdx is negative or not less than the instance count.
+** The outputs are not written on any error.
+*/
+static int fts5ApiInst(
+  Fts5Context *pCtx,
+  int iIdx,
+  int *piPhrase,
+  int *piCol,
+  int *piOff
+){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  const int *aEntry;              /* The 3 integers describing iIdx */
+
+  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST) ){
+    int rc = fts5CacheInstArray(pCsr);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+
+  if( iIdx<0 || iIdx>=pCsr->nInstCount ){
+    return SQLITE_RANGE;
+  }
+
+  aEntry = &pCsr->aInst[iIdx*3];
+  *piPhrase = aEntry[0];
+  *piCol = aEntry[1];
+  *piOff = aEntry[2];
+  return SQLITE_OK;
+}
+
+/*
+** Extension API: return the rowid of the cursor's current row. Thin
+** wrapper around fts5CursorRowid().
+*/
+static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  return fts5CursorRowid(pCsr);
+}
+
+/*
+** Tokenizer callback used by fts5ApiColumnSize() to count tokens.
+** pContext points to an int counter, which is incremented for each
+** token that does not carry the FTS5_TOKEN_COLOCATED flag. The token
+** text and offsets are ignored. Always returns SQLITE_OK.
+*/
+static int fts5ColumnSizeCb(
+  void *pContext,                 /* Pointer to int */
+  int tflags,
+  const char *pUnused,            /* Buffer containing token */
+  int nUnused,                    /* Size of token in bytes */
+  int iUnused1,                   /* Start offset of token */
+  int iUnused2                    /* End offset of token */
+){
+  UNUSED_PARAM2(pUnused, nUnused);
+  UNUSED_PARAM2(iUnused1, iUnused2);
+  if( tflags & FTS5_TOKEN_COLOCATED ){
+    return SQLITE_OK;
+  }
+  ++*(int*)pContext;
+  return SQLITE_OK;
+}
+
+/*
+** Extension API: report the size in tokens of column iCol of the
+** current row, or of all columns combined if iCol is negative. The
+** result is written to *pnToken.
+**
+** If the FTS5CSR_REQUIRE_DOCSIZE flag is set, the per-column sizes in
+** Fts5Cursor.aColumnSize[] are refreshed first, in one of three ways:
+**
+**   * Config.bColumnsize set: loaded with sqlite3Fts5StorageDocsize().
+**   * Otherwise, Config.zContent NULL: every indexed column is set to -1.
+**   * Otherwise: the text of every indexed column is tokenized and the
+**     tokens counted by fts5ColumnSizeCb().
+**
+** Returns SQLITE_RANGE (with *pnToken set to 0) if iCol is not less than
+** the number of columns, else the result of the refresh step.
+*/
+static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+  Fts5Config *pConfig = pTab->pConfig;
+  int rc = SQLITE_OK;
+  int i;
+
+  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){
+    if( pConfig->bColumnsize ){
+      rc = sqlite3Fts5StorageDocsize(
+          pTab->pStorage, fts5CursorRowid(pCsr), pCsr->aColumnSize
+      );
+    }else if( pConfig->zContent==0 ){
+      for(i=0; i<pConfig->nCol; i++){
+        if( pConfig->abUnindexed[i] ) continue;
+        pCsr->aColumnSize[i] = -1;
+      }
+    }else{
+      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
+        const char *zText;
+        int nText;
+        int *pSize = &pCsr->aColumnSize[i];
+        if( pConfig->abUnindexed[i] ) continue;
+        *pSize = 0;
+        rc = fts5ApiColumnText(pCtx, i, &zText, &nText);
+        if( rc==SQLITE_OK ){
+          rc = sqlite3Fts5Tokenize(
+              pConfig, FTS5_TOKENIZE_AUX, zText, nText,
+              (void*)pSize, fts5ColumnSizeCb
+          );
+        }
+      }
+    }
+    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
+  }
+
+  /* Column index out of range. */
+  if( iCol>=pConfig->nCol ){
+    *pnToken = 0;
+    return SQLITE_RANGE;
+  }
+
+  /* A single column. */
+  if( iCol>=0 ){
+    *pnToken = pCsr->aColumnSize[iCol];
+    return rc;
+  }
+
+  /* Negative iCol: sum over all columns. */
+  *pnToken = 0;
+  for(i=0; i<pConfig->nCol; i++){
+    *pnToken += pCsr->aColumnSize[i];
+  }
+  return rc;
+}
+
+/*
+** Implementation of the xSetAuxdata() method.
+**
+** Associates pPtr (and its optional destructor xDelete) with the pair
+** (cursor, currently executing auxiliary function). If an entry for the
+** current function already exists its old pointer is released with the
+** old destructor, if any, before being replaced. Otherwise a new
+** Fts5Auxdata entry is pushed onto the front of the cursor's list.
+**
+** If allocating a new entry fails, xDelete (if not NULL) is invoked on
+** pPtr and the allocation error code is returned.
+*/
+static int fts5ApiSetAuxdata(
+  Fts5Context *pCtx,              /* Fts5 context */
+  void *pPtr,                     /* Pointer to save as auxdata */
+  void(*xDelete)(void*)           /* Destructor for pPtr (or NULL) */
+){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Auxdata *pData = pCsr->pAuxdata;
+
+  /* Look for an existing entry owned by the running aux function. */
+  while( pData && pData->pAux!=pCsr->pAux ){
+    pData = pData->pNext;
+  }
+
+  if( pData==0 ){
+    int rc = SQLITE_OK;
+    pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata));
+    if( pData==0 ){
+      if( xDelete ) xDelete(pPtr);
+      return rc;
+    }
+    pData->pAux = pCsr->pAux;
+    pData->pNext = pCsr->pAuxdata;
+    pCsr->pAuxdata = pData;
+  }else if( pData->xDelete ){
+    pData->xDelete(pData->pPtr);
+  }
+
+  pData->xDelete = xDelete;
+  pData->pPtr = pPtr;
+  return SQLITE_OK;
+}
+
+/*
+** Extension API: return the auxdata pointer stored for the currently
+** executing auxiliary function on this cursor, or NULL if there is none.
+** If bClear is true, the stored pointer and its destructor are both
+** zeroed, so the destructor will not be run on the returned pointer.
+*/
+static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Auxdata *pData;
+
+  for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
+    if( pData->pAux==pCsr->pAux ){
+      void *pRet = pData->pPtr;
+      if( bClear ){
+        pData->pPtr = 0;
+        pData->xDelete = 0;
+      }
+      return pRet;
+    }
+  }
+
+  return 0;
+}
+
+/*
+** Extension API: advance a phrase iterator to its next position.
+**
+** If the iterator is exhausted (a>=b), *piCol and *piOff are both set
+** to -1. Otherwise one varint is read. A value of 1 introduces a new
+** column: the next varint becomes *piCol, *piOff restarts at 0 and a
+** further varint is read. In either case the last varint read, minus 2,
+** is then added to *piOff.
+*/
+static void fts5ApiPhraseNext(
+  Fts5Context *pUnused,
+  Fts5PhraseIter *pIter,
+  int *piCol, int *piOff
+){
+  int iVal;
+  UNUSED_PARAM(pUnused);
+
+  if( pIter->a>=pIter->b ){
+    *piCol = -1;
+    *piOff = -1;
+    return;
+  }
+
+  pIter->a += fts5GetVarint32(pIter->a, iVal);
+  if( iVal==1 ){
+    pIter->a += fts5GetVarint32(pIter->a, iVal);
+    *piCol = iVal;
+    *piOff = 0;
+    pIter->a += fts5GetVarint32(pIter->a, iVal);
+  }
+  *piOff += (iVal-2);
+}
+
+/*
+** Extension API: initialise pIter to iterate through the position list
+** of phrase iPhrase for the current row, and load the first position
+** into *piCol and *piOff via fts5ApiPhraseNext().
+**
+** If the position list cannot be obtained the error code is returned
+** and *piCol and *piOff are left unmodified.
+*/
+static int fts5ApiPhraseFirst(
+  Fts5Context *pCtx,
+  int iPhrase,
+  Fts5PhraseIter *pIter,
+  int *piCol, int *piOff
+){
+  int nByte;
+  int rc = fts5CsrPoslist((Fts5Cursor*)pCtx, iPhrase, &pIter->a, &nByte);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  pIter->b = &pIter->a[nByte];
+  *piCol = 0;
+  *piOff = 0;
+  fts5ApiPhraseNext(pCtx, pIter, piCol, piOff);
+  return SQLITE_OK;
+}
+
+/*
+** Extension API: advance a phrase column-iterator to the next column
+** that contains the phrase. *piCol is set to -1 once the iterator is
+** exhausted (a>=b).
+**
+** For detail=columns tables each entry is a single varint; its value
+** minus 2 is added to *piCol. For other detail modes the buffer is
+** scanned forward until a 0x01 byte is found at the start of a varint,
+** and the varint following that byte is stored in *piCol.
+*/
+static void fts5ApiPhraseNextColumn(
+ Fts5Context *pCtx,
+ Fts5PhraseIter *pIter,
+ int *piCol
+){
+ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+ Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
+
+ if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
+ if( pIter->a>=pIter->b ){
+ *piCol = -1;
+ }else{
+ int iIncr;
+ pIter->a += fts5GetVarint32(&pIter->a[0], iIncr);
+ *piCol += (iIncr-2);
+ }
+ }else{
+ /* Skip varints until the next 0x01 marker byte or end of buffer. */
+ while( 1 ){
+ int dummy;
+ if( pIter->a>=pIter->b ){
+ *piCol = -1;
+ return;
+ }
+ if( pIter->a[0]==0x01 ) break;
+ pIter->a += fts5GetVarint32(pIter->a, dummy);
+ }
+ /* Step over the marker byte and read the column number after it. */
+ pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
+ }
+}
+
+/*
+** Extension API: initialise pIter to iterate through the columns that
+** contain phrase iPhrase in the current row, and store the first such
+** column in *piCol (-1 if the list is empty).
+**
+** For detail=columns tables the list comes from the sorter, if the
+** cursor has one, or else from sqlite3Fts5ExprPhraseCollist(); the first
+** entry is then loaded with fts5ApiPhraseNextColumn(). For other detail
+** modes the phrase's position list is used: it either begins with a
+** 0x01 marker followed by the column number, or the first column is 0.
+**
+** Returns SQLITE_OK, or an error code from fetching the list.
+*/
+static int fts5ApiPhraseFirstColumn(
+  Fts5Context *pCtx,
+  int iPhrase,
+  Fts5PhraseIter *pIter,
+  int *piCol
+){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
+  int rc = SQLITE_OK;
+  int nByte;
+
+  if( pConfig->eDetail!=FTS5_DETAIL_COLUMNS ){
+    rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &nByte);
+    if( rc!=SQLITE_OK ){
+      return rc;
+    }
+    pIter->b = &pIter->a[nByte];
+    if( nByte<=0 ){
+      *piCol = -1;
+    }else if( pIter->a[0]==0x01 ){
+      pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
+    }else{
+      *piCol = 0;
+    }
+    return SQLITE_OK;
+  }
+
+  /* detail=columns */
+  if( pCsr->pSorter ){
+    Fts5Sorter *pSorter = pCsr->pSorter;
+    int iStart = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
+    nByte = pSorter->aIdx[iPhrase] - iStart;
+    pIter->a = &pSorter->aPoslist[iStart];
+  }else{
+    rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &nByte);
+  }
+  if( rc==SQLITE_OK ){
+    pIter->b = &pIter->a[nByte];
+    *piCol = 0;
+    fts5ApiPhraseNextColumn(pCtx, pIter, piCol);
+  }
+
+  return rc;
+}
+
+
+/* Forward declaration: fts5ApiQueryPhrase() is referenced by the sFts5Api
+** table below, but its definition needs that table to exist first. */
+static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
+ int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
+);
+
+/*
+** The extension API method table. A pointer to this object is passed to
+** auxiliary functions by fts5ApiInvoke() and to xQueryPhrase callbacks
+** by fts5ApiQueryPhrase().
+**
+** The initializer is positional, so the order of entries must match the
+** order of members in the Fts5ExtensionApi structure declaration.
+*/
+static const Fts5ExtensionApi sFts5Api = {
+ 2, /* iVersion */
+ fts5ApiUserData,
+ fts5ApiColumnCount,
+ fts5ApiRowCount,
+ fts5ApiColumnTotalSize,
+ fts5ApiTokenize,
+ fts5ApiPhraseCount,
+ fts5ApiPhraseSize,
+ fts5ApiInstCount,
+ fts5ApiInst,
+ fts5ApiRowid,
+ fts5ApiColumnText,
+ fts5ApiColumnSize,
+ fts5ApiQueryPhrase,
+ fts5ApiSetAuxdata,
+ fts5ApiGetAuxdata,
+ fts5ApiPhraseFirst,
+ fts5ApiPhraseNext,
+ fts5ApiPhraseFirstColumn,
+ fts5ApiPhraseNextColumn,
+};
+
+/*
+** Implementation of API function xQueryPhrase().
+**
+** Opens a temporary cursor on the same table, gives it a clone of phrase
+** iPhrase from the current cursor's expression, and invokes xCallback
+** once for every row the new cursor visits. The callback receives the
+** extension API table, the temporary cursor as its Fts5Context, and
+** pUserData.
+**
+** Iteration stops early if the callback returns anything other than
+** SQLITE_OK. A return of SQLITE_DONE is converted to SQLITE_OK; any
+** other value is returned to the caller. The temporary cursor is always
+** closed before returning.
+*/
+static int fts5ApiQueryPhrase(
+  Fts5Context *pCtx,
+  int iPhrase,
+  void *pUserData,
+  int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*)
+){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+  Fts5Cursor *pNew = 0;
+  int rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
+
+  if( rc==SQLITE_OK ){
+    pNew->ePlan = FTS5_PLAN_MATCH;
+    pNew->iFirstRowid = SMALLEST_INT64;
+    pNew->iLastRowid = LARGEST_INT64;
+    pNew->base.pVtab = (sqlite3_vtab*)pTab;
+    rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr);
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = fts5CursorFirst(pTab, pNew, 0);
+    while( rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0 ){
+      rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData);
+      if( rc==SQLITE_DONE ){
+        rc = SQLITE_OK;
+        break;
+      }
+      if( rc!=SQLITE_OK ) break;
+      rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew);
+    }
+  }
+
+  fts5CloseMethod((sqlite3_vtab_cursor*)pNew);
+  return rc;
+}
+
+/*
+** Invoke auxiliary function pAux in the context of cursor pCsr.
+**
+** Fts5Cursor.pAux is set to pAux for the duration of the call and reset
+** to NULL afterwards. The extension API methods that read pCsr->pAux
+** (fts5ApiUserData, fts5ApiSetAuxdata, fts5ApiGetAuxdata) use it to
+** identify the running function. It must be NULL on entry (asserted).
+*/
+static void fts5ApiInvoke(
+  Fts5Auxiliary *pAux,
+  Fts5Cursor *pCsr,
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  const Fts5ExtensionApi *pApi = &sFts5Api;
+  Fts5Context *pFts = (Fts5Context*)pCsr;
+
+  assert( pCsr->pAux==0 );
+  pCsr->pAux = pAux;
+  pAux->xFunc(pApi, pFts, context, argc, argv);
+  pCsr->pAux = 0;
+}
+
+/*
+** Search the global list of cursors for the one with id iCsrId. Returns
+** a pointer to the cursor, or NULL if no cursor has that id.
+*/
+static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
+  Fts5Cursor *pCsr = pGlobal->pCsr;
+  while( pCsr && pCsr->iCsrId!=iCsrId ){
+    pCsr = pCsr->pNext;
+  }
+  return pCsr;
+}
+
+/*
+** SQL function implementation installed by fts5FindFunctionMethod() for
+** every auxiliary function. argv[0] holds a cursor id; the remaining
+** arguments are handed on to the auxiliary function itself.
+**
+** If no cursor with the given id exists, the SQL function fails with the
+** error message "no such cursor: <id>".
+*/
+static void fts5ApiCallback(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  Fts5Auxiliary *pAux;
+  Fts5Cursor *pCsr;
+  i64 iCsrId;
+  char *zErr;
+
+  assert( argc>=1 );
+  pAux = (Fts5Auxiliary*)sqlite3_user_data(context);
+  iCsrId = sqlite3_value_int64(argv[0]);
+
+  pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId);
+  if( pCsr ){
+    fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]);
+    return;
+  }
+
+  zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId);
+  sqlite3_result_error(context, zErr, -1);
+  sqlite3_free(zErr);
+}
+
+
+/*
+** Given cursor id iCsrId, return a pointer to the corresponding Fts5Index
+** object. Or NULL if the cursor id does not exist.
+**
+** If successful, set *ppConfig to point to the associated config object
+** before returning. If the cursor id does not exist, *ppConfig is set
+** to NULL.
+*/
+static Fts5Index *sqlite3Fts5IndexFromCsrid(
+  Fts5Global *pGlobal,            /* FTS5 global context for db handle */
+  i64 iCsrId,                     /* Id of cursor to find */
+  Fts5Config **ppConfig           /* OUT: Configuration object */
+){
+  Fts5Cursor *pCsr;
+  Fts5Table *pTab;
+
+  pCsr = fts5CursorFromCsrid(pGlobal, iCsrId);
+  if( pCsr==0 ){
+    /* fts5CursorFromCsrid() returns NULL for an unknown id. Honour the
+    ** documented contract instead of dereferencing it. */
+    *ppConfig = 0;
+    return 0;
+  }
+
+  pTab = (Fts5Table*)pCsr->base.pVtab;
+  *ppConfig = pTab->pConfig;
+  return pTab->pIndex;
+}
+
+/*
+** Return a "position-list blob" corresponding to the current position of
+** cursor pCsr via sqlite3_result_blob(). A position-list blob contains
+** the current position-list for each phrase in the query associated with
+** cursor pCsr.
+**
+** A position-list blob begins with (nPhrase-1) varints, where nPhrase is
+** the number of phrases in the query. Following the varints are the
+** concatenated position lists for each phrase, in order.
+**
+** The first varint (if it exists) contains the size of the position list
+** for phrase 0. The second (same disclaimer) contains the size of position
+** list 1. And so on. There is no size field for the final position list,
+** as it can be derived from the total size of the blob.
+**
+** For detail=columns tables the lists are obtained with
+** sqlite3Fts5ExprPhraseCollist() rather than sqlite3Fts5ExprPoslist().
+** For any other detail mode except detail=full, the buffer is left
+** empty.
+**
+** Returns SQLITE_OK or an SQLite error code. sqlite3_result_blob() is
+** called with whatever has been accumulated even if an error occurred.
+*/
+static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
+ int i;
+ int rc = SQLITE_OK;
+ int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
+ Fts5Buffer val;
+
+ memset(&val, 0, sizeof(Fts5Buffer));
+ switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){
+ case FTS5_DETAIL_FULL:
+
+ /* Append the varints */
+ for(i=0; i<(nPhrase-1); i++){
+ const u8 *dummy;
+ int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy);
+ sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
+ }
+
+ /* Append the position lists */
+ for(i=0; i<nPhrase; i++){
+ const u8 *pPoslist;
+ int nPoslist;
+ nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist);
+ sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
+ }
+ break;
+
+ case FTS5_DETAIL_COLUMNS:
+
+ /* Append the varints */
+ for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){
+ const u8 *dummy;
+ int nByte;
+ rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte);
+ sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
+ }
+
+ /* Append the lists returned by sqlite3Fts5ExprPhraseCollist() */
+ for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
+ const u8 *pPoslist;
+ int nPoslist;
+ rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist);
+ sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
+ }
+ break;
+
+ default:
+ /* Other detail modes: val stays zeroed, so the result is empty. */
+ break;
+ }
+
+ /* The result takes over val.p; sqlite3_free is passed as destructor. */
+ sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free);
+ return rc;
+}
+
+/*
+** This is the xColumn method, called by SQLite to request a value from
+** the row that the supplied cursor currently points to. The cursor must
+** not be at EOF (asserted).
+**
+** With nCol being the number of user columns:
+**
+**   * FTS5_PLAN_SPECIAL cursors return Fts5Cursor.iSpecial for column
+**     nCol and set no result for any other column.
+**   * Column nCol (the column named after the table) returns the cursor
+**     id, for use as the first argument of an auxiliary function.
+**   * Column nCol+1 ("rank") returns a position-list blob for
+**     FTS5_PLAN_SOURCE cursors, or the result of the rank function for
+**     MATCH and SORTED_MATCH cursors.
+**   * Any other column returns the stored content, unless the table is
+**     contentless, in which case no result is set.
+**
+** Returns SQLITE_OK or an SQLite error code. An error encountered while
+** building the position-list blob is reported to the caller rather than
+** being discarded.
+*/
+static int fts5ColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
+  Fts5Config *pConfig = pTab->pConfig;
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
+  int rc = SQLITE_OK;
+
+  assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
+
+  if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){
+    if( iCol==pConfig->nCol ){
+      sqlite3_result_int64(pCtx, pCsr->iSpecial);
+    }
+  }else if( iCol==pConfig->nCol ){
+    /* User is requesting the value of the special column with the same name
+    ** as the table. Return the cursor integer id number. This value is only
+    ** useful in that it may be passed as the first argument to an FTS5
+    ** auxiliary function.  */
+    sqlite3_result_int64(pCtx, pCsr->iCsrId);
+  }else if( iCol==pConfig->nCol+1 ){
+
+    /* The value of the "rank" column. */
+    if( pCsr->ePlan==FTS5_PLAN_SOURCE ){
+      /* Propagate failures (e.g. SQLITE_NOMEM) instead of silently
+      ** returning a truncated blob with SQLITE_OK. */
+      rc = fts5PoslistBlob(pCtx, pCsr);
+    }else if(
+        pCsr->ePlan==FTS5_PLAN_MATCH
+     || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
+    ){
+      if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){
+        fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
+      }
+    }
+  }else if( !fts5IsContentless(pTab) ){
+    rc = fts5SeekCursor(pCsr, 1);
+    if( rc==SQLITE_OK ){
+      sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
+    }
+  }
+  return rc;
+}
+
+
+/*
+** This routine implements the xFindFunction method for the FTS5
+** virtual table.
+**
+** If zName matches an auxiliary function known to fts5FindAuxiliary(),
+** *pxFunc is set to fts5ApiCallback, *ppArg to the Fts5Auxiliary object,
+** and 1 is returned. Otherwise 0 is returned and the outputs are left
+** untouched.
+*/
+static int fts5FindFunctionMethod(
+  sqlite3_vtab *pVtab,            /* Virtual table handle */
+  int nUnused,                    /* Number of SQL function arguments */
+  const char *zName,              /* Name of SQL function */
+  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
+  void **ppArg                    /* OUT: User data for *pxFunc */
+){
+  Fts5Auxiliary *pAux;
+
+  UNUSED_PARAM(nUnused);
+  pAux = fts5FindAuxiliary((Fts5Table*)pVtab, zName);
+  if( pAux==0 ){
+    /* No function of the specified name was found. */
+    return 0;
+  }
+
+  *pxFunc = fts5ApiCallback;
+  *ppArg = (void*)pAux;
+  return 1;
+}
+
+/*
+** Implementation of FTS5 xRename method. Renames an fts5 table by
+** delegating to sqlite3Fts5StorageRename() and returning its result.
+*/
+static int fts5RenameMethod(
+  sqlite3_vtab *pVtab,            /* Virtual table handle */
+  const char *zName               /* New name of table */
+){
+  return sqlite3Fts5StorageRename(((Fts5Table*)pVtab)->pStorage, zName);
+}
+
+/*
+** The xSavepoint() method.
+**
+** Trips all open cursors, then flushes the contents of the pending-terms
+** table to disk by calling sqlite3Fts5StorageSync() with a second
+** argument of 0. Returns the result of that call.
+*/
+static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  int rc;
+  UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
+  fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint);
+  fts5TripCursors(pTab);
+  rc = sqlite3Fts5StorageSync(pTab->pStorage, 0);
+  return rc;
+}
+
+/*
+** The xRelease() method.
+**
+** Performs the same steps as xSavepoint(): all open cursors are tripped
+** and sqlite3Fts5StorageSync() is called with a second argument of 0.
+** Returns the result of that call.
+*/
+static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  int rc;
+  UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
+  fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint);
+  fts5TripCursors(pTab);
+  rc = sqlite3Fts5StorageSync(pTab->pStorage, 0);
+  return rc;
+}
+
+/*
+** The xRollbackTo() method.
+**
+** Trips all open cursors, then discards the contents of the pending
+** terms table via sqlite3Fts5StorageRollback(). Returns the result of
+** that call.
+*/
+static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
+  Fts5Table *pTab = (Fts5Table*)pVtab;
+  int rc;
+  UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
+  fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint);
+  fts5TripCursors(pTab);
+  rc = sqlite3Fts5StorageRollback(pTab->pStorage);
+  return rc;
+}
+
+/*
+** Register a new auxiliary function with global context pGlobal.
+**
+** The function name is first overloaded on the database handle with
+** sqlite3_overload_function(). A new Fts5Auxiliary object is then
+** allocated, with a private copy of zName stored directly after it in
+** the same allocation, and pushed onto the front of pGlobal->pAux.
+**
+** Returns SQLITE_OK on success, the error from the overload call, or
+** SQLITE_NOMEM if the allocation fails.
+*/
+static int fts5CreateAux(
+  fts5_api *pApi,                 /* Global context (one per db handle) */
+  const char *zName,              /* Name of new function */
+  void *pUserData,                /* User data for aux. function */
+  fts5_extension_function xFunc,  /* Aux. function implementation */
+  void(*xDestroy)(void*)          /* Destructor for pUserData */
+){
+  Fts5Global *pGlobal = (Fts5Global*)pApi;
+  Fts5Auxiliary *pAux;
+  int nName;                      /* Size of zName in bytes, including \0 */
+  int nByte;                      /* Bytes of space to allocate */
+  int rc;
+
+  rc = sqlite3_overload_function(pGlobal->db, zName, -1);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+
+  nName = (int)strlen(zName) + 1;
+  nByte = sizeof(Fts5Auxiliary) + nName;
+  pAux = (Fts5Auxiliary*)sqlite3_malloc(nByte);
+  if( pAux==0 ){
+    return SQLITE_NOMEM;
+  }
+
+  memset(pAux, 0, nByte);
+  pAux->zFunc = (char*)&pAux[1];
+  memcpy(pAux->zFunc, zName, nName);
+  pAux->pGlobal = pGlobal;
+  pAux->pUserData = pUserData;
+  pAux->xFunc = xFunc;
+  pAux->xDestroy = xDestroy;
+  pAux->pNext = pGlobal->pAux;
+  pGlobal->pAux = pAux;
+
+  return SQLITE_OK;
+}
+
+/*
+** Register a new tokenizer. This is the implementation of the
+** fts5_api.xCreateTokenizer() method.
+**
+** A new Fts5TokenizerModule is allocated, with a private copy of zName
+** stored directly after it in the same allocation and a copy of
+** *pTokenizer in its x member. It is pushed onto the front of
+** pGlobal->pTok. If the list was empty beforehand, the new module also
+** becomes the default tokenizer (pGlobal->pDfltTok).
+**
+** Returns SQLITE_OK, or SQLITE_NOMEM if the allocation fails.
+*/
+static int fts5CreateTokenizer(
+  fts5_api *pApi,                 /* Global context (one per db handle) */
+  const char *zName,              /* Name of new tokenizer */
+  void *pUserData,                /* User data for the tokenizer */
+  fts5_tokenizer *pTokenizer,     /* Tokenizer implementation */
+  void(*xDestroy)(void*)          /* Destructor for pUserData */
+){
+  Fts5Global *pGlobal = (Fts5Global*)pApi;
+  Fts5TokenizerModule *pNew;
+  int nName;                      /* Size of zName and its \0 terminator */
+  int nByte;                      /* Bytes of space to allocate */
+
+  nName = (int)strlen(zName) + 1;
+  nByte = sizeof(Fts5TokenizerModule) + nName;
+  pNew = (Fts5TokenizerModule*)sqlite3_malloc(nByte);
+  if( pNew==0 ){
+    return SQLITE_NOMEM;
+  }
+
+  memset(pNew, 0, nByte);
+  pNew->zName = (char*)&pNew[1];
+  memcpy(pNew->zName, zName, nName);
+  pNew->pUserData = pUserData;
+  pNew->x = *pTokenizer;
+  pNew->xDestroy = xDestroy;
+  pNew->pNext = pGlobal->pTok;
+  pGlobal->pTok = pNew;
+  if( pNew->pNext==0 ){
+    pGlobal->pDfltTok = pNew;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Look up a registered tokenizer module by name.
+**
+** If zName is NULL the default tokenizer (pGlobal->pDfltTok) is returned.
+** Otherwise the list at pGlobal->pTok is searched using a
+** case-insensitive comparison (sqlite3_stricmp()). Returns NULL if no
+** matching module is found.
+*/
+static Fts5TokenizerModule *fts5LocateTokenizer(
+  Fts5Global *pGlobal,
+  const char *zName
+){
+  Fts5TokenizerModule *pIter;
+
+  if( zName==0 ) return pGlobal->pDfltTok;
+
+  pIter = pGlobal->pTok;
+  while( pIter && sqlite3_stricmp(zName, pIter->zName)!=0 ){
+    pIter = pIter->pNext;
+  }
+  return pIter;
+}
+
+/*
+** Find a tokenizer. This is the implementation of the
+** fts5_api.xFindTokenizer() method.
+**
+** If a tokenizer named zName is registered (or zName is NULL and a default
+** tokenizer exists), copy its method table into *pTokenizer, store its
+** user-data pointer in *ppUserData and return SQLITE_OK.
+**
+** Otherwise return SQLITE_ERROR. In that case both outputs are cleared:
+** *pTokenizer is zeroed and *ppUserData is set to NULL, so that a caller
+** which ignores the return code never sees an uninitialized pointer.
+*/
+static int fts5FindTokenizer(
+  fts5_api *pApi,                 /* Global context (one per db handle) */
+  const char *zName,              /* Name of tokenizer to look up */
+  void **ppUserData,              /* OUT: User data of the tokenizer */
+  fts5_tokenizer *pTokenizer      /* Populate this object */
+){
+  int rc = SQLITE_OK;
+  Fts5TokenizerModule *pMod;
+
+  pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
+  if( pMod ){
+    *pTokenizer = pMod->x;
+    *ppUserData = pMod->pUserData;
+  }else{
+    memset(pTokenizer, 0, sizeof(fts5_tokenizer));
+    *ppUserData = 0;
+    rc = SQLITE_ERROR;
+  }
+
+  return rc;
+}
+
+/*
+** Instantiate the tokenizer named by azArg[0] (or the default tokenizer
+** if nArg==0), passing any remaining arguments to its xCreate() method.
+**
+** On success, *ppTok is the new tokenizer instance, *ppTokApi points at
+** the method table of the module that created it, and SQLITE_OK is
+** returned. On failure an error code is returned and both *ppTok and
+** *ppTokApi are set to NULL.
+**
+** pzErr is optional: if it is not NULL, *pzErr is set to an error message
+** allocated with sqlite3_mprintf() when the tokenizer is unknown or when
+** its constructor fails.
+*/
+static int sqlite3Fts5GetTokenizer(
+  Fts5Global *pGlobal,
+  const char **azArg,
+  int nArg,
+  Fts5Tokenizer **ppTok,
+  fts5_tokenizer **ppTokApi,
+  char **pzErr
+){
+  Fts5TokenizerModule *pMod;
+  int rc = SQLITE_OK;
+
+  pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]);
+  if( pMod==0 ){
+    assert( nArg>0 );
+    rc = SQLITE_ERROR;
+    /* pzErr is treated as optional on the constructor-failure path below,
+    ** so guard it here as well instead of dereferencing it blindly. */
+    if( pzErr ){
+      *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
+    }
+  }else{
+    /* Do not form &azArg[1] when the caller supplied no argument array */
+    rc = pMod->x.xCreate(
+        pMod->pUserData, (azArg ? &azArg[1] : 0), (nArg?nArg-1:0), ppTok
+    );
+    *ppTokApi = &pMod->x;
+    if( rc!=SQLITE_OK && pzErr ){
+      *pzErr = sqlite3_mprintf("error in tokenizer constructor");
+    }
+  }
+
+  /* Never leave partially-populated outputs behind on error */
+  if( rc!=SQLITE_OK ){
+    *ppTokApi = 0;
+    *ppTok = 0;
+  }
+
+  return rc;
+}
+
+/*
+** Release an Fts5Global object and everything registered with it.
+**
+** Every auxiliary function record and then every tokenizer module record
+** is visited in list order; for each one the xDestroy callback (if any) is
+** invoked on its user data before the record is freed. Finally the
+** Fts5Global structure itself is freed.
+*/
+static void fts5ModuleDestroy(void *pCtx){
+  Fts5Global *pGlobal = (Fts5Global*)pCtx;
+  Fts5Auxiliary *pAux;
+  Fts5TokenizerModule *pTok;
+
+  pAux = pGlobal->pAux;
+  while( pAux ){
+    Fts5Auxiliary *pAuxNext = pAux->pNext;
+    if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData);
+    sqlite3_free(pAux);
+    pAux = pAuxNext;
+  }
+
+  pTok = pGlobal->pTok;
+  while( pTok ){
+    Fts5TokenizerModule *pTokNext = pTok->pNext;
+    if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData);
+    sqlite3_free(pTok);
+    pTok = pTokNext;
+  }
+
+  sqlite3_free(pGlobal);
+}
+
+/*
+** Implementation of the zero-argument SQL function fts5().
+**
+** The result is a blob of sizeof(Fts5Global*) bytes holding the raw value
+** of the Fts5Global pointer that was registered as the function's user
+** data.
+*/
+static void fts5Fts5Func(
+  sqlite3_context *pCtx,          /* Function call context */
+  int nArg,                       /* Number of args */
+  sqlite3_value **apUnused        /* Function arguments */
+){
+  char aPtr[8];                   /* Staging buffer for the pointer bytes */
+  Fts5Global *pGlobal;
+
+  UNUSED_PARAM2(nArg, apUnused);
+  assert( nArg==0 );
+
+  pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
+  assert( sizeof(aPtr)>=sizeof(pGlobal) );
+  memcpy(aPtr, (void*)&pGlobal, sizeof(pGlobal));
+
+  /* SQLITE_TRANSIENT: SQLite copies the bytes before aPtr goes away */
+  sqlite3_result_blob(pCtx, aPtr, sizeof(pGlobal), SQLITE_TRANSIENT);
+}
+
+/*
+** Implementation of the fts5_source_id() SQL function. Takes no arguments
+** and returns a fixed text string identifying the FTS5 source revision.
+*/
+static void fts5SourceIdFunc(
+  sqlite3_context *pCtx,          /* Function call context */
+  int nArg,                       /* Number of args */
+  sqlite3_value **apUnused        /* Function arguments */
+){
+  const char *zSourceId =
+      "fts5: 2016-02-19 16:19:23 0cb728c15c66f1bf09cc1e0731a95ba937c6c71c";
+
+  assert( nArg==0 );
+  UNUSED_PARAM2(nArg, apUnused);
+  sqlite3_result_text(pCtx, zSourceId, -1, SQLITE_TRANSIENT);
+}
+
+/*
+** Register the fts5 module and its supporting objects with database
+** handle db.
+**
+** An Fts5Global object is allocated and handed to
+** sqlite3_create_module_v2() together with fts5ModuleDestroy() as its
+** destructor. After that the index, expression, auxiliary-function,
+** tokenizer and vocab sub-modules are initialized in that order, and the
+** SQL functions fts5() and fts5_source_id() are created. The sequence
+** stops at the first step that fails.
+**
+** Returns SQLITE_OK on success, SQLITE_NOMEM if the global object cannot
+** be allocated, or the error code of the first failing step.
+*/
+static int fts5Init(sqlite3 *db){
+  static const sqlite3_module fts5Mod = {
+    /* iVersion      */ 2,
+    /* xCreate       */ fts5CreateMethod,
+    /* xConnect      */ fts5ConnectMethod,
+    /* xBestIndex    */ fts5BestIndexMethod,
+    /* xDisconnect   */ fts5DisconnectMethod,
+    /* xDestroy      */ fts5DestroyMethod,
+    /* xOpen         */ fts5OpenMethod,
+    /* xClose        */ fts5CloseMethod,
+    /* xFilter       */ fts5FilterMethod,
+    /* xNext         */ fts5NextMethod,
+    /* xEof          */ fts5EofMethod,
+    /* xColumn       */ fts5ColumnMethod,
+    /* xRowid        */ fts5RowidMethod,
+    /* xUpdate       */ fts5UpdateMethod,
+    /* xBegin        */ fts5BeginMethod,
+    /* xSync         */ fts5SyncMethod,
+    /* xCommit       */ fts5CommitMethod,
+    /* xRollback     */ fts5RollbackMethod,
+    /* xFindFunction */ fts5FindFunctionMethod,
+    /* xRename       */ fts5RenameMethod,
+    /* xSavepoint    */ fts5SavepointMethod,
+    /* xRelease      */ fts5ReleaseMethod,
+    /* xRollbackTo   */ fts5RollbackToMethod,
+  };
+
+  Fts5Global *pGlobal;            /* Per-connection global context */
+  void *pCtx;                     /* pGlobal as an untyped context pointer */
+  int rc;
+
+  pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
+  if( pGlobal==0 ) return SQLITE_NOMEM;
+
+  memset(pGlobal, 0, sizeof(Fts5Global));
+  pGlobal->db = db;
+  pGlobal->api.iVersion = 2;
+  pGlobal->api.xCreateFunction = fts5CreateAux;
+  pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
+  pGlobal->api.xFindTokenizer = fts5FindTokenizer;
+
+  pCtx = (void*)pGlobal;
+  rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, pCtx, fts5ModuleDestroy);
+  if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
+  if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
+  if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);
+  if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api);
+  if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, "fts5", 0, SQLITE_UTF8, pCtx, fts5Fts5Func, 0, 0
+    );
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, "fts5_source_id", 0, SQLITE_UTF8, pCtx, fts5SourceIdFunc, 0, 0
+    );
+  }
+
+  /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
+  ** fts5_test_mi.c is compiled and linked into the executable. And call
+  ** its entry point to enable the matchinfo() demo. */
+#ifdef SQLITE_FTS5_ENABLE_TEST_MI
+  if( rc==SQLITE_OK ){
+    extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*);
+    rc = sqlite3Fts5TestRegisterMatchinfo(db);
+  }
+#endif
+
+  return rc;
+}
+
+/*
+** The following functions are used to register the module with SQLite. If
+** this module is being built as part of the SQLite core (SQLITE_CORE is
+** defined), then sqlite3_open() will call sqlite3Fts5Init() directly.
+**
+** Or, if this module is being built as a loadable extension,
+** sqlite3Fts5Init() is omitted and the two standard entry points
+** sqlite3_fts_init() and sqlite3_fts5_init() are defined instead.
+**
+** All three entry points forward to fts5Init(db) and return its result
+** code. The two loadable-extension entry points additionally invoke
+** SQLITE_EXTENSION_INIT2(pApi) first and never write to *pzErrMsg.
+*/
+#ifndef SQLITE_CORE
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+int sqlite3_fts_init(
+ sqlite3 *db, /* Database handle to register fts5 with */
+ char **pzErrMsg, /* Unused */
+ const sqlite3_api_routines *pApi /* Passed to SQLITE_EXTENSION_INIT2() */
+){
+ SQLITE_EXTENSION_INIT2(pApi);
+ (void)pzErrMsg; /* Unused parameter */
+ return fts5Init(db);
+}
+
+#ifdef _WIN32
+__declspec(dllexport)
+#endif
+int sqlite3_fts5_init(
+ sqlite3 *db, /* Database handle to register fts5 with */
+ char **pzErrMsg, /* Unused */
+ const sqlite3_api_routines *pApi /* Passed to SQLITE_EXTENSION_INIT2() */
+){
+ SQLITE_EXTENSION_INIT2(pApi);
+ (void)pzErrMsg; /* Unused parameter */
+ return fts5Init(db);
+}
+#else /* SQLITE_CORE is defined: built as part of the SQLite core */
+int sqlite3Fts5Init(sqlite3 *db){
+ return fts5Init(db);
+}
+#endif /* SQLITE_CORE */
+
+#line 1 "fts5_storage.c"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+*/
+
+
+
+/* #include "fts5Int.h" */
+
+struct Fts5Storage { /* Storage-layer handle, allocated by sqlite3Fts5StorageOpen() */
+ Fts5Config *pConfig; /* Configuration of the FTS5 table served by this handle */
+ Fts5Index *pIndex; /* Index handle that tokens and totals are written to */
+ int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */
+ i64 nTotalRow; /* Total number of rows in FTS table */
+ i64 *aTotalSize; /* Total sizes of each column (array follows this struct in the same allocation) */
+ sqlite3_stmt *aStmt[11]; /* Prepared statements, indexed by FTS5_STMT_XXX; prepared on first use */
+};
+
+
+#if FTS5_STMT_SCAN_ASC!=0 /* Slots 0..2 use constants defined outside this section; verify they match the azStmt[] order in fts5StorageGetStmt() */
+# error "FTS5_STMT_SCAN_ASC mismatch"
+#endif
+#if FTS5_STMT_SCAN_DESC!=1
+# error "FTS5_STMT_SCAN_DESC mismatch"
+#endif
+#if FTS5_STMT_LOOKUP!=2
+# error "FTS5_STMT_LOOKUP mismatch"
+#endif
+
+#define FTS5_STMT_INSERT_CONTENT 3 /* INSERT INTO %_content */
+#define FTS5_STMT_REPLACE_CONTENT 4 /* REPLACE INTO %_content */
+#define FTS5_STMT_DELETE_CONTENT 5 /* DELETE FROM %_content WHERE id=? */
+#define FTS5_STMT_REPLACE_DOCSIZE 6 /* REPLACE INTO %_docsize */
+#define FTS5_STMT_DELETE_DOCSIZE 7 /* DELETE FROM %_docsize WHERE id=? */
+#define FTS5_STMT_LOOKUP_DOCSIZE 8 /* SELECT sz FROM %_docsize WHERE id=? */
+#define FTS5_STMT_REPLACE_CONFIG 9 /* REPLACE INTO %_config */
+#define FTS5_STMT_SCAN 10 /* Unordered scan of the content source */
+
+/*
+** Obtain the prepared statement identified by eStmt (one of the
+** FTS5_STMT_XXX constants). Statements are compiled the first time they
+** are requested and then cached in Fts5Storage.aStmt[] for later calls.
+**
+** *ppStmt is always set to the current value of p->aStmt[eStmt].
+**
+** Return SQLITE_OK if successful, or an SQLite error code if an error
+** occurs. SQLITE_NOMEM is returned if the SQL text cannot be allocated.
+** If sqlite3_prepare_v2() fails and pzErrMsg is not NULL, *pzErrMsg is set
+** to a copy of the database error message made with sqlite3_mprintf().
+*/
+static int fts5StorageGetStmt(
+ Fts5Storage *p, /* Storage handle */
+ int eStmt, /* FTS5_STMT_XXX constant */
+ sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */
+ char **pzErrMsg /* OUT: Error message (if any) */
+){
+ int rc = SQLITE_OK;
+
+ /* If there is no %_docsize table, there should be no requests for
+ ** statements to operate on it. */
+ assert( p->pConfig->bColumnsize || (
+ eStmt!=FTS5_STMT_REPLACE_DOCSIZE
+ && eStmt!=FTS5_STMT_DELETE_DOCSIZE
+ && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE
+ ));
+
+ assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) );
+ if( p->aStmt[eStmt]==0 ){ /* Not cached yet: build the SQL and prepare it */
+ const char *azStmt[] = { /* Format strings, indexed by FTS5_STMT_XXX value */
+ "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
+ "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
+ "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */
+
+ "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */
+ "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */
+ "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */
+ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* REPLACE_DOCSIZE */
+ "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */
+
+ "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */
+
+ "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */
+ "SELECT %s FROM %s AS T", /* SCAN */
+ };
+ Fts5Config *pC = p->pConfig;
+ char *zSql = 0; /* Remains NULL if any allocation below fails */
+
+ switch( eStmt ){ /* Each case supplies the arguments its format string expects */
+ case FTS5_STMT_SCAN:
+ zSql = sqlite3_mprintf(azStmt[eStmt],
+ pC->zContentExprlist, pC->zContent
+ );
+ break;
+
+ case FTS5_STMT_SCAN_ASC:
+ case FTS5_STMT_SCAN_DESC:
+ zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist,
+ pC->zContent, pC->zContentRowid, pC->zContentRowid,
+ pC->zContentRowid
+ );
+ break;
+
+ case FTS5_STMT_LOOKUP:
+ zSql = sqlite3_mprintf(azStmt[eStmt],
+ pC->zContentExprlist, pC->zContent, pC->zContentRowid
+ );
+ break;
+
+ case FTS5_STMT_INSERT_CONTENT:
+ case FTS5_STMT_REPLACE_CONTENT: {
+ int nCol = pC->nCol + 1; /* One bound value per column, plus one more */
+ char *zBind; /* Becomes "?,?,...,?" with nCol placeholders */
+ int i;
+
+ zBind = sqlite3_malloc(1 + nCol*2);
+ if( zBind ){
+ for(i=0; i<nCol; i++){
+ zBind[i*2] = '?';
+ zBind[i*2 + 1] = ',';
+ }
+ zBind[i*2-1] = '\0'; /* Overwrite the final ',' with the terminator */
+ zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind);
+ sqlite3_free(zBind);
+ }
+ break;
+ }
+
+ default: /* Remaining statements only need the database and table name */
+ zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName);
+ break;
+ }
+
+ if( zSql==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_prepare_v2(pC->db, zSql, -1, &p->aStmt[eStmt], 0);
+ sqlite3_free(zSql);
+ if( rc!=SQLITE_OK && pzErrMsg ){
+ *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db));
+ }
+ }
+ }
+
+ *ppStmt = p->aStmt[eStmt]; /* Written on both success and failure */
+ return rc;
+}
+
+
+/*
+** Format an SQL script printf-style (using sqlite3_vmprintf()) and run it
+** with sqlite3_exec() on database handle db.
+**
+** pzErr is passed straight through to sqlite3_exec() and may be NULL.
+** Returns SQLITE_NOMEM if the SQL text cannot be allocated, otherwise the
+** result of sqlite3_exec().
+*/
+static int fts5ExecPrintf(
+  sqlite3 *db,
+  char **pzErr,
+  const char *zFormat,
+  ...
+){
+  va_list ap;                     /* ... printf arguments */
+  char *zSql;                     /* Formatted SQL text */
+  int rc = SQLITE_NOMEM;          /* Result if formatting fails */
+
+  va_start(ap, zFormat);
+  zSql = sqlite3_vmprintf(zFormat, ap);
+  va_end(ap);
+
+  if( zSql ){
+    rc = sqlite3_exec(db, zSql, 0, 0, pzErr);
+    sqlite3_free(zSql);
+  }
+  return rc;
+}
+
+/*
+** Drop all shadow tables belonging to the FTS5 table described by pConfig.
+**
+** The %_data, %_idx and %_config tables are always dropped. The %_docsize
+** table is dropped only if pConfig->bColumnsize is set, and the %_content
+** table only if the table uses FTS5_CONTENT_NORMAL storage.
+**
+** Return SQLITE_OK if successful or an SQLite error code otherwise.
+*/
+static int sqlite3Fts5DropAll(Fts5Config *pConfig){
+  int rc;
+
+  rc = fts5ExecPrintf(pConfig->db, 0,
+      "DROP TABLE IF EXISTS %Q.'%q_data';"
+      "DROP TABLE IF EXISTS %Q.'%q_idx';"
+      "DROP TABLE IF EXISTS %Q.'%q_config';",
+      pConfig->zDb, pConfig->zName,
+      pConfig->zDb, pConfig->zName,
+      pConfig->zDb, pConfig->zName
+  );
+  if( rc!=SQLITE_OK ) return rc;
+
+  if( pConfig->bColumnsize ){
+    rc = fts5ExecPrintf(pConfig->db, 0,
+        "DROP TABLE IF EXISTS %Q.'%q_docsize';",
+        pConfig->zDb, pConfig->zName
+    );
+    if( rc!=SQLITE_OK ) return rc;
+  }
+
+  if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
+    rc = fts5ExecPrintf(pConfig->db, 0,
+        "DROP TABLE IF EXISTS %Q.'%q_content';",
+        pConfig->zDb, pConfig->zName
+    );
+  }
+  return rc;
+}
+
+/*
+** Rename a single shadow table from "<current name>_<zTail>" to
+** "<zName>_<zTail>".
+**
+** This is a no-op if *pRc already holds an error code when the function
+** is called. Otherwise *pRc receives the result of the ALTER TABLE
+** statement.
+*/
+static void fts5StorageRenameOne(
+  Fts5Config *pConfig,            /* Current FTS5 configuration */
+  int *pRc,                       /* IN/OUT: Error code */
+  const char *zTail,              /* Tail of table name e.g. "data", "config" */
+  const char *zName               /* New name of FTS5 table */
+){
+  if( *pRc!=SQLITE_OK ) return;
+
+  *pRc = fts5ExecPrintf(pConfig->db, 0,
+      "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';",
+      pConfig->zDb, pConfig->zName, zTail, zName, zTail
+  );
+}
+
+/*
+** Rename every shadow table of the FTS5 table so that it carries the new
+** table name zName.
+**
+** The storage layer is synced first (sqlite3Fts5StorageSync(pStorage, 1)).
+** The data, idx and config tables are then renamed, followed by docsize
+** (only if bColumnsize is set) and content (only for FTS5_CONTENT_NORMAL).
+** The first error encountered stops further renames and is returned.
+*/
+static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
+  static const char *azAlways[] = { "data", "idx", "config" };
+  Fts5Config *pConfig = pStorage->pConfig;
+  int rc;
+  int ii;
+
+  rc = sqlite3Fts5StorageSync(pStorage, 1);
+
+  for(ii=0; ii<(int)(sizeof(azAlways)/sizeof(azAlways[0])); ii++){
+    fts5StorageRenameOne(pConfig, &rc, azAlways[ii], zName);
+  }
+  if( pConfig->bColumnsize ){
+    fts5StorageRenameOne(pConfig, &rc, "docsize", zName);
+  }
+  if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
+    fts5StorageRenameOne(pConfig, &rc, "content", zName);
+  }
+  return rc;
+}
+
+/*
+** Create the shadow table named "<table name>_<zPost>" with column
+** definition zDefn. If bWithout is true the table is created WITHOUT
+** ROWID.
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise. If
+** sqlite3_exec() produced an error message, *pzErr is set to a message
+** (allocated with sqlite3_mprintf()) that names the shadow table and
+** embeds the original text.
+*/
+static int sqlite3Fts5CreateTable(
+  Fts5Config *pConfig,            /* FTS5 configuration */
+  const char *zPost,              /* Shadow table to create (e.g. "content") */
+  const char *zDefn,              /* Columns etc. for shadow table */
+  int bWithout,                   /* True for without rowid */
+  char **pzErr                    /* OUT: Error message */
+){
+  const char *zWithout = bWithout ? " WITHOUT ROWID" : "";
+  char *zExecErr = 0;             /* Error message from sqlite3_exec() */
+  int rc;
+
+  rc = fts5ExecPrintf(pConfig->db, &zExecErr,
+      "CREATE TABLE %Q.'%q_%q'(%s)%s",
+      pConfig->zDb, pConfig->zName, zPost, zDefn, zWithout
+  );
+
+  if( zExecErr ){
+    *pzErr = sqlite3_mprintf(
+        "fts5: error creating shadow table %q_%s: %s",
+        pConfig->zName, zPost, zExecErr
+    );
+    sqlite3_free(zExecErr);
+  }
+
+  return rc;
+}
+
+/*
+** Open a new Fts5Storage handle. If the bCreate argument is true, create
+** and initialize the underlying shadow tables: %_content (only for
+** FTS5_CONTENT_NORMAL tables), %_docsize (only if bColumnsize is set) and
+** %_config, and then store the "version" config value.
+**
+** If successful, set *pp to point to the new object and return SQLITE_OK.
+** Otherwise, set *pp to NULL and return an SQLite error code.
+*/
+static int sqlite3Fts5StorageOpen(
+  Fts5Config *pConfig,
+  Fts5Index *pIndex,
+  int bCreate,
+  Fts5Storage **pp,
+  char **pzErr                    /* OUT: Error message */
+){
+  int rc = SQLITE_OK;
+  Fts5Storage *pNew;              /* New object */
+  int nByte;                      /* Bytes of space to allocate */
+
+  /* One allocation holds the handle followed by the aTotalSize[] array */
+  nByte = sizeof(Fts5Storage) + pConfig->nCol * sizeof(i64);
+  pNew = (Fts5Storage*)sqlite3_malloc(nByte);
+  *pp = pNew;
+  if( pNew==0 ) return SQLITE_NOMEM;
+
+  memset(pNew, 0, nByte);
+  pNew->aTotalSize = (i64*)&pNew[1];
+  pNew->pConfig = pConfig;
+  pNew->pIndex = pIndex;
+
+  if( bCreate ){
+    if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
+      /* Build "id INTEGER PRIMARY KEY, c0, c1, ..." for %_content */
+      int nDefn = 32 + pConfig->nCol*10;
+      char *zDefn = sqlite3_malloc(nDefn);
+      if( zDefn ){
+        int iCol;
+        int iOff;
+        sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY");
+        iOff = (int)strlen(zDefn);
+        for(iCol=0; iCol<pConfig->nCol; iCol++){
+          sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", iCol);
+          iOff += (int)strlen(&zDefn[iOff]);
+        }
+        rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr);
+        sqlite3_free(zDefn);
+      }else{
+        rc = SQLITE_NOMEM;
+      }
+    }
+
+    if( rc==SQLITE_OK && pConfig->bColumnsize ){
+      rc = sqlite3Fts5CreateTable(
+          pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr
+      );
+    }
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts5CreateTable(
+          pConfig, "config", "k PRIMARY KEY, v", 1, pzErr
+      );
+    }
+    if( rc==SQLITE_OK ){
+      rc = sqlite3Fts5StorageConfigValue(
+          pNew, "version", 0, FTS5_CURRENT_VERSION
+      );
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3Fts5StorageClose(pNew);
+    *pp = 0;
+  }
+  return rc;
+}
+
+/*
+** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
+**
+** Every cached prepared statement is finalized and the handle is freed.
+** Passing NULL is a no-op. Always returns SQLITE_OK.
+*/
+static int sqlite3Fts5StorageClose(Fts5Storage *p){
+  int iStmt;
+
+  if( p==0 ) return SQLITE_OK;
+
+  /* Finalize all SQL statements */
+  for(iStmt=0; iStmt<ArraySize(p->aStmt); iStmt++){
+    sqlite3_finalize(p->aStmt[iStmt]);
+  }
+  sqlite3_free(p);
+
+  return SQLITE_OK;
+}
+
+typedef struct Fts5InsertCtx Fts5InsertCtx; /* Context passed to fts5StorageInsertCallback() */
+struct Fts5InsertCtx {
+ Fts5Storage *pStorage; /* Storage handle whose pIndex receives the tokens */
+ int iCol; /* Column number passed to sqlite3Fts5IndexWrite() */
+ int szCol; /* Size of column value in tokens */
+};
+
+/*
+** Tokenization callback used when inserting tokens into the FTS index.
+**
+** The token position is pCtx->szCol-1. The counter is advanced before the
+** write unless the token is flagged FTS5_TOKEN_COLOCATED, in which case it
+** shares the position of the previous token. A colocated token that is the
+** first token of the column still advances the counter.
+*/
+static int fts5StorageInsertCallback(
+  void *pContext,                 /* Pointer to Fts5InsertCtx object */
+  int tflags,
+  const char *pToken,             /* Buffer containing token */
+  int nToken,                     /* Size of token in bytes */
+  int iUnused1,                   /* Start offset of token */
+  int iUnused2                    /* End offset of token */
+){
+  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
+  Fts5Index *pIndex = pCtx->pStorage->pIndex;
+  int bNewPos;                    /* True if this token gets a new position */
+
+  UNUSED_PARAM2(iUnused1, iUnused2);
+
+  bNewPos = ( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 );
+  if( bNewPos ){
+    pCtx->szCol++;
+  }
+
+  return sqlite3Fts5IndexWrite(
+      pIndex, pCtx->iCol, pCtx->szCol-1, pToken, nToken
+  );
+}
+
+/*
+** If a row with rowid iDel is present in the %_content table, add the
+** delete-markers to the FTS index necessary to delete it. Do not actually
+** remove the %_content row at this time though.
+**
+** If apVal is NULL, the column values are read back using the
+** FTS5_STMT_LOOKUP statement; if that lookup returns no row, the function
+** returns the result of sqlite3_reset() without writing to the index.
+** Otherwise the text of column N is taken from apVal[N].
+**
+** Each indexed column is re-tokenized and its token count is subtracted
+** from p->aTotalSize[]. p->nTotalRow is decremented after the loop,
+** whether or not the loop completed without error.
+*/
+static int fts5StorageDeleteFromIndex(
+ Fts5Storage *p,
+ i64 iDel,
+ sqlite3_value **apVal
+){
+ Fts5Config *pConfig = p->pConfig;
+ sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from the content table */
+ int rc; /* Return code */
+ int rc2; /* sqlite3_reset() return code */
+ int iCol;
+ Fts5InsertCtx ctx;
+
+ if( apVal==0 ){ /* No values supplied by the caller: look the row up */
+ rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0);
+ if( rc!=SQLITE_OK ) return rc;
+ sqlite3_bind_int64(pSeek, 1, iDel);
+ if( sqlite3_step(pSeek)!=SQLITE_ROW ){
+ return sqlite3_reset(pSeek); /* Row not found, or the step failed */
+ }
+ }
+
+ ctx.pStorage = p;
+ ctx.iCol = -1; /* NOTE(review): stays -1 for every column; presumably how the index layer recognizes delete entries - confirm */
+ rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); /* Second argument is 1 here and 0 on the insert paths */
+ for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ /* iCol is 1-based */
+ if( pConfig->abUnindexed[iCol-1]==0 ){
+ const char *zText;
+ int nText;
+ if( pSeek ){
+ zText = (const char*)sqlite3_column_text(pSeek, iCol);
+ nText = sqlite3_column_bytes(pSeek, iCol);
+ }else{
+ zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
+ nText = sqlite3_value_bytes(apVal[iCol-1]);
+ }
+ ctx.szCol = 0; /* Restart the token count for this column */
+ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
+ zText, nText, (void*)&ctx, fts5StorageInsertCallback
+ );
+ p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
+ }
+ }
+ p->nTotalRow--;
+
+ rc2 = sqlite3_reset(pSeek); /* pSeek is NULL when apVal was supplied */
+ if( rc==SQLITE_OK ) rc = rc2;
+ return rc;
+}
+
+
+/*
+** Insert a record into the %_docsize table. Specifically, do:
+**
+**   INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf);
+**
+** If there is no %_docsize table (as happens if the columnsize=0 option
+** is specified when the FTS5 table is created), this function is a no-op
+** that returns SQLITE_OK.
+**
+** Otherwise the return value is the error from obtaining the statement,
+** or the result of sqlite3_reset() after the statement has been stepped.
+*/
+static int fts5StorageInsertDocsize(
+  Fts5Storage *p,                 /* Storage module to write to */
+  i64 iRowid,                     /* id value */
+  Fts5Buffer *pBuf                /* sz value */
+){
+  sqlite3_stmt *pStmt = 0;
+  int rc;
+
+  if( p->pConfig->bColumnsize==0 ) return SQLITE_OK;
+
+  rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pStmt, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  sqlite3_bind_int64(pStmt, 1, iRowid);
+  sqlite3_bind_blob(pStmt, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
+  sqlite3_step(pStmt);
+  return sqlite3_reset(pStmt);
+}
+
+/*
+** Load the contents of the "averages" record from disk into the
+** p->nTotalRow and p->aTotalSize[] variables, unless p->bTotalsValid
+** indicates that they are already loaded.
+**
+** When a load is attempted, p->bTotalsValid is afterwards set to bCache,
+** regardless of whether the load succeeded.
+**
+** Return SQLITE_OK if successful (or if nothing needed loading), or an
+** SQLite error code if an error occurs.
+*/
+static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
+  int rc;
+
+  if( p->bTotalsValid ) return SQLITE_OK;
+
+  rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
+  p->bTotalsValid = bCache;
+  return rc;
+}
+
+/*
+** Store the current contents of the p->nTotalRow and p->aTotalSize[]
+** variables in the "averages" record on disk.
+**
+** The record is built as a sequence of varints: nTotalRow first, followed
+** by one value per column.
+**
+** Return SQLITE_OK if successful, or an SQLite error code if an error
+** occurs.
+*/
+static int fts5StorageSaveTotals(Fts5Storage *p){
+  const int nCol = p->pConfig->nCol;
+  Fts5Buffer rec;                 /* Serialized averages record */
+  int rc = SQLITE_OK;
+  int iCol;
+
+  memset(&rec, 0, sizeof(rec));
+
+  sqlite3Fts5BufferAppendVarint(&rc, &rec, p->nTotalRow);
+  for(iCol=0; iCol<nCol; iCol++){
+    sqlite3Fts5BufferAppendVarint(&rc, &rec, p->aTotalSize[iCol]);
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts5IndexSetAverages(p->pIndex, rec.p, rec.n);
+  }
+  sqlite3_free(rec.p);
+
+  return rc;
+}
+
+/*
+** Remove a row from the FTS table.
+**
+** The steps are, in order: load the totals, add delete entries to the
+** index, delete the %_docsize row (if the table has one), delete the
+** %_content row (FTS5_CONTENT_NORMAL tables only), and write the updated
+** totals back. Processing stops at the first error, which is returned.
+**
+** apVal must be NULL for FTS5_CONTENT_NORMAL tables.
+*/
+static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){
+  Fts5Config *pConfig = p->pConfig;
+  sqlite3_stmt *pStmt = 0;
+  int rc;
+
+  assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 );
+
+  rc = fts5StorageLoadTotals(p, 1);
+
+  /* Delete the index records */
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageDeleteFromIndex(p, iDel, apVal);
+  }
+
+  /* Delete the %_docsize record */
+  if( rc==SQLITE_OK && pConfig->bColumnsize ){
+    rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pStmt, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pStmt, 1, iDel);
+      sqlite3_step(pStmt);
+      rc = sqlite3_reset(pStmt);
+    }
+  }
+
+  /* Delete the %_content record */
+  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
+    rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pStmt, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pStmt, 1, iDel);
+      sqlite3_step(pStmt);
+      rc = sqlite3_reset(pStmt);
+    }
+  }
+
+  /* Write the averages record */
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageSaveTotals(p);
+  }
+
+  return rc;
+}
+
+/*
+** Delete all entries in the FTS5 index.
+**
+** The %_data and %_idx tables are emptied, as is %_docsize if the table
+** has one. The index is then reinitialized and the "version" config value
+** rewritten. Returns SQLITE_OK or the first error encountered.
+*/
+static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){
+  Fts5Config *pConfig = p->pConfig;
+  int rc;
+
+  /* Delete the contents of the %_data and %_idx tables. */
+  rc = fts5ExecPrintf(pConfig->db, 0,
+      "DELETE FROM %Q.'%q_data';"
+      "DELETE FROM %Q.'%q_idx';",
+      pConfig->zDb, pConfig->zName,
+      pConfig->zDb, pConfig->zName
+  );
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Delete the contents of the %_docsize table, if there is one. */
+  if( pConfig->bColumnsize ){
+    rc = fts5ExecPrintf(pConfig->db, 0,
+        "DELETE FROM %Q.'%q_docsize';",
+        pConfig->zDb, pConfig->zName
+    );
+    if( rc!=SQLITE_OK ) return rc;
+  }
+
+  /* Reinitialize the %_data table. This call creates the initial structure
+  ** and averages records. */
+  rc = sqlite3Fts5IndexReinit(p->pIndex);
+  if( rc!=SQLITE_OK ) return rc;
+
+  return sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
+}
+
+/*
+** Discard the contents of the FTS index and rebuild it from the rows
+** returned by the FTS5_STMT_SCAN statement.
+**
+** For every scanned row each indexed column is tokenized into the index,
+** the per-column token counts are written to %_docsize (if the table has
+** one) and accumulated into p->aTotalSize[], and p->nTotalRow is
+** incremented. Finally the totals record is written.
+**
+** The scan statement is always reset before returning. This releases the
+** cached statement even when the loop is abandoned part-way through, and
+** it surfaces an error from sqlite3_step() that would otherwise be
+** indistinguishable from reaching the end of the scan.
+**
+** Returns SQLITE_OK on success or an SQLite error code.
+*/
+static int sqlite3Fts5StorageRebuild(Fts5Storage *p){
+  Fts5Buffer buf = {0,0,0};       /* Buffer used to build each %_docsize blob */
+  Fts5Config *pConfig = p->pConfig;
+  sqlite3_stmt *pScan = 0;        /* Scan of the content source */
+  Fts5InsertCtx ctx;              /* Tokenization callback context */
+  int rc;                         /* Return code */
+  int rc2;                        /* sqlite3_reset() return code */
+
+  memset(&ctx, 0, sizeof(Fts5InsertCtx));
+  ctx.pStorage = p;
+  rc = sqlite3Fts5StorageDeleteAll(p);
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageLoadTotals(p, 1);
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
+  }
+
+  while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){
+    i64 iRowid = sqlite3_column_int64(pScan, 0);
+
+    sqlite3Fts5BufferZero(&buf);
+    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
+    for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
+      ctx.szCol = 0;
+      if( pConfig->abUnindexed[ctx.iCol]==0 ){
+        rc = sqlite3Fts5Tokenize(pConfig,
+            FTS5_TOKENIZE_DOCUMENT,
+            (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
+            sqlite3_column_bytes(pScan, ctx.iCol+1),
+            (void*)&ctx,
+            fts5StorageInsertCallback
+        );
+      }
+      sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
+      p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
+    }
+    p->nTotalRow++;
+
+    if( rc==SQLITE_OK ){
+      rc = fts5StorageInsertDocsize(p, iRowid, &buf);
+    }
+  }
+  sqlite3_free(buf.p);
+
+  /* Reset the scan. sqlite3_reset() returns the error code of a failed
+  ** sqlite3_step(), so this is where a scan failure becomes visible.
+  ** pScan is NULL if an earlier step failed; resetting NULL is harmless. */
+  rc2 = sqlite3_reset(pScan);
+  if( rc==SQLITE_OK ) rc = rc2;
+
+  /* Write the averages record */
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageSaveTotals(p);
+  }
+  return rc;
+}
+
+/*
+** Storage-layer wrapper: forwards to sqlite3Fts5IndexOptimize() on the
+** index handle owned by p and returns its result code.
+*/
+static int sqlite3Fts5StorageOptimize(Fts5Storage *p){
+  Fts5Index *pIndex = p->pIndex;
+  return sqlite3Fts5IndexOptimize(pIndex);
+}
+
+/*
+** Storage-layer wrapper: forwards nMerge to sqlite3Fts5IndexMerge() on the
+** index handle owned by p and returns its result code.
+*/
+static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){
+  Fts5Index *pIndex = p->pIndex;
+  return sqlite3Fts5IndexMerge(pIndex, nMerge);
+}
+
+/*
+** Allocate a new rowid. This is used for "external content" tables when
+** a NULL value is inserted into the rowid column. The new rowid is allocated
+** by inserting a dummy row into the %_docsize table. The dummy will be
+** overwritten later.
+**
+** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In
+** this case the user is required to provide a rowid explicitly.
+**
+** On success *piRowid is set from sqlite3_last_insert_rowid(). It is left
+** untouched on failure.
+*/
+static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){
+  sqlite3_stmt *pStmt = 0;
+  int rc;
+
+  if( p->pConfig->bColumnsize==0 ) return SQLITE_MISMATCH;
+
+  rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pStmt, 0);
+  if( rc==SQLITE_OK ){
+    /* A NULL id makes the table assign a fresh rowid */
+    sqlite3_bind_null(pStmt, 1);
+    sqlite3_bind_null(pStmt, 2);
+    sqlite3_step(pStmt);
+    rc = sqlite3_reset(pStmt);
+  }
+  if( rc==SQLITE_OK ){
+    *piRowid = sqlite3_last_insert_rowid(p->pConfig->db);
+  }
+  return rc;
+}
+
+/*
+** Insert a new row into the FTS content table and report its rowid via
+** *piRowid.
+**
+** For FTS5_CONTENT_NORMAL tables the values apVal[1]..apVal[nCol+1] are
+** bound to the INSERT_CONTENT statement, and *piRowid is set from
+** sqlite3_last_insert_rowid() whether or not the insert succeeded.
+**
+** For other content modes nothing is written to a content table: the
+** rowid is taken from apVal[1] if that is an integer, and otherwise
+** allocated with fts5StorageNewRowid().
+*/
+static int sqlite3Fts5StorageContentInsert(
+  Fts5Storage *p,
+  sqlite3_value **apVal,
+  i64 *piRowid
+){
+  Fts5Config *pConfig = p->pConfig;
+  int rc = SQLITE_OK;
+
+  if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
+    /* Insert the new row into the %_content table. */
+    sqlite3_stmt *pInsert = 0;    /* Statement to write %_content table */
+    int iBind;                    /* Index of the parameter being bound */
+
+    rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
+    for(iBind=1; rc==SQLITE_OK && iBind<=pConfig->nCol+1; iBind++){
+      rc = sqlite3_bind_value(pInsert, iBind, apVal[iBind]);
+    }
+    if( rc==SQLITE_OK ){
+      sqlite3_step(pInsert);
+      rc = sqlite3_reset(pInsert);
+    }
+    *piRowid = sqlite3_last_insert_rowid(pConfig->db);
+  }else if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
+    *piRowid = sqlite3_value_int64(apVal[1]);
+  }else{
+    rc = fts5StorageNewRowid(p, piRowid);
+  }
+
+  return rc;
+}
+
+/*
+** Insert new entries into the FTS index and %_docsize table for the row
+** with rowid iRowid. The text of column N is read from apVal[N+2].
+**
+** Each indexed column is tokenized into the index. The token count of
+** every column (zero for unindexed columns) is appended to the %_docsize
+** blob and added to p->aTotalSize[]. p->nTotalRow is incremented, and
+** finally the %_docsize row and the totals record are written.
+**
+** Returns SQLITE_OK or the first error encountered.
+*/
+static int sqlite3Fts5StorageIndexInsert(
+  Fts5Storage *p,
+  sqlite3_value **apVal,
+  i64 iRowid
+){
+  Fts5Config *pConfig = p->pConfig;
+  Fts5InsertCtx ctx;              /* Tokenization callback context object */
+  Fts5Buffer docsize;             /* Buffer used to build up %_docsize blob */
+  int rc;                         /* Return code */
+  int iCol;                       /* Current column */
+
+  memset(&docsize, 0, sizeof(Fts5Buffer));
+  ctx.pStorage = p;
+
+  rc = fts5StorageLoadTotals(p, 1);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
+  }
+
+  for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
+    ctx.iCol = iCol;
+    ctx.szCol = 0;
+    if( pConfig->abUnindexed[iCol]==0 ){
+      rc = sqlite3Fts5Tokenize(pConfig,
+          FTS5_TOKENIZE_DOCUMENT,
+          (const char*)sqlite3_value_text(apVal[iCol+2]),
+          sqlite3_value_bytes(apVal[iCol+2]),
+          (void*)&ctx,
+          fts5StorageInsertCallback
+      );
+    }
+    sqlite3Fts5BufferAppendVarint(&rc, &docsize, ctx.szCol);
+    p->aTotalSize[iCol] += (i64)ctx.szCol;
+  }
+  p->nTotalRow++;
+
+  /* Write the %_docsize record */
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageInsertDocsize(p, iRowid, &docsize);
+  }
+  sqlite3_free(docsize.p);
+
+  /* Write the averages record */
+  if( rc==SQLITE_OK ){
+    rc = fts5StorageSaveTotals(p);
+  }
+
+  return rc;
+}
+
+/*
+** Count the rows in the shadow table "<table name>_<zSuffix>".
+**
+** If the count query returns a row, *pnRow is set to the count; otherwise
+** *pnRow is left unchanged. Returns SQLITE_NOMEM if the SQL text cannot be
+** allocated, the sqlite3_prepare_v2() error if preparation fails, and
+** otherwise the result of sqlite3_finalize().
+*/
+static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
+  Fts5Config *pConfig = p->pConfig;
+  sqlite3_stmt *pCnt = 0;
+  char *zSql;
+  int rc;
+
+  zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'",
+      pConfig->zDb, pConfig->zName, zSuffix
+  );
+  if( zSql==0 ) return SQLITE_NOMEM;
+
+  rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
+  sqlite3_free(zSql);
+  if( rc!=SQLITE_OK ) return rc;
+
+  if( sqlite3_step(pCnt)==SQLITE_ROW ){
+    *pnRow = sqlite3_column_int64(pCnt, 0);
+  }
+  return sqlite3_finalize(pCnt);
+}
+
+/*
+** Context object used by sqlite3Fts5StorageIntegrity(). A pointer to it is
+** passed to fts5StorageIntegrityCallback() for every token.
+*/
+typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
+struct Fts5IntegrityCtx {
+ i64 iRowid; /* Rowid of the row currently being tokenized */
+ int iCol; /* Column currently being tokenized */
+ int szCol; /* Token positions counted so far in the current column */
+ u64 cksum; /* XOR of the entry checksums of all tokens seen so far */
+ Fts5Termset *pTermset; /* Used to skip repeated terms; may be NULL */
+ Fts5Config *pConfig; /* Table configuration (eDetail, nPrefix, aPrefix[]) */
+};
+
+
+/*
+** Tokenization callback used by integrity check.
+**
+** Folds the checksum of the token, and of each configured prefix of the
+** token, into pCtx->cksum using XOR. A term that sqlite3Fts5TermsetAdd()
+** reports as already present contributes nothing. The column/position
+** pair fed to the checksum depends on pCtx->pConfig->eDetail.
+**
+** Returns SQLITE_OK, or an error code from sqlite3Fts5TermsetAdd().
+*/
+static int fts5StorageIntegrityCallback(
+ void *pContext, /* Pointer to Fts5IntegrityCtx object */
+ int tflags,
+ const char *pToken, /* Buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iUnused1, /* Start offset of token */
+ int iUnused2 /* End offset of token */
+){
+ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
+ Fts5Termset *pTermset = pCtx->pTermset;
+ int bPresent; /* Set by sqlite3Fts5TermsetAdd() */
+ int ii;
+ int rc = SQLITE_OK;
+ int iPos; /* Position value fed to the checksum */
+ int iCol; /* Column value fed to the checksum */
+
+ UNUSED_PARAM2(iUnused1, iUnused2);
+
+ if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ /* Same position counting as fts5StorageInsertCallback() */
+ pCtx->szCol++;
+ }
+
+ switch( pCtx->pConfig->eDetail ){
+ case FTS5_DETAIL_FULL: /* Real column and token position */
+ iPos = pCtx->szCol-1;
+ iCol = pCtx->iCol;
+ break;
+
+ case FTS5_DETAIL_COLUMNS: /* Column number goes in the position slot */
+ iPos = pCtx->iCol;
+ iCol = 0;
+ break;
+
+ default: /* FTS5_DETAIL_NONE: neither column nor position */
+ assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE );
+ iPos = 0;
+ iCol = 0;
+ break;
+ }
+
+ rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); /* Index 0: the full term */
+ if( rc==SQLITE_OK && bPresent==0 ){
+ pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
+ pCtx->iRowid, iCol, iPos, 0, pToken, nToken
+ );
+ }
+
+ for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){ /* Prefix index ii is identified as ii+1 */
+ const int nChar = pCtx->pConfig->aPrefix[ii];
+ int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
+ if( nByte ){ /* Zero means no prefix entry for this token */
+ rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
+ if( bPresent==0 ){ /* NOTE(review): rc is not tested here, unlike the full-term case above */
+ pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
+ pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
+ );
+ }
+ }
+ }
+
+ return rc;
+}
+
+/*
+** Check that the contents of the FTS index match that of the %_content
+** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not (the code
+** reports this as FTS5_CORRUPT). Return some other SQLite error code if an
+** error occurs while attempting to determine this.
+**
+** The check runs in four stages, stopping at the first failure:
+**
+**   1. Re-tokenize every row returned by the FTS5_STMT_SCAN statement to
+**      compute the expected index checksum and per-column totals. If the
+**      table has a %_docsize table, each column's token count is compared
+**      with the stored size.
+**   2. Compare the computed per-column totals with the stored totals.
+**   3. Compare the row counts of %_content and %_docsize with
+**      p->nTotalRow.
+**   4. Pass the expected checksum to sqlite3Fts5IndexIntegrityCheck().
+*/
+static int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
+ Fts5Config *pConfig = p->pConfig;
+ int rc; /* Return code */
+ int *aColSize; /* Array of size pConfig->nCol */
+ i64 *aTotalSize; /* Array of size pConfig->nCol */
+ Fts5IntegrityCtx ctx;
+ sqlite3_stmt *pScan;
+
+ memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
+ ctx.pConfig = p->pConfig;
+ aTotalSize = (i64*)sqlite3_malloc(pConfig->nCol * (sizeof(int)+sizeof(i64))); /* One allocation holds both arrays */
+ if( !aTotalSize ) return SQLITE_NOMEM;
+ aColSize = (int*)&aTotalSize[pConfig->nCol]; /* aColSize[] starts right after aTotalSize[] */
+ memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);
+
+ /* Generate the expected index checksum based on the contents of the
+ ** %_content table. This block stores the checksum in ctx.cksum. */
+ rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
+ if( rc==SQLITE_OK ){
+ int rc2;
+ while( SQLITE_ROW==sqlite3_step(pScan) ){
+ int i;
+ ctx.iRowid = sqlite3_column_int64(pScan, 0);
+ ctx.szCol = 0;
+ if( pConfig->bColumnsize ){
+ rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); /* Stored per-column sizes for this row */
+ }
+ if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){
+ rc = sqlite3Fts5TermsetNew(&ctx.pTermset); /* detail=none: one termset per row */
+ }
+ for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
+ if( pConfig->abUnindexed[i] ) continue;
+ ctx.iCol = i;
+ ctx.szCol = 0;
+ if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
+ rc = sqlite3Fts5TermsetNew(&ctx.pTermset); /* detail=columns: one termset per column */
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT,
+ (const char*)sqlite3_column_text(pScan, i+1),
+ sqlite3_column_bytes(pScan, i+1),
+ (void*)&ctx,
+ fts5StorageIntegrityCallback
+ );
+ }
+ if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
+ rc = FTS5_CORRUPT; /* Token count disagrees with the %_docsize entry */
+ }
+ aTotalSize[i] += ctx.szCol;
+ if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
+ sqlite3Fts5TermsetFree(ctx.pTermset);
+ ctx.pTermset = 0;
+ }
+ }
+ sqlite3Fts5TermsetFree(ctx.pTermset); /* Frees the per-row termset, if one is still set */
+ ctx.pTermset = 0;
+
+ if( rc!=SQLITE_OK ) break;
+ }
+ rc2 = sqlite3_reset(pScan);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
+
+ /* Test that the "totals" (sometimes called "averages") record looks Ok */
+ if( rc==SQLITE_OK ){
+ int i;
+ rc = fts5StorageLoadTotals(p, 0); /* bCache=0 */
+ for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
+ if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
+ }
+ }
+
+ /* Check that the %_docsize and %_content tables contain the expected
+ ** number of rows. */
+ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
+ i64 nRow = 0;
+ rc = fts5StorageCount(p, "content", &nRow);
+ if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
+ }
+ if( rc==SQLITE_OK && pConfig->bColumnsize ){
+ i64 nRow = 0;
+ rc = fts5StorageCount(p, "docsize", &nRow);
+ if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
+ }
+
+ /* Pass the expected checksum down to the FTS index module. It will
+ ** verify, amongst other things, that it matches the checksum generated by
+ ** inspecting the index itself. */
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum);
+ }
+
+ sqlite3_free(aTotalSize); /* Also releases aColSize[], which shares the allocation */
+ return rc;
+}
+
+/*
+** Hand out one of the cached %_content statements to the caller.
+**
+** eStmt must be FTS5_STMT_SCAN_ASC, FTS5_STMT_SCAN_DESC or FTS5_STMT_LOOKUP.
+** On success *pp holds the statement and the matching slot in p->aStmt[]
+** is cleared, so the handle is no longer reachable through the cache. An
+** SQLite error code is returned if the statement cannot be obtained, in
+** which case the cache slot is left untouched.
+*/
+static int sqlite3Fts5StorageStmt(
+  Fts5Storage *p,
+  int eStmt,
+  sqlite3_stmt **pp,
+  char **pzErrMsg
+){
+  int rc;
+
+  assert( eStmt==FTS5_STMT_SCAN_ASC
+       || eStmt==FTS5_STMT_SCAN_DESC
+       || eStmt==FTS5_STMT_LOOKUP
+  );
+
+  rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Detach the handle from the cache before returning it. */
+  assert( p->aStmt[eStmt]==*pp );
+  p->aStmt[eStmt] = 0;
+  return SQLITE_OK;
+}
+
+/*
+** Give back a statement handle previously obtained from
+** sqlite3Fts5StorageStmt(). eStmt must be the same value that was passed
+** to that call.
+**
+** If the cache slot for eStmt is empty the statement is reset and stored
+** there; otherwise the slot is already occupied and pStmt is finalized.
+*/
+static void sqlite3Fts5StorageStmtRelease(
+  Fts5Storage *p,
+  int eStmt,
+  sqlite3_stmt *pStmt
+){
+  assert( eStmt==FTS5_STMT_SCAN_ASC
+       || eStmt==FTS5_STMT_SCAN_DESC
+       || eStmt==FTS5_STMT_LOOKUP
+  );
+
+  if( p->aStmt[eStmt]!=0 ){
+    /* Slot already taken: this handle is surplus. */
+    sqlite3_finalize(pStmt);
+    return;
+  }
+
+  sqlite3_reset(pStmt);
+  p->aStmt[eStmt] = pStmt;
+}
+
+/*
+** Decode nCol varints from the nBlob byte record aBlob[] into aCol[].
+**
+** Returns 0 if exactly nCol values were read and together they consumed
+** the whole blob. Returns non-zero if the blob ran out before nCol values
+** were read, or if bytes were left over afterwards.
+*/
+static int fts5StorageDecodeSizeArray(
+  int *aCol, int nCol,            /* Array to populate */
+  const u8 *aBlob, int nBlob      /* Record to read varints from */
+){
+  int iCol = 0;                   /* Next entry of aCol[] to fill */
+  int nRead = 0;                  /* Bytes of aBlob[] consumed so far */
+
+  while( iCol<nCol ){
+    if( nRead>=nBlob ) return 1;
+    nRead += fts5GetVarint32(&aBlob[nRead], aCol[iCol]);
+    iCol++;
+  }
+  return (nRead!=nBlob);
+}
+
+/*
+** Load the %_docsize record for rowid iRowid and decode it into aCol[],
+** which must have room for one integer per table column.
+**
+** Returns SQLITE_OK on success. If the row is missing or its record does
+** not decode cleanly, FTS5_CORRUPT is returned, unless resetting the
+** lookup statement reports an error of its own, in which case that error
+** code is returned instead.
+*/
+static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
+  int nCol = p->pConfig->nCol;    /* Number of user columns in table */
+  sqlite3_stmt *pLookup = 0;      /* Statement to query %_docsize */
+  int bDecoded = 0;               /* True once aCol[] has been populated */
+  int rc;                         /* Return Code */
+
+  assert( p->pConfig->bColumnsize );
+  rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  sqlite3_bind_int64(pLookup, 1, iRowid);
+  if( sqlite3_step(pLookup)==SQLITE_ROW ){
+    const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
+    int nBlob = sqlite3_column_bytes(pLookup, 0);
+    bDecoded = (fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob)==0);
+  }
+
+  rc = sqlite3_reset(pLookup);
+  if( rc==SQLITE_OK && !bDecoded ){
+    rc = FTS5_CORRUPT;
+  }
+  return rc;
+}
+
+/*
+** Report a value from the totals record through *pnToken.
+**
+** If iCol is negative, *pnToken is set to the sum of p->aTotalSize[] over
+** all columns. If iCol names a valid column, *pnToken is set to that
+** column's entry. If iCol is too large, *pnToken is set to 0 and
+** SQLITE_RANGE is returned. If the totals cannot be loaded the error is
+** returned and *pnToken is not written.
+*/
+static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
+  int nCol;
+  int rc = fts5StorageLoadTotals(p, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  nCol = p->pConfig->nCol;
+  *pnToken = 0;
+  if( iCol>=nCol ) return SQLITE_RANGE;
+
+  if( iCol>=0 ){
+    *pnToken = p->aTotalSize[iCol];
+  }else{
+    int i;
+    for(i=0; i<nCol; i++){
+      *pnToken += p->aTotalSize[i];
+    }
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Load the totals record and copy p->nTotalRow into *pnRow. On failure
+** the error code is returned and *pnRow is left unchanged.
+*/
+static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){
+  int rc = fts5StorageLoadTotals(p, 0);
+  if( rc!=SQLITE_OK ) return rc;
+  *pnRow = p->nTotalRow;
+  return SQLITE_OK;
+}
+
+/*
+** Flush any data currently held in-memory to disk.
+**
+** When bCommit is set and the cached totals are marked valid, they are
+** saved first and the valid flag is cleared (whether or not the save
+** succeeded). If the save fails its error code is returned and the index
+** is not synced. Otherwise the result of sqlite3Fts5IndexSync() is
+** returned.
+*/
+static int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit){
+  int rc = SQLITE_OK;
+
+  if( bCommit && p->bTotalsValid ){
+    rc = fts5StorageSaveTotals(p);
+    p->bTotalsValid = 0;
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts5IndexSync(p->pIndex, bCommit);
+  }
+  return rc;
+}
+
+/*
+** Roll back the storage layer: mark the cached totals as no longer valid
+** and return the result of rolling back the index.
+*/
+static int sqlite3Fts5StorageRollback(Fts5Storage *p){
+ p->bTotalsValid = 0;
+ return sqlite3Fts5IndexRollback(p->pIndex);
+}
+
+/*
+** Store a key/value pair using the FTS5_STMT_REPLACE_CONFIG statement.
+**
+** z is the key. If pVal is not NULL it is bound as the value; otherwise
+** the integer iVal is bound. When a value was supplied through pVal and
+** the write succeeded, the index cookie is advanced by one and the new
+** value mirrored into p->pConfig->iCookie. Returns SQLITE_OK or an SQLite
+** error code.
+*/
+static int sqlite3Fts5StorageConfigValue(
+  Fts5Storage *p,
+  const char *z,
+  sqlite3_value *pVal,
+  int iVal
+){
+  sqlite3_stmt *pReplace = 0;
+  int iNew;
+  int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
+  sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC);
+  if( pVal==0 ){
+    sqlite3_bind_int(pReplace, 2, iVal);
+  }else{
+    sqlite3_bind_value(pReplace, 2, pVal);
+  }
+
+  /* Any failure of the step is reported by the reset that follows. */
+  sqlite3_step(pReplace);
+  rc = sqlite3_reset(pReplace);
+  if( rc!=SQLITE_OK || pVal==0 ) return rc;
+
+  iNew = p->pConfig->iCookie + 1;
+  rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew);
+  if( rc==SQLITE_OK ){
+    p->pConfig->iCookie = iNew;
+  }
+  return rc;
+}
+
+
+
+#line 1 "fts5_tokenize.c"
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+
+/* #include "fts5Int.h" */
+
+/**************************************************************************
+** Start of ascii tokenizer implementation.
+*/
+
+/*
+** For tokenizers with no "unicode" modifier, the set of token characters
+** is the same as the set of ASCII range alphanumeric characters.
+*/
+/* Indexed by ASCII code (0..127): 1 marks a token character, 0 a
+** separator. The entries set to 1 are '0'-'9', 'A'-'Z' and 'a'-'z'. */
+static unsigned char aAsciiTokenChar[128] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30..0x3F */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50..0x5F */
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */
+};
+
+/* State of one "ascii" tokenizer instance. */
+typedef struct AsciiTokenizer AsciiTokenizer;
+struct AsciiTokenizer {
+ unsigned char aTokenChar[128]; /* Per-ASCII-code flag: non-zero = token char */
+};
+
+/*
+** Override the token/separator classification of every ASCII character
+** that appears in the nul-terminated string zArg. Each such character's
+** entry in p->aTokenChar[] is set to bTokenChars. Bytes with the high bit
+** set are ignored.
+*/
+static void fts5AsciiAddExceptions(
+  AsciiTokenizer *p,
+  const char *zArg,
+  int bTokenChars
+){
+  const char *zCsr;
+  for(zCsr=zArg; *zCsr; zCsr++){
+    char c = *zCsr;
+    if( c & 0x80 ) continue;      /* Not in the ASCII range */
+    p->aTokenChar[(int)c] = (unsigned char)bTokenChars;
+  }
+}
+
+/*
+** Delete a "ascii" tokenizer.
+*/
+static void fts5AsciiDelete(Fts5Tokenizer *p){
+ /* The object is released with a single sqlite3_free() call. */
+ sqlite3_free(p);
+}
+
+/*
+** Create an "ascii" tokenizer.
+**
+** azArg[] holds nArg strings forming key/value pairs. The recognised keys
+** (compared case-insensitively) are "tokenchars" and "separators"; the
+** value lists the ASCII characters to reclassify. An odd argument count
+** or an unknown key yields SQLITE_ERROR, allocation failure SQLITE_NOMEM.
+** *ppOut receives the new tokenizer, or NULL on any error.
+*/
+static int fts5AsciiCreate(
+  void *pUnused,
+  const char **azArg, int nArg,
+  Fts5Tokenizer **ppOut
+){
+  AsciiTokenizer *pNew = 0;
+  int rc = SQLITE_OK;
+  int iArg;
+
+  UNUSED_PARAM(pUnused);
+
+  if( (nArg % 2)!=0 ){
+    rc = SQLITE_ERROR;
+  }else if( (pNew = sqlite3_malloc(sizeof(AsciiTokenizer)))==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    /* Start from the default alphanumeric table, then apply overrides. */
+    memset(pNew, 0, sizeof(AsciiTokenizer));
+    memcpy(pNew->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
+
+    for(iArg=0; rc==SQLITE_OK && iArg<nArg; iArg+=2){
+      const char *zKey = azArg[iArg];
+      const char *zVal = azArg[iArg+1];
+      if( sqlite3_stricmp(zKey, "tokenchars")==0 ){
+        fts5AsciiAddExceptions(pNew, zVal, 1);
+      }else if( sqlite3_stricmp(zKey, "separators")==0 ){
+        fts5AsciiAddExceptions(pNew, zVal, 0);
+      }else{
+        rc = SQLITE_ERROR;
+      }
+    }
+
+    if( rc!=SQLITE_OK ){
+      fts5AsciiDelete((Fts5Tokenizer*)pNew);
+      pNew = 0;
+    }
+  }
+
+  *ppOut = (Fts5Tokenizer*)pNew;
+  return rc;
+}
+
+
+/*
+** Copy nByte bytes from aIn[] to aOut[], converting the ASCII upper case
+** letters 'A'..'Z' to lower case. All other bytes are copied unchanged.
+*/
+static void asciiFold(char *aOut, const char *aIn, int nByte){
+  int iByte = 0;
+  while( iByte<nByte ){
+    char ch = aIn[iByte];
+    aOut[iByte] = (ch>='A' && ch<='Z') ? (char)(ch + 32) : ch;
+    iByte++;
+  }
+}
+
+/*
+** Tokenize some text using the ascii tokenizer.
+*/
+static int fts5AsciiTokenize(
+ Fts5Tokenizer *pTokenizer,
+ void *pCtx,
+ int iUnused,
+ const char *pText, int nText,
+ int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
+){
+ AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
+ int rc = SQLITE_OK;
+ int ie; /* Byte offset just past the current token */
+ int is = 0; /* Byte offset of the start of the current token */
+
+ /* Folded tokens are built in the 64 byte stack buffer aFold[]; a heap
+ ** buffer replaces it only when a token longer than nFold is met. */
+ char aFold[64];
+ int nFold = sizeof(aFold);
+ char *pFold = aFold;
+ unsigned char *a = p->aTokenChar;
+
+ UNUSED_PARAM(iUnused);
+
+ while( is<nText && rc==SQLITE_OK ){
+ int nByte;
+
+ /* Skip any leading divider characters. */
+ while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
+ is++;
+ }
+ if( is==nText ) break;
+
+ /* Count the token characters */
+ /* Bytes with the high bit set are always treated as token characters. */
+ ie = is+1;
+ while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
+ ie++;
+ }
+
+ /* Fold to lower case */
+ nByte = ie-is;
+ if( nByte>nFold ){
+ /* Old contents are not needed, so allocate afresh at twice the
+ ** required size rather than reallocating. */
+ if( pFold!=aFold ) sqlite3_free(pFold);
+ pFold = sqlite3_malloc(nByte*2);
+ if( pFold==0 ){
+ rc = SQLITE_NOMEM;
+ break;
+ }
+ nFold = nByte*2;
+ }
+ asciiFold(pFold, &pText[is], nByte);
+
+ /* Invoke the token callback */
+ rc = xToken(pCtx, 0, pFold, nByte, is, ie);
+ /* pText[ie] is either a separator or the end of input, so step over it. */
+ is = ie+1;
+ }
+
+ if( pFold!=aFold ) sqlite3_free(pFold);
+ /* SQLITE_DONE from the callback is reported to the caller as success. */
+ if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+ return rc;
+}
+
+/**************************************************************************
+** Start of unicode61 tokenizer implementation.
+*/
+
+
+/*
+** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
+** from the sqlite3 source file utf.c. If this file is compiled as part
+** of the amalgamation, they are not required.
+*/
+#ifndef SQLITE_AMALGAMATION
+
+/* Lookup table used by READ_UTF8. It is indexed by (lead byte - 0xc0) and
+** supplies the starting value of the codepoint being decoded. */
+static const unsigned char sqlite3Utf8Trans1[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+/* READ_UTF8(zIn, zTerm, c): read one codepoint starting at zIn into c and
+** advance zIn past it. Continuation bytes (10xxxxxx) are consumed only
+** while zIn has not reached zTerm. After a multi-byte decode, a result
+** below 0x80, a value in 0xD800..0xDFFF, or 0xFFFE/0xFFFF is replaced by
+** 0xFFFD. The macro expands to several statements and evaluates its
+** arguments more than once. */
+#define READ_UTF8(zIn, zTerm, c) \
+ c = *(zIn++); \
+ if( c>=0xc0 ){ \
+ c = sqlite3Utf8Trans1[c-0xc0]; \
+ while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
+ c = (c<<6) + (0x3f & *(zIn++)); \
+ } \
+ if( c<0x80 \
+ || (c&0xFFFFF800)==0xD800 \
+ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
+ }
+
+
+/* WRITE_UTF8(zOut, c): append codepoint c at zOut as one to four bytes of
+** UTF-8, advancing zOut past the bytes written. */
+#define WRITE_UTF8(zOut, c) { \
+ if( c<0x00080 ){ \
+ *zOut++ = (unsigned char)(c&0xFF); \
+ } \
+ else if( c<0x00800 ){ \
+ *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F); \
+ *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
+ } \
+ else if( c<0x10000 ){ \
+ *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F); \
+ *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
+ }else{ \
+ *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07); \
+ *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F); \
+ *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
+ } \
+}
+
+#endif /* ifndef SQLITE_AMALGAMATION */
+
+/* State of one "unicode61" tokenizer instance. */
+typedef struct Unicode61Tokenizer Unicode61Tokenizer;
+struct Unicode61Tokenizer {
+ unsigned char aTokenChar[128]; /* ASCII range token characters */
+ char *aFold; /* Buffer to fold text into */
+ int nFold; /* Size of aFold[] in bytes */
+ int bRemoveDiacritic; /* True if remove_diacritics=1 is set */
+ int nException; /* Number of entries in aiException[] */
+ int *aiException; /* Non-ASCII exception codepoints, ascending order */
+};
+
+/*
+** Apply a 'tokenchars' or 'separators' option to tokenizer p.
+**
+** Each codepoint in the UTF-8 string z is handled as follows. ASCII
+** codepoints have their entry in p->aTokenChar[] set to bTokenChars.
+** A non-ASCII codepoint is inserted into the sorted p->aiException[]
+** array if its default classification differs from bTokenChars and it is
+** not a diacritic.
+**
+** Returns SQLITE_OK, or SQLITE_NOMEM if the exception array cannot be
+** grown (in which case the tokenizer is left unmodified).
+*/
+static int fts5UnicodeAddExceptions(
+  Unicode61Tokenizer *p,          /* Tokenizer object */
+  const char *z,                  /* Characters to treat as exceptions */
+  int bTokenChars                 /* 1 for 'tokenchars', 0 for 'separators' */
+){
+  int nByte = (int)strlen(z);
+  int *aNew;
+  int nNew;
+  const unsigned char *zCsr;
+  const unsigned char *zTerm;
+
+  if( nByte<=0 ) return SQLITE_OK;
+
+  /* z holds at most nByte codepoints, so this is always large enough. */
+  aNew = (int*)sqlite3_realloc(
+      p->aiException, (nByte+p->nException)*sizeof(int)
+  );
+  if( aNew==0 ) return SQLITE_NOMEM;
+
+  nNew = p->nException;
+  zCsr = (const unsigned char*)z;
+  zTerm = (const unsigned char*)&z[nByte];
+  while( zCsr<zTerm ){
+    int iCode;
+    READ_UTF8(zCsr, zTerm, iCode);
+    if( iCode<128 ){
+      p->aTokenChar[iCode] = (unsigned char)bTokenChars;
+    }else{
+      int bToken = sqlite3Fts5UnicodeIsalnum(iCode);
+      assert( (bToken==0 || bToken==1) );
+      assert( (bTokenChars==0 || bTokenChars==1) );
+      if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
+        /* Insert before the first entry greater than iCode. */
+        int iPos = 0;
+        while( iPos<nNew && aNew[iPos]<=iCode ) iPos++;
+        memmove(&aNew[iPos+1], &aNew[iPos], (nNew-iPos)*sizeof(int));
+        aNew[iPos] = iCode;
+        nNew++;
+      }
+    }
+  }
+
+  p->aiException = aNew;
+  p->nException = nNew;
+  return SQLITE_OK;
+}
+
+/*
+** Binary-search the sorted p->aiException[] array for iCode. Returns 1 if
+** it is present and 0 otherwise (including when the array is empty).
+*/
+static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
+  const int *aExc = p->aiException;
+  int iLo = 0;
+  int iHi = p->nException - 1;
+
+  while( iLo<=iHi ){
+    int iMid = (iHi + iLo) / 2;
+    int iVal = aExc[iMid];
+    if( iVal==iCode ) return 1;
+    if( iVal<iCode ){
+      iLo = iMid+1;
+    }else{
+      iHi = iMid-1;
+    }
+  }
+  return 0;
+}
+
+/*
+** Delete a "unicode61" tokenizer, releasing its exception array, its
+** fold buffer and the object itself. Passing NULL is a no-op.
+*/
+static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
+  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
+  if( p==0 ) return;
+  sqlite3_free(p->aiException);
+  sqlite3_free(p->aFold);
+  sqlite3_free(p);
+}
+
+/*
+** Create a "unicode61" tokenizer.
+**
+** azArg[] holds nArg strings forming key/value pairs. Keys are compared
+** case-insensitively:
+**
+**   remove_diacritics   value must be exactly "0" or "1" (default 1)
+**   tokenchars          extra characters to treat as token characters
+**   separators          extra characters to treat as separators
+**
+** Returns SQLITE_OK on success, SQLITE_ERROR for an odd argument count,
+** an unknown key or a bad remove_diacritics value, and SQLITE_NOMEM on
+** allocation failure.
+**
+** *ppOut is written on every path: it receives the new tokenizer on
+** success and NULL on any error, so callers never see a stale pointer
+** (this matches fts5AsciiCreate).
+*/
+static int fts5UnicodeCreate(
+  void *pUnused,
+  const char **azArg, int nArg,
+  Fts5Tokenizer **ppOut
+){
+  int rc = SQLITE_OK;             /* Return code */
+  Unicode61Tokenizer *p = 0;      /* New tokenizer object */
+
+  UNUSED_PARAM(pUnused);
+
+  if( nArg%2 ){
+    rc = SQLITE_ERROR;
+  }else{
+    p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
+    if( p ){
+      int i;
+      memset(p, 0, sizeof(Unicode61Tokenizer));
+      memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
+      p->bRemoveDiacritic = 1;
+      p->nFold = 64;
+      p->aFold = sqlite3_malloc(p->nFold * sizeof(char));
+      if( p->aFold==0 ){
+        rc = SQLITE_NOMEM;
+      }
+      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
+        const char *zArg = azArg[i+1];
+        if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
+          /* Only the single-character strings "0" and "1" are accepted. */
+          if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
+            rc = SQLITE_ERROR;
+          }
+          p->bRemoveDiacritic = (zArg[0]=='1');
+        }else
+        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
+          rc = fts5UnicodeAddExceptions(p, zArg, 1);
+        }else
+        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
+          rc = fts5UnicodeAddExceptions(p, zArg, 0);
+        }else{
+          rc = SQLITE_ERROR;
+        }
+      }
+    }else{
+      rc = SQLITE_NOMEM;
+    }
+    if( rc!=SQLITE_OK ){
+      /* fts5UnicodeDelete() tolerates NULL and partially built objects. */
+      fts5UnicodeDelete((Fts5Tokenizer*)p);
+      p = 0;
+    }
+  }
+
+  /* Assigned unconditionally so the odd-nArg error path also clears it. */
+  *ppOut = (Fts5Tokenizer*)p;
+  return rc;
+}
+
+/*
+** Return true if, for the purposes of tokenizing with the tokenizer
+** passed as the first argument, codepoint iCode is considered a token
+** character (not a separator).
+*/
+static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
+ /* The default classification must be exactly 0 or 1 for the XOR below. */
+ assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
+ /* A codepoint listed in p->aiException[] has its default classification
+ ** inverted. */
+ return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
+}
+
+/*
+** Tokenize nText bytes of text at pText using the unicode61 tokenizer
+** pTokenizer. For each token found, xToken is invoked with pCtx, flags 0,
+** the folded token text and the byte offsets of the token within pText.
+** Tokenizing stops as soon as xToken returns anything other than
+** SQLITE_OK; SQLITE_DONE is then reported to the caller as SQLITE_OK.
+** Returns SQLITE_NOMEM if the fold buffer cannot be enlarged.
+*/
+static int fts5UnicodeTokenize(
+ Fts5Tokenizer *pTokenizer,
+ void *pCtx,
+ int iUnused,
+ const char *pText, int nText,
+ int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
+){
+ Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
+ int rc = SQLITE_OK;
+ unsigned char *a = p->aTokenChar;
+
+ unsigned char *zTerm = (unsigned char*)&pText[nText];
+ unsigned char *zCsr = (unsigned char *)pText;
+
+ /* Output buffer */
+ /* pEnd sits 6 bytes before the end of the fold buffer; once zOut moves
+ ** past it the buffer is doubled (see below). */
+ char *aFold = p->aFold;
+ int nFold = p->nFold;
+ const char *pEnd = &aFold[nFold-6];
+
+ UNUSED_PARAM(iUnused);
+
+ /* Each iteration of this loop gobbles up a contiguous run of separators,
+ ** then the next token. */
+ while( rc==SQLITE_OK ){
+ int iCode; /* non-ASCII codepoint read from input */
+ char *zOut = aFold;
+ int is;
+ int ie;
+
+ /* Skip any separator characters. */
+ /* This loop is left only through a goto: to tokenize_done at end of
+ ** input, or into the middle of the token loop below once the first
+ ** token character has been found. */
+ while( 1 ){
+ if( zCsr>=zTerm ) goto tokenize_done;
+ if( *zCsr & 0x80 ) {
+ /* A character outside of the ascii range. Skip past it if it is
+ ** a separator character. Or break out of the loop if it is not. */
+ is = zCsr - (unsigned char*)pText;
+ READ_UTF8(zCsr, zTerm, iCode);
+ if( fts5UnicodeIsAlnum(p, iCode) ){
+ goto non_ascii_tokenchar;
+ }
+ }else{
+ if( a[*zCsr] ){
+ is = zCsr - (unsigned char*)pText;
+ goto ascii_tokenchar;
+ }
+ zCsr++;
+ }
+ }
+
+ /* Run through the tokenchars. Fold them into the output buffer along
+ ** the way. */
+ while( zCsr<zTerm ){
+
+ /* Grow the output buffer so that there is sufficient space to fit the
+ ** largest possible utf-8 character. */
+ if( zOut>pEnd ){
+ aFold = sqlite3_malloc(nFold*2);
+ if( aFold==0 ){
+ rc = SQLITE_NOMEM;
+ goto tokenize_done;
+ }
+ /* Re-point zOut at the same offset inside the new buffer before the
+ ** old one is released. */
+ zOut = &aFold[zOut - p->aFold];
+ memcpy(aFold, p->aFold, nFold);
+ sqlite3_free(p->aFold);
+ p->aFold = aFold;
+ p->nFold = nFold = nFold*2;
+ pEnd = &aFold[nFold-6];
+ }
+
+ if( *zCsr & 0x80 ){
+ /* A non-ascii-range character. Fold it into the output buffer if
+ ** it is a token character, or break out of the loop if it is not. */
+ READ_UTF8(zCsr, zTerm, iCode);
+ if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
+ non_ascii_tokenchar:
+ iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
+ /* A folded value of 0 produces no output for this codepoint. */
+ if( iCode ) WRITE_UTF8(zOut, iCode);
+ }else{
+ break;
+ }
+ }else if( a[*zCsr]==0 ){
+ /* An ascii-range separator character. End of token. */
+ break;
+ }else{
+ ascii_tokenchar:
+ if( *zCsr>='A' && *zCsr<='Z' ){
+ *zOut++ = *zCsr + 32;
+ }else{
+ *zOut++ = *zCsr;
+ }
+ zCsr++;
+ }
+ /* ie tracks the offset just past the last token character consumed. */
+ ie = zCsr - (unsigned char*)pText;
+ }
+
+ /* Invoke the token callback */
+ rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
+ }
+
+ tokenize_done:
+ if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+ return rc;
+}
+
+/**************************************************************************
+** Start of porter stemmer implementation.
+*/
+
+/* Any tokens larger than this (in bytes) are passed through without
+** stemming. */
+#define FTS5_PORTER_MAX_TOKEN 64
+
+/* State of one "porter" tokenizer instance, which wraps a parent tokenizer
+** and stems the tokens that the parent produces. */
+typedef struct PorterTokenizer PorterTokenizer;
+struct PorterTokenizer {
+ fts5_tokenizer tokenizer; /* Parent tokenizer module */
+ Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */
+ char aBuf[FTS5_PORTER_MAX_TOKEN + 64]; /* Scratch buffer tokens are stemmed in */
+};
+
+/*
+** Delete a "porter" tokenizer. The parent tokenizer instance, if one was
+** created, is destroyed through the parent module's xDelete method before
+** the wrapper itself is freed. Passing NULL is a no-op.
+*/
+static void fts5PorterDelete(Fts5Tokenizer *pTok){
+  PorterTokenizer *p = (PorterTokenizer*)pTok;
+  if( p==0 ) return;
+  if( p->pTokenizer!=0 ){
+    p->tokenizer.xDelete(p->pTokenizer);
+  }
+  sqlite3_free(p);
+}
+
+/*
+** Create a "porter" tokenizer.
+**
+** pCtx is the fts5_api object used to look up the parent tokenizer
+** module. azArg[0], if present, names the parent ("unicode61" when
+** absent); any further arguments are passed on to the parent's xCreate.
+**
+** On success *ppOut receives the new tokenizer. On failure (SQLITE_NOMEM,
+** or whatever error the parent lookup or creation reported) *ppOut is set
+** to NULL.
+*/
+static int fts5PorterCreate(
+  void *pCtx,
+  const char **azArg, int nArg,
+  Fts5Tokenizer **ppOut
+){
+  fts5_api *pApi = (fts5_api*)pCtx;
+  const char *zBase = (nArg>0) ? azArg[0] : "unicode61";
+  PorterTokenizer *pNew;
+  void *pUserdata = 0;
+  int rc;
+
+  pNew = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer));
+  if( pNew==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    memset(pNew, 0, sizeof(PorterTokenizer));
+    rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pNew->tokenizer);
+    if( rc==SQLITE_OK ){
+      /* Everything after the parent's name belongs to the parent. */
+      const char **azParent = 0;
+      int nParent = 0;
+      if( nArg>1 ){
+        nParent = nArg-1;
+        azParent = &azArg[1];
+      }
+      rc = pNew->tokenizer.xCreate(
+          pUserdata, azParent, nParent, &pNew->pTokenizer
+      );
+    }
+  }
+
+  if( rc!=SQLITE_OK ){
+    fts5PorterDelete((Fts5Tokenizer*)pNew);
+    pNew = 0;
+  }
+  *ppOut = (Fts5Tokenizer*)pNew;
+  return rc;
+}
+
+/* Context handed to fts5PorterCb() for the duration of one tokenize call. */
+typedef struct PorterContext PorterContext;
+struct PorterContext {
+ void *pCtx; /* Context pointer forwarded to xToken */
+ int (*xToken)(void*, int, const char*, int, int, int); /* Downstream callback */
+ char *aBuf; /* Scratch buffer in which tokens are stemmed */
+};
+
+/* One suffix-rewriting rule. Only the disabled (#if 0) fts5PorterApply()
+** below refers to this type. */
+typedef struct PorterRule PorterRule;
+struct PorterRule {
+ const char *zSuffix; /* Suffix to look for at the end of the token */
+ int nSuffix; /* strlen(zSuffix) */
+ int (*xCond)(char *zStem, int nStem); /* Optional test on the stem, or NULL */
+ const char *zOutput; /* Text that replaces the suffix */
+ int nOutput; /* strlen(zOutput) */
+};
+
+#if 0
+/* Disabled code: a table-driven way of applying an array of PorterRule
+** entries (terminated by an entry with zSuffix==0) to the token in aBuf.
+** The first rule whose suffix matches is selected; it is applied if it has
+** no condition or its condition accepts the stem. Returns the index of the
+** applied rule, or -1 if no rule was applied. */
+static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
+ int ret = -1;
+ int nBuf = *pnBuf;
+ PorterRule *p;
+
+ for(p=aRule; p->zSuffix; p++){
+ assert( strlen(p->zSuffix)==p->nSuffix );
+ assert( strlen(p->zOutput)==p->nOutput );
+ if( nBuf<p->nSuffix ) continue;
+ if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break;
+ }
+
+ if( p->zSuffix ){
+ int nStem = nBuf - p->nSuffix;
+ if( p->xCond==0 || p->xCond(aBuf, nStem) ){
+ memcpy(&aBuf[nStem], p->zOutput, p->nOutput);
+ *pnBuf = nStem + p->nOutput;
+ ret = p - aRule;
+ }
+ }
+
+ return ret;
+}
+#endif
+
+/*
+** Return 1 if c is one of the lower case vowels a, e, i, o, u, or if c is
+** 'y' and bYIsVowel is non-zero. Return 0 otherwise.
+*/
+static int fts5PorterIsVowel(char c, int bYIsVowel){
+  switch( c ){
+    case 'a': case 'e': case 'i': case 'o': case 'u':
+      return 1;
+    case 'y':
+      return bYIsVowel!=0;
+    default:
+      return 0;
+  }
+}
+
+/*
+** Scan the nStem byte stem zStem for the first vowel, then for the first
+** consonant that follows it. Returns the offset just past that consonant,
+** or 0 if no such vowel-consonant sequence exists.
+**
+** bPrevCons says whether the character preceding zStem[0] was a
+** consonant; a 'y' counts as a vowel only when it follows a consonant.
+*/
+static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
+  int iPos = 0;
+  int bCons = bPrevCons;
+
+  /* Scan for a vowel */
+  while( iPos<nStem ){
+    bCons = !fts5PorterIsVowel(zStem[iPos], bCons);
+    if( bCons==0 ) break;
+    iPos++;
+  }
+
+  /* Scan for a consonant */
+  for(iPos++; iPos<nStem; iPos++){
+    bCons = !fts5PorterIsVowel(zStem[iPos], bCons);
+    if( bCons ) return iPos+1;
+  }
+  return 0;
+}
+
+/* porter rule condition: (m > 0)
+** True if the stem contains at least one vowel-consonant sequence. */
+static int fts5Porter_MGt0(char *zStem, int nStem){
+  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
+}
+
+/* porter rule condition: (m > 1)
+** True if the stem contains at least two vowel-consonant sequences. */
+static int fts5Porter_MGt1(char *zStem, int nStem){
+  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
+  if( nFirst==0 ) return 0;
+  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
+}
+
+/* porter rule condition: (m = 1)
+** True if the stem contains exactly one vowel-consonant sequence. */
+static int fts5Porter_MEq1(char *zStem, int nStem){
+  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
+  if( nFirst==0 ) return 0;
+  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0;
+}
+
+/* porter rule condition: (*o)
+**
+** True if the stem ends consonant-vowel-consonant and the final consonant
+** is not 'w', 'x' or 'y'. zStem/nStem give the stem; an empty stem never
+** matches.
+**
+** The consonant/vowel pattern is tracked as a bit mask with one bit per
+** character (1 = consonant). Only the three most recent bits are ever
+** inspected, so the mask is truncated to three bits on every step. This
+** keeps the signed left shift from overflowing on stems longer than the
+** width of an int, which would be undefined behaviour.
+*/
+static int fts5Porter_Ostar(char *zStem, int nStem){
+  int i;
+  int mask = 0;
+  int bCons = 0;
+  char cLast;
+
+  if( nStem<1 ) return 0;
+  cLast = zStem[nStem-1];
+  if( cLast=='w' || cLast=='x' || cLast=='y' ) return 0;
+
+  for(i=0; i<nStem; i++){
+    bCons = !fts5PorterIsVowel(zStem[i], bCons);
+    assert( bCons==0 || bCons==1 );
+    mask = ((mask << 1) + bCons) & 0x0007;
+  }
+  return (mask==0x0005);
+}
+
+/* porter rule condition: (m > 1 and (*S or *T))
+** True if the stem ends in 's' or 't' and has measure greater than one.
+** The stem must not be empty. */
+static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
+  char cLast;
+  assert( nStem>0 );
+  cLast = zStem[nStem-1];
+  if( cLast!='s' && cLast!='t' ) return 0;
+  return fts5Porter_MGt1(zStem, nStem);
+}
+
+/* porter rule condition: (*v*)
+** True if the stem contains a vowel. A 'y' counts as a vowel anywhere
+** except in the first position. */
+static int fts5Porter_Vowel(char *zStem, int nStem){
+  int i = 0;
+  while( i<nStem ){
+    if( fts5PorterIsVowel(zStem[i], i>0) ) return 1;
+    i++;
+  }
+  return 0;
+}
+
+
+/**************************************************************************
+***************************************************************************
+** GENERATED CODE STARTS HERE (mkportersteps.tcl)
+*/
+
+/*
+** Porter stemmer step 4. If the token in aBuf[0..*pnBuf-1] ends with one
+** of the suffixes handled below and the remaining stem satisfies the
+** rule's condition (m > 1, plus an s/t ending for "ion"), the suffix is
+** removed by shrinking *pnBuf. Always returns 0.
+**
+** No rule can match a token shorter than three bytes, so such tokens are
+** rejected up front. This also avoids reading aBuf[nBuf-2] before the
+** start of the buffer when earlier steps have reduced the token to a
+** single byte.
+*/
+static int fts5PorterStep4(char *aBuf, int *pnBuf){
+  int ret = 0;
+  int nBuf = *pnBuf;
+
+  if( nBuf<3 ) return ret;
+
+  switch( aBuf[nBuf-2] ){
+
+    case 'a':
+      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
+          *pnBuf = nBuf - 2;
+        }
+      }
+      break;
+
+    case 'c':
+      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
+          *pnBuf = nBuf - 4;
+        }
+      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
+          *pnBuf = nBuf - 4;
+        }
+      }
+      break;
+
+    case 'e':
+      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
+          *pnBuf = nBuf - 2;
+        }
+      }
+      break;
+
+    case 'i':
+      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
+          *pnBuf = nBuf - 2;
+        }
+      }
+      break;
+
+    case 'l':
+      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
+          *pnBuf = nBuf - 4;
+        }
+      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
+          *pnBuf = nBuf - 4;
+        }
+      }
+      break;
+
+    case 'n':
+      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
+          *pnBuf = nBuf - 5;
+        }
+      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
+          *pnBuf = nBuf - 4;
+        }
+      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }
+      break;
+
+    case 'o':
+      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
+          *pnBuf = nBuf - 2;
+        }
+      }
+      break;
+
+    case 's':
+      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }
+      break;
+
+    case 't':
+      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }
+      break;
+
+    case 'u':
+      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }
+      break;
+
+    case 'v':
+      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }
+      break;
+
+    case 'z':
+      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }
+      break;
+
+  }
+  return ret;
+}
+
+
+/*
+** Porter stemmer step 1b, second part. If the token in
+** aBuf[0..*pnBuf-1] ends in "at", "bl" or "iz", an 'e' is appended (the
+** buffer must have room for one more byte), *pnBuf is incremented and 1
+** is returned. Otherwise the token is left unchanged and 0 is returned.
+**
+** No rule can match a token shorter than three bytes, so such tokens are
+** rejected up front. This also avoids reading aBuf[nBuf-2] before the
+** start of the buffer when the token is a single byte long.
+*/
+static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
+  int ret = 0;
+  int nBuf = *pnBuf;
+
+  if( nBuf<3 ) return ret;
+
+  switch( aBuf[nBuf-2] ){
+
+    case 'a':
+      if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){
+        memcpy(&aBuf[nBuf-2], "ate", 3);
+        *pnBuf = nBuf - 2 + 3;
+        ret = 1;
+      }
+      break;
+
+    case 'b':
+      if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){
+        memcpy(&aBuf[nBuf-2], "ble", 3);
+        *pnBuf = nBuf - 2 + 3;
+        ret = 1;
+      }
+      break;
+
+    case 'i':
+      if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){
+        memcpy(&aBuf[nBuf-2], "ize", 3);
+        *pnBuf = nBuf - 2 + 3;
+        ret = 1;
+      }
+      break;
+
+  }
+  return ret;
+}
+
+
+/*
+** Porter stemmer step 2. If the token in aBuf[0..*pnBuf-1] ends with one
+** of the suffixes handled below and the remaining stem has measure
+** greater than zero, the suffix is replaced by its shorter form and
+** *pnBuf is updated. Always returns 0.
+**
+** No rule can match a token shorter than three bytes, so such tokens are
+** rejected up front. This also avoids reading aBuf[nBuf-2] before the
+** start of the buffer when earlier steps have reduced the token to a
+** single byte.
+*/
+static int fts5PorterStep2(char *aBuf, int *pnBuf){
+  int ret = 0;
+  int nBuf = *pnBuf;
+
+  if( nBuf<3 ) return ret;
+
+  switch( aBuf[nBuf-2] ){
+
+    case 'a':
+      if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
+          memcpy(&aBuf[nBuf-7], "ate", 3);
+          *pnBuf = nBuf - 7 + 3;
+        }
+      }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
+          memcpy(&aBuf[nBuf-6], "tion", 4);
+          *pnBuf = nBuf - 6 + 4;
+        }
+      }
+      break;
+
+    case 'c':
+      if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          memcpy(&aBuf[nBuf-4], "ence", 4);
+          *pnBuf = nBuf - 4 + 4;
+        }
+      }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          memcpy(&aBuf[nBuf-4], "ance", 4);
+          *pnBuf = nBuf - 4 + 4;
+        }
+      }
+      break;
+
+    case 'e':
+      if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          memcpy(&aBuf[nBuf-4], "ize", 3);
+          *pnBuf = nBuf - 4 + 3;
+        }
+      }
+      break;
+
+    case 'g':
+      if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          memcpy(&aBuf[nBuf-4], "log", 3);
+          *pnBuf = nBuf - 4 + 3;
+        }
+      }
+      break;
+
+    case 'l':
+      if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
+          memcpy(&aBuf[nBuf-3], "ble", 3);
+          *pnBuf = nBuf - 3 + 3;
+        }
+      }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          memcpy(&aBuf[nBuf-4], "al", 2);
+          *pnBuf = nBuf - 4 + 2;
+        }
+      }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "ent", 3);
+          *pnBuf = nBuf - 5 + 3;
+        }
+      }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
+          memcpy(&aBuf[nBuf-3], "e", 1);
+          *pnBuf = nBuf - 3 + 1;
+        }
+      }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "ous", 3);
+          *pnBuf = nBuf - 5 + 3;
+        }
+      }
+      break;
+
+    case 'o':
+      if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
+          memcpy(&aBuf[nBuf-7], "ize", 3);
+          *pnBuf = nBuf - 7 + 3;
+        }
+      }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "ate", 3);
+          *pnBuf = nBuf - 5 + 3;
+        }
+      }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          memcpy(&aBuf[nBuf-4], "ate", 3);
+          *pnBuf = nBuf - 4 + 3;
+        }
+      }
+      break;
+
+    case 's':
+      if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "al", 2);
+          *pnBuf = nBuf - 5 + 2;
+        }
+      }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
+          memcpy(&aBuf[nBuf-7], "ive", 3);
+          *pnBuf = nBuf - 7 + 3;
+        }
+      }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
+          memcpy(&aBuf[nBuf-7], "ful", 3);
+          *pnBuf = nBuf - 7 + 3;
+        }
+      }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
+          memcpy(&aBuf[nBuf-7], "ous", 3);
+          *pnBuf = nBuf - 7 + 3;
+        }
+      }
+      break;
+
+    case 't':
+      if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "al", 2);
+          *pnBuf = nBuf - 5 + 2;
+        }
+      }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "ive", 3);
+          *pnBuf = nBuf - 5 + 3;
+        }
+      }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
+          memcpy(&aBuf[nBuf-6], "ble", 3);
+          *pnBuf = nBuf - 6 + 3;
+        }
+      }
+      break;
+
+  }
+  return ret;
+}
+
+
+/*
+** Porter stemmer step 3. If the token in aBuf[0..*pnBuf-1] ends with one
+** of the suffixes handled below and the remaining stem has measure
+** greater than zero, the suffix is shortened or removed and *pnBuf is
+** updated. Always returns 0.
+**
+** No rule can match a token shorter than three bytes, so such tokens are
+** rejected up front. This also avoids reading aBuf[nBuf-2] before the
+** start of the buffer when earlier steps have reduced the token to a
+** single byte.
+*/
+static int fts5PorterStep3(char *aBuf, int *pnBuf){
+  int ret = 0;
+  int nBuf = *pnBuf;
+
+  if( nBuf<3 ) return ret;
+
+  switch( aBuf[nBuf-2] ){
+
+    case 'a':
+      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          memcpy(&aBuf[nBuf-4], "ic", 2);
+          *pnBuf = nBuf - 4 + 2;
+        }
+      }
+      break;
+
+    case 's':
+      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
+          *pnBuf = nBuf - 4;
+        }
+      }
+      break;
+
+    case 't':
+      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "ic", 2);
+          *pnBuf = nBuf - 5 + 2;
+        }
+      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "ic", 2);
+          *pnBuf = nBuf - 5 + 2;
+        }
+      }
+      break;
+
+    case 'u':
+      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
+          *pnBuf = nBuf - 3;
+        }
+      }
+      break;
+
+    case 'v':
+      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          *pnBuf = nBuf - 5;
+        }
+      }
+      break;
+
+    case 'z':
+      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
+        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
+          memcpy(&aBuf[nBuf-5], "al", 2);
+          *pnBuf = nBuf - 5 + 2;
+        }
+      }
+      break;
+
+  }
+  return ret;
+}
+
+
+/*
+** Porter stemmer step 1b, first part, applied to the token in
+** aBuf[0..*pnBuf-1] (which must be at least two bytes long):
+**
+**   "eed" -> "ee"   if the stem before the suffix has measure > 0
+**   "ed"  -> ""     if the stem before the suffix contains a vowel
+**   "ing" -> ""     if the stem before the suffix contains a vowel
+**
+** A token ending in "eed" is never considered for the "ed" rule. Returns
+** 1 if "ed" or "ing" was removed and 0 otherwise; *pnBuf is updated
+** whenever the token changes.
+*/
+static int fts5PorterStep1B(char *aBuf, int *pnBuf){
+  int nBuf = *pnBuf;
+  char cKey = aBuf[nBuf-2];
+
+  if( cKey=='e' ){
+    if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
+      if( fts5Porter_MGt0(aBuf, nBuf-3) ){
+        memcpy(&aBuf[nBuf-3], "ee", 2);
+        *pnBuf = nBuf - 3 + 2;
+      }
+      return 0;
+    }
+    if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2)
+     && fts5Porter_Vowel(aBuf, nBuf-2)
+    ){
+      *pnBuf = nBuf - 2;
+      return 1;
+    }
+    return 0;
+  }
+
+  if( cKey=='n' ){
+    if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3)
+     && fts5Porter_Vowel(aBuf, nBuf-3)
+    ){
+      *pnBuf = nBuf - 3;
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+/*
+** GENERATED CODE ENDS HERE (mkportersteps.tcl)
+***************************************************************************
+**************************************************************************/
+
+/*
+** Porter stemmer step 1a, applied to the token in aBuf[0..*pnBuf-1]
+** (which must be at least two bytes long). Only *pnBuf is modified:
+**
+**   "sses" -> "ss"      "ies" -> "i"
+**   "ss"   -> "ss"      "s"   -> ""
+**
+** The two-byte removals apply only when the token is longer than the
+** suffix; any other token ending in "es" loses just its final 's'.
+*/
+static void fts5PorterStep1A(char *aBuf, int *pnBuf){
+  int nBuf = *pnBuf;
+  char cPrev;
+
+  if( aBuf[nBuf-1]!='s' ) return;
+
+  cPrev = aBuf[nBuf-2];
+  if( cPrev=='s' ) return;                /* Ends in "ss": leave alone */
+  if( cPrev!='e' ){
+    *pnBuf = nBuf-1;                      /* Plain trailing 's' */
+    return;
+  }
+
+  /* The token ends in "es". */
+  if( (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s')
+   || (nBuf>3 && aBuf[nBuf-3]=='i' )
+  ){
+    *pnBuf = nBuf-2;
+  }else{
+    *pnBuf = nBuf-1;
+  }
+}
+
+ /*
+ ** Token callback handed to the wrapped tokenizer by fts5PorterTokenize().
+ ** pCtx points to a PorterContext. The token is copied into the context's
+ ** scratch buffer, stemmed in place, and the result is forwarded to the
+ ** xToken callback stored in the context with the original tflags, iStart
+ ** and iEnd.
+ **
+ ** Tokens shorter than 3 bytes or longer than FTS5_PORTER_MAX_TOKEN bytes
+ ** are forwarded unmodified. The return value is whatever the forwarded
+ ** xToken call returns.
+ */
+ static int fts5PorterCb(
+ void *pCtx,
+ int tflags,
+ const char *pToken,
+ int nToken,
+ int iStart,
+ int iEnd
+ ){
+ PorterContext *p = (PorterContext*)pCtx;
+
+ char *aBuf;
+ int nBuf;
+
+ if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
+ /* Work on a private copy so that the caller's token is never modified. */
+ aBuf = p->aBuf;
+ nBuf = nToken;
+ memcpy(aBuf, pToken, nBuf);
+
+ /* Step 1. */
+ fts5PorterStep1A(aBuf, &nBuf);
+ if( fts5PorterStep1B(aBuf, &nBuf) ){
+ /* Step 1b removed "ed" or "ing". If fts5PorterStep1B2() made no change,
+ ** either drop one byte of a doubled trailing non-vowel (other than
+ ** 'l', 's' or 'z'), or append an 'e' when both fts5Porter_MEq1() and
+ ** fts5Porter_Ostar() hold for the word. */
+ if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){
+ char c = aBuf[nBuf-1];
+ if( fts5PorterIsVowel(c, 0)==0
+ && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2]
+ ){
+ nBuf--;
+ }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){
+ aBuf[nBuf++] = 'e';
+ }
+ }
+ }
+
+ /* Step 1C. */
+ /* A trailing 'y' becomes 'i' when fts5Porter_Vowel() accepts the stem
+ ** that precedes it. */
+ if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){
+ aBuf[nBuf-1] = 'i';
+ }
+
+ /* Steps 2 through 4. */
+ fts5PorterStep2(aBuf, &nBuf);
+ fts5PorterStep3(aBuf, &nBuf);
+ fts5PorterStep4(aBuf, &nBuf);
+
+ /* Step 5a. */
+ /* Drop a trailing 'e' if the stem satisfies fts5Porter_MGt1(), or if it
+ ** satisfies fts5Porter_MEq1() but not fts5Porter_Ostar(). */
+ assert( nBuf>0 );
+ if( aBuf[nBuf-1]=='e' ){
+ if( fts5Porter_MGt1(aBuf, nBuf-1)
+ || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
+ ){
+ nBuf--;
+ }
+ }
+
+ /* Step 5b. */
+ /* Collapse a trailing "ll" to "l" when fts5Porter_MGt1() holds. */
+ if( nBuf>1 && aBuf[nBuf-1]=='l'
+ && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1)
+ ){
+ nBuf--;
+ }
+
+ /* Emit the stemmed token; offsets still refer to the original text. */
+ return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
+
+ pass_through:
+ return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
+ }
+
+/*
+** Tokenize using the porter tokenizer.
+*/
+ static int fts5PorterTokenize(
+   Fts5Tokenizer *pTokenizer,
+   void *pCtx,
+   int flags,
+   const char *pText, int nText,
+   int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
+ ){
+   PorterTokenizer *pPorter = (PorterTokenizer*)pTokenizer;
+   PorterContext ctx;              /* Passed to fts5PorterCb() as its pCtx */
+   int rc;
+
+   /* Remember the caller's callback and context, plus the scratch buffer
+   ** that fts5PorterCb() stems tokens in. */
+   ctx.aBuf = pPorter->aBuf;
+   ctx.pCtx = pCtx;
+   ctx.xToken = xToken;
+
+   /* Run the wrapped tokenizer; every token it produces is routed through
+   ** fts5PorterCb() before reaching xToken. */
+   rc = pPorter->tokenizer.xTokenize(
+       pPorter->pTokenizer, (void*)&ctx, flags, pText, nText, fts5PorterCb
+   );
+   return rc;
+ }
+
+/*
+** Register all built-in tokenizers with FTS5.
+*/
+ static int sqlite3Fts5TokenizerInit(fts5_api *pApi){
+   /* Name and method table of each built-in tokenizer, in registration
+   ** order. */
+   struct BuiltinTokenizer {
+     const char *zName;
+     fts5_tokenizer x;
+   } aBuiltin[] = {
+     { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
+     { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
+     { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
+   };
+
+   int rc = SQLITE_OK;             /* Result of the most recent registration */
+   int iTok;                       /* Index into aBuiltin[] */
+
+   /* Stop at the first registration that fails and return its error code.
+   ** pApi is also passed as the user-data pointer of each tokenizer. */
+   for(iTok=0; iTok<ArraySize(aBuiltin); iTok++){
+     struct BuiltinTokenizer *pTok = &aBuiltin[iTok];
+     rc = pApi->xCreateTokenizer(pApi, pTok->zName, (void*)pApi, &pTok->x, 0);
+     if( rc!=SQLITE_OK ) break;
+   }
+
+   return rc;
+ }
+
+
+
+#line 1 "fts5_unicode2.c"
+/*
+** 2012 May 25
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+/*
+** DO NOT EDIT THIS MACHINE GENERATED FILE.
+*/
+
+
+#include <assert.h>
+
+/*
+** Return true if the argument corresponds to a unicode codepoint
+** classified as either a letter or a number. Otherwise false.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+ static int sqlite3Fts5UnicodeIsalnum(int c){
+   /* Each unsigned integer in the following array corresponds to a contiguous
+   ** range of unicode codepoints that are not either letters or numbers (i.e.
+   ** codepoints for which this function should return 0).
+   **
+   ** The most significant 22 bits in each 32-bit value contain the first
+   ** codepoint in the range. The least significant 10 bits are used to store
+   ** the size of the range (always at least 1). In other words, the value
+   ** ((C<<22) + N) represents a range of N codepoints starting with codepoint
+   ** C. It is not possible to represent a range larger than 1023 codepoints
+   ** using this format.
+   */
+   static const unsigned int aEntry[] = {
+     0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
+     0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
+     0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
+     0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
+     0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
+     0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
+     0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
+     0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
+     0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
+     0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
+     0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
+     0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
+     0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
+     0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
+     0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
+     0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
+     0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
+     0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
+     0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
+     0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
+     0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
+     0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
+     0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
+     0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
+     0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
+     0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
+     0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
+     0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
+     0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
+     0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
+     0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
+     0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
+     0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
+     0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
+     0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
+     0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
+     0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
+     0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
+     0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
+     0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
+     0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
+     0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
+     0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
+     0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
+     0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
+     0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
+     0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
+     0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
+     0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
+     0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
+     0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
+     0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
+     0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
+     0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
+     0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
+     0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
+     0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
+     0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
+     0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
+     0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
+     0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
+     0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
+     0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
+     0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
+     0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
+     0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
+     0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
+     0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
+     0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
+     0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
+     0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
+     0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
+     0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
+     0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
+     0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
+     0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
+     0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
+     0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
+     0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
+     0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
+     0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
+     0x380400F0,
+   };
+   /* One bit per ASCII codepoint (bit c%32 of word c/32). A set bit marks a
+   ** codepoint that is NOT a letter or a number. */
+   static const unsigned int aAscii[4] = {
+     0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
+   };
+
+   if( (unsigned int)c<128 ){
+     /* The shift must be done on an unsigned value: (c & 0x1F) can be 31,
+     ** and shifting a signed 1 into the sign bit is undefined behaviour. */
+     return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
+   }else if( (unsigned int)c<(1<<22) ){
+     /* Binary search for the last entry whose start codepoint is <= c. The
+     ** low 10 bits of the key are all set so that an entry starting exactly
+     ** at c compares less than or equal to the key whatever its size. */
+     unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
+     int iRes = 0;
+     int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+     int iLo = 0;
+     while( iHi>=iLo ){
+       int iTest = (iHi + iLo) / 2;
+       if( key >= aEntry[iTest] ){
+         iRes = iTest;
+         iLo = iTest+1;
+       }else{
+         iHi = iTest-1;
+       }
+     }
+     assert( aEntry[0]<key );
+     assert( key>=aEntry[iRes] );
+     /* c is alphanumeric iff it lies at or beyond the end of that range. */
+     return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
+   }
+   /* Codepoints of 1<<22 and above (and negative values, which the cast
+   ** makes large) are reported as alphanumeric. */
+   return 1;
+ }
+
+
+/*
+ ** If the argument is a codepoint corresponding to a lowercase letter
+ ** in the ASCII range with a diacritic added, return the codepoint
+ ** of the ASCII letter only. For example, if passed 235 - "LATIN
+ ** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
+ ** E"). The results of passing a codepoint that corresponds to an
+ ** uppercase letter are undefined.
+*/
+ static int fts5_remove_diacritic(int c){
+   /* Each aDia[] entry describes a run of codepoints that all map to the
+   ** letter aChar[] holds at the same index: the upper 13 bits are the
+   ** first codepoint of the run and the low 3 bits are the number of
+   ** codepoints that follow it in the run.
+   **
+   ** Both tables are read-only, so they are "static const": they are not
+   ** copied onto the stack on every call.
+   */
+   static const unsigned short aDia[] = {
+     0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
+     2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
+     2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
+     2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
+     3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928,
+     3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234,
+     4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504,
+     6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529,
+     61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
+     61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
+     62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
+     62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
+     62924, 63050, 63082, 63274, 63390,
+   };
+   static const char aChar[] = {
+     '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c',
+     'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r',
+     's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o',
+     'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r',
+     'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0',
+     '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h',
+     'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't',
+     'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a',
+     'e', 'i', 'o', 'u', 'y',
+   };
+
+   /* Binary search for the last entry whose first codepoint is <= c. The
+   ** low 3 bits of the key are all set so that an entry starting exactly
+   ** at c compares less than or equal to the key. */
+   unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
+   int iRes = 0;
+   int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
+   int iLo = 0;
+   while( iHi>=iLo ){
+     int iTest = (iHi + iLo) / 2;
+     if( key >= aDia[iTest] ){
+       iRes = iTest;
+       iLo = iTest+1;
+     }else{
+       iHi = iTest-1;
+     }
+   }
+   assert( key>=aDia[iRes] );
+   /* Past the end of the run: c is returned unchanged. Inside it: return
+   ** the base letter. */
+   return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
+ }
+
+
+/*
+** Return true if the argument interpreted as a unicode codepoint
+** is a diacritical modifier character.
+*/
+ static int sqlite3Fts5UnicodeIsdiacritic(int c){
+   /* Bit N of mask0 covers codepoint 768+N, bit N of mask1 covers codepoint
+   ** 800+N. A set bit marks a diacritical modifier. */
+   const unsigned int mask0 = 0x08029FDF;
+   const unsigned int mask1 = 0x000361F8;
+
+   /* Only codepoints 768..817 are covered by the masks. */
+   if( c<768 || c>817 ) return 0;
+
+   /* The shifts are done on unsigned values: for c==799 the shift count is
+   ** 31, and shifting a signed 1 into the sign bit is undefined behaviour.
+   ** The result is non-zero (not necessarily 1) for a diacritic. */
+   return (c < 768+32) ?
+       (int)(mask0 & ((unsigned int)1 << (c-768))) :
+       (int)(mask1 & ((unsigned int)1 << (c-768-32)));
+ }
+
+
+/*
+** Interpret the argument as a unicode codepoint. If the codepoint
+** is an upper case character that has a lower case equivalent,
+** return the codepoint corresponding to the lower case version.
+** Otherwise, return a copy of the argument.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+ static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
+ /* Each entry in the following array defines a rule for folding a range
+ ** of codepoints to lower case. The rule applies to a range of nRange
+ ** codepoints starting at codepoint iCode.
+ **
+ ** If the least significant bit in flags is clear, then the rule applies
+ ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
+ ** need to be folded). Or, if it is set, then the rule only applies to
+ ** every second codepoint in the range, starting with codepoint C.
+ **
+ ** The 7 most significant bits in flags are an index into the aiOff[]
+ ** array. If a specific codepoint C does require folding, then its lower
+ ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
+ **
+ ** The contents of this array are generated by parsing the CaseFolding.txt
+ ** file distributed as part of the "Unicode Character Database". See
+ ** http://www.unicode.org for details.
+ */
+ static const struct TableEntry {
+ unsigned short iCode;
+ unsigned char flags;
+ unsigned char nRange;
+ } aEntry[] = {
+ {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
+ {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
+ {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
+ {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
+ {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
+ {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
+ {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
+ {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
+ {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
+ {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
+ {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
+ {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
+ {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
+ {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
+ {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
+ {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
+ {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
+ {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
+ {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
+ {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
+ {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
+ {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
+ {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
+ {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
+ {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
+ {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
+ {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
+ {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
+ {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
+ {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
+ {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
+ {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
+ {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
+ {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
+ {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
+ {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
+ {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
+ {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
+ {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
+ {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
+ {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
+ {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
+ {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
+ {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
+ {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
+ {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
+ {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
+ {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
+ {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
+ {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
+ {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
+ {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
+ {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
+ {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
+ {65313, 14, 26},
+ };
+ /* Offsets added (modulo 65536) to a codepoint to fold it; indexed by
+ ** (flags>>1) of the matching aEntry[] rule. */
+ static const unsigned short aiOff[] = {
+ 1, 2, 8, 15, 16, 26, 28, 32,
+ 37, 38, 40, 48, 63, 64, 69, 71,
+ 79, 80, 116, 202, 203, 205, 206, 207,
+ 209, 210, 211, 213, 214, 217, 218, 219,
+ 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
+ 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
+ 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
+ 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
+ 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
+ 65514, 65521, 65527, 65528, 65529,
+ };
+
+ int ret = c;
+
+ assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
+
+ /* ASCII fast path: only 'A'..'Z' change. bRemoveDiacritic is not
+ ** consulted for codepoints below 128. */
+ if( c<128 ){
+ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+ }else if( c<65536 ){
+ const struct TableEntry *p;
+ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+ int iLo = 0;
+ int iRes = -1;
+
+ /* Binary search for the last rule whose iCode is <= c. */
+ assert( c>aEntry[0].iCode );
+ while( iHi>=iLo ){
+ int iTest = (iHi + iLo) / 2;
+ int cmp = (c - aEntry[iTest].iCode);
+ if( cmp>=0 ){
+ iRes = iTest;
+ iLo = iTest+1;
+ }else{
+ iHi = iTest-1;
+ }
+ }
+
+ assert( iRes>=0 && c>=aEntry[iRes].iCode );
+ p = &aEntry[iRes];
+ /* The rule applies if c lies inside its range and, when bit 0 of flags
+ ** is set, c has the same parity as iCode. */
+ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
+ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
+ assert( ret>0 );
+ }
+
+ /* Diacritic removal is applied to the folded codepoint, and only in
+ ** this 128..65535 branch. */
+ if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
+ }
+
+ /* Codepoints 66560..66599 fold by adding 40. All other codepoints of
+ ** 65536 and above are returned unchanged. */
+ else if( c>=66560 && c<66600 ){
+ ret = c + 40;
+ }
+
+ return ret;
+ }
+
+#line 1 "fts5_varint.c"
+/*
+** 2015 May 30
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Routines for varint serialization and deserialization.
+*/
+
+
+/* #include "fts5Int.h" */
+
+/*
+** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
+** Except, this version does handle the single byte case that the core
+** version depends on being handled before its function is called.
+*/
+ /*
+ ** Decode the varint starting at p[0] into *v and return the number of
+ ** bytes it occupies. The 1-, 2- and 3-byte encodings are decoded inline;
+ ** anything longer is handed to sqlite3Fts5GetVarint() and the 64-bit
+ ** result is truncated to its low 32 bits.
+ */
+ static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
+ u32 a,b;
+
+ /* The 1-byte case. Overwhelmingly the most common. */
+ a = *p;
+ /* a: p0 (unmasked) */
+ if (!(a&0x80))
+ {
+ /* Values between 0 and 127 */
+ *v = a;
+ return 1;
+ }
+
+ /* The 2-byte case */
+ p++;
+ b = *p;
+ /* b: p1 (unmasked) */
+ if (!(b&0x80))
+ {
+ /* Values between 128 and 16383 */
+ a &= 0x7f;
+ a = a<<7;
+ *v = a | b;
+ return 2;
+ }
+
+ /* The 3-byte case */
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p0<<14 | p2 (unmasked) */
+ if (!(a&0x80))
+ {
+ /* Values between 16384 and 2097151 */
+ a &= (0x7f<<14)|(0x7f);
+ b &= 0x7f;
+ b = b<<7;
+ *v = a | b;
+ return 3;
+ }
+
+ /* A 32-bit varint is used to store size information in btrees.
+ ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
+ ** A 3-byte varint is sufficient, for example, to record the size
+ ** of a 1048569-byte BLOB or string.
+ **
+ ** We only unroll the first 1-, 2-, and 3- byte cases. The very
+ ** rare larger cases can be handled by the slower 64-bit varint
+ ** routine.
+ */
+ {
+ u64 v64;
+ u8 n;
+ /* Rewind to the first byte of the varint before decoding it again. */
+ p -= 2;
+ n = sqlite3Fts5GetVarint(p, &v64);
+ /* Any bits above the low 32 are silently discarded. */
+ *v = (u32)v64;
+ assert( n>3 && n<=9 );
+ return n;
+ }
+ }
+
+
+/*
+ ** Bitmasks used by sqlite3Fts5GetVarint(). These precomputed constants
+ ** are defined here rather than simply putting the constant expressions
+ ** inline in order to work around bugs in the RVT compiler.
+ **
+ ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
+ **
+ ** SLOT_4_2_0 A mask for (0xf<<28) | SLOT_2_0, i.e. (0x7f<<28) | SLOT_2_0
+ **            truncated to 32 bits
+*/
+#define SLOT_2_0 0x001fc07f
+#define SLOT_4_2_0 0xf01fc07f
+
+/*
+** Read a 64-bit variable-length integer from memory starting at p[0].
+** Return the number of bytes read. The value is stored in *v.
+*/
+ static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
+ /* a and b alternately accumulate the 7-bit payloads of the even and odd
+ ** numbered input bytes. s holds the high-order part of the value once
+ ** more than four bytes have been consumed. The per-step comments below
+ ** name the input bytes p0, p1, ... in the order they are read. */
+ u32 a,b,s;
+
+ a = *p;
+ /* a: p0 (unmasked) */
+ if (!(a&0x80))
+ {
+ *v = a;
+ return 1;
+ }
+
+ p++;
+ b = *p;
+ /* b: p1 (unmasked) */
+ if (!(b&0x80))
+ {
+ a &= 0x7f;
+ a = a<<7;
+ a |= b;
+ *v = a;
+ return 2;
+ }
+
+ /* Verify that constants are precomputed correctly */
+ assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
+ assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
+
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p0<<14 | p2 (unmasked) */
+ if (!(a&0x80))
+ {
+ a &= SLOT_2_0;
+ b &= 0x7f;
+ b = b<<7;
+ a |= b;
+ *v = a;
+ return 3;
+ }
+
+ /* CSE1 from below */
+ a &= SLOT_2_0;
+ p++;
+ b = b<<14;
+ b |= *p;
+ /* b: p1<<14 | p3 (unmasked) */
+ if (!(b&0x80))
+ {
+ b &= SLOT_2_0;
+ /* moved CSE1 up */
+ /* a &= (0x7f<<14)|(0x7f); */
+ a = a<<7;
+ a |= b;
+ *v = a;
+ return 4;
+ }
+
+ /* a: p0<<14 | p2 (masked) */
+ /* b: p1<<14 | p3 (unmasked) */
+ /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+ /* moved CSE1 up */
+ /* a &= (0x7f<<14)|(0x7f); */
+ b &= SLOT_2_0;
+ s = a;
+ /* s: p0<<14 | p2 (masked) */
+
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p0<<28 | p2<<14 | p4 (unmasked) */
+ if (!(a&0x80))
+ {
+ /* we can skip these cause they were (effectively) done above in calc'ing s */
+ /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
+ /* b &= (0x7f<<14)|(0x7f); */
+ b = b<<7;
+ a |= b;
+ s = s>>18;
+ *v = ((u64)s)<<32 | a;
+ return 5;
+ }
+
+ /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+ s = s<<7;
+ s |= b;
+ /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
+
+ p++;
+ b = b<<14;
+ b |= *p;
+ /* b: p1<<28 | p3<<14 | p5 (unmasked) */
+ if (!(b&0x80))
+ {
+ /* we can skip this cause it was (effectively) done above in calc'ing s */
+ /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
+ a &= SLOT_2_0;
+ a = a<<7;
+ a |= b;
+ s = s>>18;
+ *v = ((u64)s)<<32 | a;
+ return 6;
+ }
+
+ p++;
+ a = a<<14;
+ a |= *p;
+ /* a: p2<<28 | p4<<14 | p6 (unmasked) */
+ if (!(a&0x80))
+ {
+ a &= SLOT_4_2_0;
+ b &= SLOT_2_0;
+ b = b<<7;
+ a |= b;
+ s = s>>11;
+ *v = ((u64)s)<<32 | a;
+ return 7;
+ }
+
+ /* CSE2 from below */
+ a &= SLOT_2_0;
+ p++;
+ b = b<<14;
+ b |= *p;
+ /* b: p3<<28 | p5<<14 | p7 (unmasked) */
+ if (!(b&0x80))
+ {
+ b &= SLOT_4_2_0;
+ /* moved CSE2 up */
+ /* a &= (0x7f<<14)|(0x7f); */
+ a = a<<7;
+ a |= b;
+ s = s>>4;
+ *v = ((u64)s)<<32 | a;
+ return 8;
+ }
+
+ /* Nine-byte form. No continuation bit is tested on the final byte, and
+ ** the shifts below are 15 and 8 rather than 14 and 7, so all 8 bits of
+ ** p8 are kept. */
+ p++;
+ a = a<<15;
+ a |= *p;
+ /* a: p4<<29 | p6<<15 | p8 (unmasked) */
+
+ /* moved CSE2 up */
+ /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
+ b &= SLOT_2_0;
+ b = b<<8;
+ a |= b;
+
+ /* p now points at p8, so p[-4] is p4. Its payload bits that did not fit
+ ** in the low 32-bit word are folded into s, the high word. */
+ s = s<<4;
+ b = p[-4];
+ b &= 0x7f;
+ b = b>>3;
+ s |= b;
+
+ *v = ((u64)s)<<32 | a;
+
+ return 9;
+ }
+
+/*
+** The variable-length integer encoding is as follows:
+**
+** KEY:
+** A = 0xxxxxxx 7 bits of data and one flag bit
+** B = 1xxxxxxx 7 bits of data and one flag bit
+** C = xxxxxxxx 8 bits of data
+**
+** 7 bits - A
+** 14 bits - BA
+** 21 bits - BBA
+** 28 bits - BBBA
+** 35 bits - BBBBA
+** 42 bits - BBBBBA
+** 49 bits - BBBBBBA
+** 56 bits - BBBBBBBA
+** 64 bits - BBBBBBBBC
+*/
+
+#ifdef SQLITE_NOINLINE
+# define FTS5_NOINLINE SQLITE_NOINLINE
+#else
+# define FTS5_NOINLINE
+#endif
+
+/*
+** Write a 64-bit variable-length integer to memory starting at p[0].
+** The length of data write will be between 1 and 9 bytes. The number
+** of bytes written is returned.
+**
+** A variable-length integer consists of the lower 7 bits of each byte
+** for all bytes that have the 8th bit set and one byte with the 8th
+** bit clear. Except, if we get to the 9th byte, it stores the full
+** 8 bits and is the last byte.
+*/
+ static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
+   u8 aRev[10];                    /* 7-bit groups, least significant first */
+   int nByte = 0;                  /* Number of groups stored in aRev[] */
+   int iOut;                       /* Output index into p[] */
+
+   if( v & (((u64)0xff000000)<<32) ){
+     /* One of the top 8 bits is set: use the 9 byte form. The last byte
+     ** carries 8 data bits, the first eight carry 7 bits each with the
+     ** high bit set. */
+     int k = 8;
+     p[k] = (u8)v;
+     v >>= 8;
+     while( k-- > 0 ){
+       p[k] = (u8)((v & 0x7f) | 0x80);
+       v >>= 7;
+     }
+     return 9;
+   }
+
+   /* Peel off 7 bits at a time, marking every byte as "more follows"... */
+   do{
+     aRev[nByte++] = (u8)((v & 0x7f) | 0x80);
+     v >>= 7;
+   }while( v!=0 );
+   /* ...then clear the marker on the least significant group, which is
+   ** written last. */
+   aRev[0] &= 0x7f;
+   assert( nByte<=9 );
+
+   /* Emit the groups most significant first. */
+   for(iOut=0; iOut<nByte; iOut++){
+     p[iOut] = aRev[nByte-1-iOut];
+   }
+   return nByte;
+ }
+
+ /*
+ ** Write v to p[] as a varint and return the number of bytes written.
+ ** Values that fit in 14 bits are encoded inline; larger values are
+ ** handled by fts5PutVarint64().
+ */
+ static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
+   if( v>0x3fff ){
+     return fts5PutVarint64(p,v);
+   }
+   if( v>0x7f ){
+     /* Two bytes: high 7 bits with the continuation bit, then low 7 bits */
+     p[0] = ((v>>7)&0x7f)|0x80;
+     p[1] = v&0x7f;
+     return 2;
+   }
+   /* Single byte */
+   p[0] = v&0x7f;
+   return 1;
+ }
+
+
+ /*
+ ** Return the number of bytes (2 to 5) needed to store iVal as a varint.
+ ** The single-byte case is deliberately not handled: callers must only
+ ** pass values of 128 or greater.
+ */
+ static int sqlite3Fts5GetVarintLen(u32 iVal){
+   int nByte = 2;
+   assert( iVal>=(1 << 7) );
+   /* Each additional byte contributes 7 more bits of payload. */
+   while( nByte<5 && iVal>=((u32)1 << (7*nByte)) ){
+     nByte++;
+   }
+   return nByte;
+ }
+
+
+#line 1 "fts5_vocab.c"
+/*
+** 2015 May 08
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** This is an SQLite virtual table module implementing direct access to an
+** existing FTS5 index. The module may create several different types of
+** tables:
+**
+** col:
+** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
+**
+** One row for each term/column combination. The value of $doc is set to
+** the number of fts5 rows that contain at least one instance of term
+** $term within column $col. Field $cnt is set to the total number of
+** instances of term $term in column $col (in any row of the fts5 table).
+**
+** row:
+** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
+**
+** One row for each term in the database. The value of $doc is set to
+** the number of fts5 rows that contain at least one instance of term
+** $term. Field $cnt is set to the total number of instances of term
+** $term in the database.
+*/
+
+
+/* #include "fts5Int.h" */
+
+
+ typedef struct Fts5VocabTable Fts5VocabTable;
+ typedef struct Fts5VocabCursor Fts5VocabCursor;
+
+ /*
+ ** An fts5vocab virtual table. zFts5Tbl and zFts5Db point into the same
+ ** allocation as the structure itself (see fts5VocabInitVtab()).
+ */
+ struct Fts5VocabTable {
+ sqlite3_vtab base;
+ char *zFts5Tbl; /* Name of fts5 table */
+ char *zFts5Db; /* Db containing fts5 table */
+ sqlite3 *db; /* Database handle */
+ Fts5Global *pGlobal; /* FTS5 global object for this database */
+ int eType; /* FTS5_VOCAB_COL or ROW */
+ };
+
+ /*
+ ** A cursor open on an fts5vocab virtual table.
+ */
+ struct Fts5VocabCursor {
+ sqlite3_vtab_cursor base;
+ sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */
+ Fts5Index *pIndex; /* Associated FTS5 index */
+
+ int bEof; /* True if this cursor is at EOF */
+ Fts5IndexIter *pIter; /* Term/rowid iterator object */
+
+ int nLeTerm; /* Size of zLeTerm in bytes */
+ char *zLeTerm; /* (term <= $zLeTerm) parameter, or NULL */
+
+ /* These are used by 'col' tables only */
+ Fts5Config *pConfig; /* Fts5 table configuration */
+ int iCol; /* NOTE(review): presumably the current column - confirm */
+ i64 *aCnt; /* NOTE(review): presumably per-column 'cnt' values - confirm */
+ i64 *aDoc; /* NOTE(review): presumably per-column 'doc' values - confirm */
+
+ /* Output values used by 'row' and 'col' tables */
+ i64 rowid; /* This table's current rowid value */
+ Fts5Buffer term; /* Current value of 'term' column */
+ };
+
+#define FTS5_VOCAB_COL 0
+#define FTS5_VOCAB_ROW 1
+
+#define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt"
+#define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt"
+
+/*
+** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
+*/
+#define FTS5_VOCAB_TERM_EQ 0x01
+#define FTS5_VOCAB_TERM_GE 0x02
+#define FTS5_VOCAB_TERM_LE 0x04
+
+
+/*
+** Translate a string containing an fts5vocab table type to an
+** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
+** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
+** and return SQLITE_ERROR.
+*/
+ static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
+   int rc = SQLITE_OK;
+   /* Work on a private, dequoted copy of the type name. */
+   char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
+
+   if( rc!=SQLITE_OK ) return rc;
+
+   sqlite3Fts5Dequote(zCopy);
+   if( 0==sqlite3_stricmp(zCopy, "col") ){
+     *peType = FTS5_VOCAB_COL;
+   }else if( 0==sqlite3_stricmp(zCopy, "row") ){
+     *peType = FTS5_VOCAB_ROW;
+   }else{
+     /* Neither "col" nor "row" (compared case-insensitively). */
+     *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
+     rc = SQLITE_ERROR;
+   }
+   sqlite3_free(zCopy);
+
+   return rc;
+ }
+
+
+/*
+** The xDisconnect() virtual table method.
+*/
+ static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
+   /* The table object is a single allocation, so one free releases it. */
+   sqlite3_free((Fts5VocabTable*)pVtab);
+   return SQLITE_OK;
+ }
+
+/*
+** The xDestroy() virtual table method.
+*/
+ static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
+   /* Nothing beyond the in-memory table object is released here. */
+   sqlite3_free((Fts5VocabTable*)pVtab);
+   return SQLITE_OK;
+ }
+
+/*
+ ** This function is the implementation of both the xConnect and xCreate
+ ** methods of the fts5vocab virtual table.
+**
+** The argv[] array contains the following:
+**
+** argv[0] -> module name ("fts5vocab")
+** argv[1] -> database name
+** argv[2] -> table name
+**
+** then:
+**
+** argv[3] -> name of fts5 table
+** argv[4] -> type of fts5vocab table
+**
+** or, for tables in the TEMP schema only.
+**
+** argv[3] -> name of fts5 tables database
+** argv[4] -> name of fts5 table
+** argv[5] -> type of fts5vocab table
+*/
+ static int fts5VocabInitVtab(
+ sqlite3 *db, /* The SQLite database connection */
+ void *pAux, /* Pointer to Fts5Global object */
+ int argc, /* Number of elements in argv array */
+ const char * const *argv, /* xCreate/xConnect argument array */
+ sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
+ char **pzErr /* Write any error message here */
+ ){
+ /* Schema declared to SQLite, indexed by FTS5_VOCAB_COL / FTS5_VOCAB_ROW. */
+ const char *azSchema[] = {
+ "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")",
+ "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")"
+ };
+
+ Fts5VocabTable *pRet = 0;
+ int rc = SQLITE_OK; /* Return code */
+ int bDb;
+
+ /* bDb is true for the 6-argument form, which is only accepted when the
+ ** database name in argv[1] is exactly "temp". */
+ bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
+
+ if( argc!=5 && bDb==0 ){
+ *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
+ rc = SQLITE_ERROR;
+ }else{
+ int nByte; /* Bytes of space to allocate */
+ const char *zDb = bDb ? argv[3] : argv[1];
+ const char *zTab = bDb ? argv[4] : argv[3];
+ const char *zType = bDb ? argv[5] : argv[4];
+ int nDb = (int)strlen(zDb)+1;
+ int nTab = (int)strlen(zTab)+1;
+ int eType = 0;
+
+ rc = fts5VocabTableType(zType, pzErr, &eType);
+ if( rc==SQLITE_OK ){
+ assert( eType>=0 && eType<ArraySize(azSchema) );
+ rc = sqlite3_declare_vtab(db, azSchema[eType]);
+ }
+
+ /* The structure and both name strings (nDb and nTab include the nul
+ ** terminators) share one allocation.
+ ** NOTE(review): rc may already hold an error here; presumably
+ ** sqlite3Fts5MallocZero() returns NULL without allocating in that
+ ** case - confirm against its definition. */
+ nByte = sizeof(Fts5VocabTable) + nDb + nTab;
+ pRet = sqlite3Fts5MallocZero(&rc, nByte);
+ if( pRet ){
+ pRet->pGlobal = (Fts5Global*)pAux;
+ pRet->eType = eType;
+ pRet->db = db;
+ /* Table name directly after the structure, database name after it. */
+ pRet->zFts5Tbl = (char*)&pRet[1];
+ pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
+ memcpy(pRet->zFts5Tbl, zTab, nTab);
+ memcpy(pRet->zFts5Db, zDb, nDb);
+ sqlite3Fts5Dequote(pRet->zFts5Tbl);
+ sqlite3Fts5Dequote(pRet->zFts5Db);
+ }
+ }
+
+ /* *ppVTab is always written; it is NULL if no table object was created. */
+ *ppVTab = (sqlite3_vtab*)pRet;
+ return rc;
+ }
+
+
+/*
+** The xConnect() and xCreate() methods for the virtual table. All the
+** work is done in function fts5VocabInitVtab().
+*/
+ static int fts5VocabConnectMethod(
+ sqlite3 *db, /* Database connection */
+ void *pAux, /* Pointer to Fts5Global object */
+ int argc, /* Number of elements in argv array */
+ const char * const *argv, /* xCreate/xConnect argument array */
+ sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
+ char **pzErr /* OUT: sqlite3_malloc'd error message */
+ ){
+ /* xConnect is identical to xCreate for this module. */
+ return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
+ }
+ static int fts5VocabCreateMethod(
+ sqlite3 *db, /* Database connection */
+ void *pAux, /* Pointer to Fts5Global object */
+ int argc, /* Number of elements in argv array */
+ const char * const *argv, /* xCreate/xConnect argument array */
+ sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
+ char **pzErr /* OUT: sqlite3_malloc'd error message */
+ ){
+ /* xCreate is identical to xConnect for this module. */
+ return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
+ }
+
+/*
+** Implementation of the xBestIndex method.
+*/
+ static int fts5VocabBestIndexMethod(
+   sqlite3_vtab *pUnused,
+   sqlite3_index_info *pInfo
+ ){
+   int iEq = -1;                   /* Last usable "term = ?" constraint */
+   int iGe = -1;                   /* Last usable "term >= ?" or "term > ?" */
+   int iLe = -1;                   /* Last usable "term <= ?" or "term < ?" */
+   int mask = 0;                   /* FTS5_VOCAB_TERM_XXX bits for idxNum */
+   int nArg = 0;                   /* Number of xFilter arguments assigned */
+   int ii;
+
+   UNUSED_PARAM(pUnused);
+
+   /* Only usable constraints on column 0 (the term column) matter. Strict
+   ** and non-strict inequalities are recorded in the same slot. */
+   for(ii=0; ii<pInfo->nConstraint; ii++){
+     struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[ii];
+     if( pCons->usable==0 || pCons->iColumn!=0 ) continue;
+     switch( pCons->op ){
+       case SQLITE_INDEX_CONSTRAINT_EQ:
+         iEq = ii;
+         break;
+       case SQLITE_INDEX_CONSTRAINT_LE:
+       case SQLITE_INDEX_CONSTRAINT_LT:
+         iLe = ii;
+         break;
+       case SQLITE_INDEX_CONSTRAINT_GE:
+       case SQLITE_INDEX_CONSTRAINT_GT:
+         iGe = ii;
+         break;
+       default:
+         break;
+     }
+   }
+
+   if( iEq>=0 ){
+     /* An equality constraint takes precedence over any range bounds. */
+     mask |= FTS5_VOCAB_TERM_EQ;
+     pInfo->aConstraintUsage[iEq].argvIndex = ++nArg;
+     pInfo->estimatedCost = 100;
+   }else{
+     /* Full scan cost, halved for each bound that is available. The lower
+     ** bound, if any, is always passed to xFilter before the upper bound. */
+     pInfo->estimatedCost = 1000000;
+     if( iGe>=0 ){
+       mask |= FTS5_VOCAB_TERM_GE;
+       pInfo->aConstraintUsage[iGe].argvIndex = ++nArg;
+       pInfo->estimatedCost = pInfo->estimatedCost / 2;
+     }
+     if( iLe>=0 ){
+       mask |= FTS5_VOCAB_TERM_LE;
+       pInfo->aConstraintUsage[iLe].argvIndex = ++nArg;
+       pInfo->estimatedCost = pInfo->estimatedCost / 2;
+     }
+   }
+
+   pInfo->idxNum = mask;
+
+   return SQLITE_OK;
+ }
+
+/*
+** Implementation of xOpen method.
+**
+** Prepares a "MATCH '*id'" query against the fts5 table named by
+** pTab->zFts5Db/pTab->zFts5Tbl and passes the 64-bit value read from its
+** first result row to sqlite3Fts5IndexFromCsrid() to obtain the Fts5Index
+** and Fts5Config used by the new cursor.
+**
+** If no index can be located the statement is finalized and, unless
+** finalizing itself reports an error, SQLITE_ERROR is returned with
+** pVTab->zErrMsg set to a "no such fts5 table" message.
+**
+** On success *ppCsr receives a zeroed cursor that holds the prepared
+** statement; otherwise the statement is finalized and *ppCsr is set to 0.
+*/
+static int fts5VocabOpenMethod(
+ sqlite3_vtab *pVTab,
+ sqlite3_vtab_cursor **ppCsr
+){
+ Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
+ Fts5Index *pIndex = 0;
+ Fts5Config *pConfig = 0;
+ Fts5VocabCursor *pCsr = 0;
+ int rc = SQLITE_OK;
+ sqlite3_stmt *pStmt = 0;
+ char *zSql = 0;
+
+ zSql = sqlite3Fts5Mprintf(&rc,
+ "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
+ pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
+ );
+ if( zSql ){
+ rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
+ }
+ sqlite3_free(zSql);
+ assert( rc==SQLITE_OK || pStmt==0 );
+ if( rc==SQLITE_ERROR ) rc = SQLITE_OK; /* Reported below as "no such fts5 table" */
+
+ if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
+ i64 iId = sqlite3_column_int64(pStmt, 0);
+ pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &pConfig);
+ }
+
+ if( rc==SQLITE_OK && pIndex==0 ){
+ rc = sqlite3_finalize(pStmt);
+ pStmt = 0;
+ if( rc==SQLITE_OK ){
+ pVTab->zErrMsg = sqlite3_mprintf(
+ "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
+ );
+ rc = SQLITE_ERROR;
+ }
+ }
+
+ if( rc==SQLITE_OK ){ /* Reaching here with SQLITE_OK implies pIndex!=0 */
+ int nByte = pConfig->nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor); /* cursor + aCnt[nCol] + aDoc[nCol] */
+ pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
+ }
+
+ if( pCsr ){
+ pCsr->pIndex = pIndex;
+ pCsr->pStmt = pStmt; /* Finalized by fts5VocabCloseMethod() */
+ pCsr->pConfig = pConfig;
+ pCsr->aCnt = (i64*)&pCsr[1]; /* Both arrays live in the same allocation, after the struct */
+ pCsr->aDoc = &pCsr->aCnt[pConfig->nCol];
+ }else{
+ sqlite3_finalize(pStmt);
+ }
+
+ *ppCsr = (sqlite3_vtab_cursor*)pCsr;
+ return rc;
+}
+
+/*
+** Return cursor pCsr to the state expected at the start of an xFilter
+** call: rowid 0, no open index iterator, no saved upper-bound term
+** (nLeTerm==-1) and the EOF flag cleared.
+**
+** The EOF flag must be cleared here because SQLite may call xFilter more
+** than once on the same cursor. Without the reset, a cursor that had
+** reached the end of one scan would report EOF immediately on the next.
+*/
+static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
+  pCsr->rowid = 0;
+  sqlite3Fts5IterClose(pCsr->pIter);
+  pCsr->pIter = 0;
+  sqlite3_free(pCsr->zLeTerm);
+  pCsr->nLeTerm = -1;
+  pCsr->zLeTerm = 0;
+  pCsr->bEof = 0;
+}
+
+/*
+** xClose for the fts5vocab module. Releases, in this order: the scan state
+** (via fts5VocabResetCursor()), the current-term buffer, the prepared
+** statement stored in the cursor, and finally the cursor object itself.
+** Always returns SQLITE_OK.
+*/
+static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
+  Fts5VocabCursor *pVocabCsr = (Fts5VocabCursor*)pCursor;
+
+  fts5VocabResetCursor(pVocabCsr);
+  sqlite3Fts5BufferFree(&pVocabCsr->term);
+  sqlite3_finalize(pVocabCsr->pStmt);
+  sqlite3_free(pVocabCsr);
+
+  return SQLITE_OK;
+}
+
+
+/*
+** Advance the cursor to the next row in the table.
+**
+** In an FTS5_VOCAB_COL table each term produces one row per column with a
+** non-zero document count, so the cursor first tries to move pCsr->iCol to
+** the next such column of the current term. For FTS5_VOCAB_ROW tables, or
+** once the columns of the current term are used up, the next term is read
+** from pCsr->pIter and aCnt[]/aDoc[] are recomputed by visiting every
+** iterator entry that carries that term.
+**
+** pCsr->bEof is set when the iterator is exhausted, or when the next term
+** sorts after the upper bound stored in zLeTerm/nLeTerm.
+**
+** Returns SQLITE_OK on success. FTS5_CORRUPT is returned if a position
+** list names a column outside the range [0, nCol), or if a term of an
+** FTS5_VOCAB_COL table ends up with no column to report. Column numbers
+** are range-checked before they are used as array indexes, so corrupt
+** input cannot cause writes or reads outside aCnt[]/aDoc[].
+*/
+static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
+  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
+  Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
+  int rc = SQLITE_OK;
+  int nCol = pCsr->pConfig->nCol;
+
+  pCsr->rowid++;
+
+  if( pTab->eType==FTS5_VOCAB_COL ){
+    /* Look for another column of the current term with documents in it */
+    for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
+      if( pCsr->aDoc[pCsr->iCol] ) break;
+    }
+  }
+
+  if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){
+    if( sqlite3Fts5IterEof(pCsr->pIter) ){
+      pCsr->bEof = 1;
+    }else{
+      const char *zTerm;
+      int nTerm;
+
+      zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
+      if( pCsr->nLeTerm>=0 ){
+        /* Stop once the next term is greater than the upper bound */
+        int nCmp = MIN(nTerm, pCsr->nLeTerm);
+        int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
+        if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
+          pCsr->bEof = 1;
+          return SQLITE_OK;
+        }
+      }
+
+      sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
+      memset(pCsr->aCnt, 0, nCol * sizeof(i64));
+      memset(pCsr->aDoc, 0, nCol * sizeof(i64));
+      pCsr->iCol = 0;
+
+      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
+      while( rc==SQLITE_OK ){
+        const u8 *pPos; int nPos;   /* Position list */
+        i64 iPos = 0;               /* 64-bit position read from poslist */
+        int iOff = 0;               /* Current offset within position list */
+
+        pPos = pCsr->pIter->pData;
+        nPos = pCsr->pIter->nData;
+        switch( pCsr->pConfig->eDetail ){
+          case FTS5_DETAIL_FULL:
+            if( pTab->eType==FTS5_VOCAB_ROW ){
+              while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
+                pCsr->aCnt[0]++;
+              }
+              pCsr->aDoc[0]++;
+            }else{
+              int iCol = -1;
+              while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
+                int ii = FTS5_POS2COLUMN(iPos);
+                /* Validate the column before it indexes aCnt[]/aDoc[] */
+                if( ii<0 || ii>=nCol ){
+                  rc = FTS5_CORRUPT;
+                  break;
+                }
+                pCsr->aCnt[ii]++;
+                if( iCol!=ii ){
+                  pCsr->aDoc[ii]++;
+                  iCol = ii;
+                }
+              }
+            }
+            break;
+
+          case FTS5_DETAIL_COLUMNS:
+            if( pTab->eType==FTS5_VOCAB_ROW ){
+              pCsr->aDoc[0]++;
+            }else{
+              while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
+                assert_nc( iPos>=0 && iPos<nCol );
+                if( iPos<0 || iPos>=nCol ){
+                  rc = FTS5_CORRUPT;
+                  break;
+                }
+                pCsr->aDoc[iPos]++;
+              }
+            }
+            break;
+
+          default:
+            assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE );
+            pCsr->aDoc[0]++;
+            break;
+        }
+
+        if( rc==SQLITE_OK ){
+          rc = sqlite3Fts5IterNextScan(pCsr->pIter);
+        }
+
+        if( rc==SQLITE_OK ){
+          /* Keep accumulating only while the iterator stays on this term */
+          zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
+          if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ){
+            break;
+          }
+          if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
+        }
+      }
+    }
+  }
+
+  if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
+    /* Position on the first column that has documents. The search is
+    ** bounded by nCol; a term with no such column means the index is
+    ** inconsistent. */
+    while( pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0 ) pCsr->iCol++;
+    if( pCsr->iCol>=nCol ){
+      rc = FTS5_CORRUPT;
+    }
+  }
+  return rc;
+}
+
+/*
+** This is the xFilter implementation for the virtual table.
+**
+** idxNum is the mask produced by fts5VocabBestIndexMethod(). The values in
+** apVal[] appear in the order EQ, GE, LE for whichever of the
+** FTS5_VOCAB_TERM_* flags are set:
+**
+**   * With an EQ value the index is queried for that term with flags 0.
+**   * Otherwise a FTS5INDEX_QUERY_SCAN query is started at the GE value
+**     (if any) and a private copy of the LE value (if any) is saved in
+**     pCsr->zLeTerm/nLeTerm for fts5VocabNextMethod() to compare against.
+**
+** sqlite3_value_text() returns NULL for an SQL NULL bound (and on OOM), so
+** a NULL upper bound is stored as the empty string instead of being
+** passed to memcpy().
+**
+** Returns SQLITE_OK, SQLITE_NOMEM if the bound cannot be copied, or the
+** error code from sqlite3Fts5IndexQuery() or fts5VocabNextMethod().
+*/
+static int fts5VocabFilterMethod(
+  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
+  int idxNum,                     /* Strategy index */
+  const char *zUnused,            /* Unused */
+  int nUnused,                    /* Number of elements in apVal */
+  sqlite3_value **apVal           /* Arguments for the indexing scheme */
+){
+  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
+  int rc = SQLITE_OK;
+
+  int iVal = 0;
+  int f = FTS5INDEX_QUERY_SCAN;
+  const char *zTerm = 0;
+  int nTerm = 0;
+
+  sqlite3_value *pEq = 0;
+  sqlite3_value *pGe = 0;
+  sqlite3_value *pLe = 0;
+
+  UNUSED_PARAM2(zUnused, nUnused);
+
+  fts5VocabResetCursor(pCsr);
+  if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
+  if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
+  if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
+
+  if( pEq ){
+    zTerm = (const char *)sqlite3_value_text(pEq);
+    nTerm = sqlite3_value_bytes(pEq);
+    f = 0;
+  }else{
+    if( pGe ){
+      zTerm = (const char *)sqlite3_value_text(pGe);
+      nTerm = sqlite3_value_bytes(pGe);
+    }
+    if( pLe ){
+      const char *zCopy = (const char *)sqlite3_value_text(pLe);
+      int nCopy = sqlite3_value_bytes(pLe);
+      if( zCopy==0 ){
+        /* NULL value or OOM: fall back to an empty upper bound */
+        zCopy = "";
+        nCopy = 0;
+      }
+      pCsr->nLeTerm = nCopy;
+      pCsr->zLeTerm = sqlite3_malloc(nCopy+1);
+      if( pCsr->zLeTerm==0 ){
+        rc = SQLITE_NOMEM;
+      }else{
+        memcpy(pCsr->zLeTerm, zCopy, nCopy+1);
+      }
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
+  }
+  if( rc==SQLITE_OK ){
+    /* Load the first row */
+    rc = fts5VocabNextMethod(pCursor);
+  }
+
+  return rc;
+}
+
+/*
+** xEof for the fts5vocab module: report the cursor's bEof flag, which
+** fts5VocabNextMethod() sets once there are no more rows to return.
+*/
+static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
+  return ((Fts5VocabCursor*)pCursor)->bEof;
+}
+
+/*
+** xColumn for the fts5vocab module.
+**
+** Column 0 is always the current term. For an FTS5_VOCAB_COL table,
+** column 1 is the name of the current fts5 column (left unset when the
+** table uses detail=none), column 2 is aDoc[] and column 3 is aCnt[] for
+** that column. For other tables column 1 is aDoc[0] and column 2 is
+** aCnt[0]. A counter is only written to the result when it is greater
+** than zero; otherwise the result is left untouched.
+*/
+static int fts5VocabColumnMethod(
+  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
+  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
+  int iCol                        /* Index of column to read value from */
+){
+  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
+  Fts5VocabTable *pTab = (Fts5VocabTable*)(pCursor->pVtab);
+  i64 nResult = 0;
+
+  if( iCol==0 ){
+    sqlite3_result_text(
+        pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
+    );
+    return SQLITE_OK;
+  }
+
+  if( pTab->eType==FTS5_VOCAB_COL ){
+    assert( iCol==1 || iCol==2 || iCol==3 );
+    switch( iCol ){
+      case 1:
+        if( pCsr->pConfig->eDetail!=FTS5_DETAIL_NONE ){
+          const char *zName = pCsr->pConfig->azCol[pCsr->iCol];
+          sqlite3_result_text(pCtx, zName, -1, SQLITE_STATIC);
+        }
+        break;
+      case 2:
+        nResult = pCsr->aDoc[pCsr->iCol];
+        break;
+      default:
+        nResult = pCsr->aCnt[pCsr->iCol];
+        break;
+    }
+  }else{
+    assert( iCol==1 || iCol==2 );
+    nResult = (iCol==1) ? pCsr->aDoc[0] : pCsr->aCnt[0];
+  }
+
+  if( nResult>0 ){
+    sqlite3_result_int64(pCtx, nResult);
+  }
+  return SQLITE_OK;
+}
+
+/*
+** xRowid for the fts5vocab module: store the cursor's current rowid
+** counter in *pRowid. The counter is zeroed by fts5VocabResetCursor() and
+** incremented on every call to fts5VocabNextMethod(). Always returns
+** SQLITE_OK.
+*/
+static int fts5VocabRowidMethod(
+  sqlite3_vtab_cursor *pCursor,
+  sqlite_int64 *pRowid
+){
+  *pRowid = ((Fts5VocabCursor*)pCursor)->rowid;
+  return SQLITE_OK;
+}
+
+/*
+** Register the "fts5vocab" virtual table module with database handle db.
+** pGlobal is installed as the module's client data, which is how it
+** reaches the xCreate/xConnect methods as their pAux argument. The module
+** supplies no xUpdate, transaction, rename or savepoint methods.
+**
+** Returns the result of sqlite3_create_module_v2().
+*/
+static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
+  static const sqlite3_module vocabModule = {
+    2,                            /* iVersion */
+    fts5VocabCreateMethod,        /* xCreate */
+    fts5VocabConnectMethod,       /* xConnect */
+    fts5VocabBestIndexMethod,     /* xBestIndex */
+    fts5VocabDisconnectMethod,    /* xDisconnect */
+    fts5VocabDestroyMethod,       /* xDestroy */
+    fts5VocabOpenMethod,          /* xOpen */
+    fts5VocabCloseMethod,         /* xClose */
+    fts5VocabFilterMethod,        /* xFilter */
+    fts5VocabNextMethod,          /* xNext */
+    fts5VocabEofMethod,           /* xEof */
+    fts5VocabColumnMethod,        /* xColumn */
+    fts5VocabRowidMethod,         /* xRowid */
+    0,                            /* xUpdate */
+    0,                            /* xBegin */
+    0,                            /* xSync */
+    0,                            /* xCommit */
+    0,                            /* xRollback */
+    0,                            /* xFindFunction */
+    0,                            /* xRename */
+    0,                            /* xSavepoint */
+    0,                            /* xRelease */
+    0,                            /* xRollbackTo */
+  };
+
+  return sqlite3_create_module_v2(
+      db, "fts5vocab", &vocabModule, (void*)pGlobal, 0
+  );
+}
+
+
+
+
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */
diff --git a/src/libtracker-fts/fts5.h b/src/libtracker-fts/fts5.h
new file mode 100644
index 000000000..96ecb38e3
--- /dev/null
+++ b/src/libtracker-fts/fts5.h
@@ -0,0 +1,578 @@
+/*
+** 2014 May 31
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Interfaces to extend FTS5. Using the interfaces defined in this file,
+** FTS5 may be extended with:
+**
+** * custom tokenizers, and
+** * custom auxiliary functions.
+*/
+
+
+#ifndef _FTS5_H
+#define _FTS5_H
+
+#include "sqlite3.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*************************************************************************
+** CUSTOM AUXILIARY FUNCTIONS
+**
+** Virtual table implementations may overload SQL functions by implementing
+** the sqlite3_module.xFindFunction() method.
+*/
+
+typedef struct Fts5ExtensionApi Fts5ExtensionApi;
+typedef struct Fts5Context Fts5Context; /* Declared only; this header never defines its members */
+typedef struct Fts5PhraseIter Fts5PhraseIter;
+
+typedef void (*fts5_extension_function)( /* Signature of an auxiliary function, as accepted by fts5_api.xCreateFunction() */
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
+ sqlite3_context *pCtx, /* Context for returning result/error */
+ int nVal, /* Number of values in apVal[] array */
+ sqlite3_value **apVal /* Array of trailing arguments */
+);
+
+struct Fts5PhraseIter { /* Iteration state for xPhraseFirst()/xPhraseNext()
+** and xPhraseFirstColumn()/xPhraseNextColumn(). Applications should not
+** modify the members directly. */
+ const unsigned char *a;
+ const unsigned char *b;
+};
+
+/*
+** EXTENSION API FUNCTIONS
+**
+** xUserData(pFts):
+** Return a copy of the context pointer the extension function was
+** registered with.
+**
+** xColumnTotalSize(pFts, iCol, pnToken):
+** If parameter iCol is less than zero, set output variable *pnToken
+** to the total number of tokens in the FTS5 table. Or, if iCol is
+** non-negative but less than the number of columns in the table, return
+** the total number of tokens in column iCol, considering all rows in
+** the FTS5 table.
+**
+** If parameter iCol is greater than or equal to the number of columns
+** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
+** an OOM condition or IO error), an appropriate SQLite error code is
+** returned.
+**
+** xColumnCount(pFts):
+** Return the number of columns in the table.
+**
+** xColumnSize(pFts, iCol, pnToken):
+** If parameter iCol is less than zero, set output variable *pnToken
+** to the total number of tokens in the current row. Or, if iCol is
+** non-negative but less than the number of columns in the table, set
+** *pnToken to the number of tokens in column iCol of the current row.
+**
+** If parameter iCol is greater than or equal to the number of columns
+** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
+** an OOM condition or IO error), an appropriate SQLite error code is
+** returned.
+**
+** This function may be quite inefficient if used with an FTS5 table
+** created with the "columnsize=0" option.
+**
+** xColumnText:
+** This function attempts to retrieve the text of column iCol of the
+** current document. If successful, (*pz) is set to point to a buffer
+** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
+** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
+** if an error occurs, an SQLite error code is returned and the final values
+** of (*pz) and (*pn) are undefined.
+**
+** xPhraseCount:
+** Returns the number of phrases in the current query expression.
+**
+** xPhraseSize:
+** Returns the number of tokens in phrase iPhrase of the query. Phrases
+** are numbered starting from zero.
+**
+** xInstCount:
+** Set *pnInst to the total number of occurrences of all phrases within
+** the query within the current row. Return SQLITE_OK if successful, or
+** an error code (i.e. SQLITE_NOMEM) if an error occurs.
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" or "detail=column" option. If the FTS5 table is created
+** with either "detail=none" or "detail=column" and "content=" option
+** (i.e. if it is a contentless table), then this API always returns 0.
+**
+** xInst:
+** Query for the details of phrase match iIdx within the current row.
+** Phrase matches are numbered starting from zero, so the iIdx argument
+** should be greater than or equal to zero and smaller than the value
+** output by xInstCount().
+**
+** Usually, output parameter *piPhrase is set to the phrase number, *piCol
+** to the column in which it occurs and *piOff the token offset of the
+** first token of the phrase. The exception is if the table was created
+** with the offsets=0 option specified. In this case *piOff is always
+** set to -1.
+**
+** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM)
+** if an error occurs.
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" or "detail=column" option.
+**
+** xRowid:
+** Returns the rowid of the current row.
+**
+** xTokenize:
+** Tokenize text using the tokenizer belonging to the FTS5 table.
+**
+** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
+** This API function is used to query the FTS table for phrase iPhrase
+** of the current query. Specifically, a query equivalent to:
+**
+** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
+**
+** with $p set to a phrase equivalent to the phrase iPhrase of the
+** current query is executed. For each row visited, the callback function
+** passed as the fourth argument is invoked. The context and API objects
+** passed to the callback function may be used to access the properties of
+** each matched row. Invoking Api.xUserData() returns a copy of the pointer
+** passed as the third argument to pUserData.
+**
+** If the callback function returns any value other than SQLITE_OK, the
+** query is abandoned and the xQueryPhrase function returns immediately.
+** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
+** Otherwise, the error code is propagated upwards.
+**
+** If the query runs to completion without incident, SQLITE_OK is returned.
+** Or, if some error occurs before the query completes or is aborted by
+** the callback, an SQLite error code is returned.
+**
+**
+** xSetAuxdata(pFts5, pAux, xDelete)
+**
+** Save the pointer passed as the second argument as the extension functions
+** "auxiliary data". The pointer may then be retrieved by the current or any
+** future invocation of the same fts5 extension function made as part of
+** the same MATCH query using the xGetAuxdata() API.
+**
+** Each extension function is allocated a single auxiliary data slot for
+** each FTS query (MATCH expression). If the extension function is invoked
+** more than once for a single FTS query, then all invocations share a
+** single auxiliary data context.
+**
+** If there is already an auxiliary data pointer when this function is
+** invoked, then it is replaced by the new pointer. If an xDelete callback
+** was specified along with the original pointer, it is invoked at this
+** point.
+**
+** The xDelete callback, if one is specified, is also invoked on the
+** auxiliary data pointer after the FTS5 query has finished.
+**
+** If an error (e.g. an OOM condition) occurs within this function,
+** the auxiliary data is set to NULL and an error code is returned. If the
+** xDelete parameter was not NULL, it is invoked on the auxiliary data
+** pointer before returning.
+**
+**
+** xGetAuxdata(pFts5, bClear)
+**
+** Returns the current auxiliary data pointer for the fts5 extension
+** function. See the xSetAuxdata() method for details.
+**
+** If the bClear argument is non-zero, then the auxiliary data is cleared
+** (set to NULL) before this function returns. In this case the xDelete,
+** if any, is not invoked.
+**
+**
+** xRowCount(pFts5, pnRow)
+**
+** This function is used to retrieve the total number of rows in the table.
+** In other words, the same value that would be returned by:
+**
+** SELECT count(*) FROM ftstable;
+**
+** xPhraseFirst()
+** This function is used, along with type Fts5PhraseIter and the xPhraseNext
+** method, to iterate through all instances of a single query phrase within
+** the current row. This is the same information as is accessible via the
+** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
+** to use, this API may be faster under some circumstances. To iterate
+** through instances of phrase iPhrase, use the following code:
+**
+** Fts5PhraseIter iter;
+** int iCol, iOff;
+** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
+** iCol>=0;
+** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
+** ){
+** // An instance of phrase iPhrase at offset iOff of column iCol
+** }
+**
+** The Fts5PhraseIter structure is defined above. Applications should not
+** modify this structure directly - it should only be used as shown above
+** with the xPhraseFirst() and xPhraseNext() API methods (and by
+** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" or "detail=column" option. If the FTS5 table is created
+** with either "detail=none" or "detail=column" and "content=" option
+** (i.e. if it is a contentless table), then this API always iterates
+** through an empty set (all calls to xPhraseFirst() set iCol to -1).
+**
+** xPhraseNext()
+** See xPhraseFirst above.
+**
+** xPhraseFirstColumn()
+** This function and xPhraseNextColumn() are similar to the xPhraseFirst()
+** and xPhraseNext() APIs described above. The difference is that instead
+** of iterating through all instances of a phrase in the current row, these
+** APIs are used to iterate through the set of columns in the current row
+** that contain one or more instances of a specified phrase. For example:
+**
+** Fts5PhraseIter iter;
+** int iCol;
+** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
+** iCol>=0;
+** pApi->xPhraseNextColumn(pFts, &iter, &iCol)
+** ){
+** // Column iCol contains at least one instance of phrase iPhrase
+** }
+**
+** This API can be quite slow if used with an FTS5 table created with the
+** "detail=none" option. If the FTS5 table is created with both the
+** "detail=none" and "content=" options (i.e. if it is a contentless table),
+** then this API always iterates through an empty set (all calls to
+** xPhraseFirstColumn() set iCol to -1).
+**
+** The information accessed using this API and its companion
+** xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
+** (or xInst/xInstCount). The chief advantage of this API is that it is
+** significantly more efficient than those alternatives when used with
+** "detail=column" tables.
+**
+** xPhraseNextColumn()
+** See xPhraseFirstColumn above.
+*/
+struct Fts5ExtensionApi { /* Method table passed to auxiliary functions as
+** their pApi argument. Every method takes an Fts5Context* as its first
+** parameter. */
+ int iVersion; /* Currently always set to 3 */
+
+ void *(*xUserData)(Fts5Context*); /* Context pointer the function was registered with */
+
+ int (*xColumnCount)(Fts5Context*); /* Number of columns in the table */
+ int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); /* Total number of rows in the table */
+ int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); /* Token total over all rows; iCol<0 means all columns */
+
+ int (*xTokenize)(Fts5Context*, /* Run the table's own tokenizer over pText */
+ const char *pText, int nText, /* Text to tokenize */
+ void *pCtx, /* Context passed to xToken() */
+ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
+ );
+
+ int (*xPhraseCount)(Fts5Context*); /* Number of phrases in the current query */
+ int (*xPhraseSize)(Fts5Context*, int iPhrase); /* Number of tokens in phrase iPhrase */
+
+ int (*xInstCount)(Fts5Context*, int *pnInst); /* Phrase occurrences in the current row */
+ int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); /* Details of occurrence iIdx */
+
+ sqlite3_int64 (*xRowid)(Fts5Context*); /* Rowid of the current row */
+ int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); /* utf-8 text of column iCol; *pn is in bytes */
+ int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); /* Token count in the current row; iCol<0 means all columns */
+
+ int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, /* Invoke the callback for each row matching phrase iPhrase */
+ int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
+ );
+ int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); /* Replace the per-query auxiliary data slot */
+ void *(*xGetAuxdata)(Fts5Context*, int bClear); /* Fetch (and optionally clear) the auxiliary data */
+
+ int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); /* Start iterating instances of iPhrase */
+ void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); /* Next instance; iCol<0 ends the loop */
+
+ int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); /* Start iterating columns containing iPhrase */
+ void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); /* Next column; iCol<0 ends the loop */
+};
+
+/*
+** CUSTOM AUXILIARY FUNCTIONS
+*************************************************************************/
+
+/*************************************************************************
+** CUSTOM TOKENIZERS
+**
+** Applications may also register custom tokenizer types. A tokenizer
+** is registered by providing fts5 with a populated instance of the
+** following structure. All structure methods must be defined, setting
+** any member of the fts5_tokenizer struct to NULL leads to undefined
+** behaviour. The structure methods are expected to function as follows:
+**
+** xCreate:
+** This function is used to allocate and initialize a tokenizer instance.
+** A tokenizer instance is required to actually tokenize text.
+**
+** The first argument passed to this function is a copy of the (void*)
+** pointer provided by the application when the fts5_tokenizer object
+** was registered with FTS5 (the third argument to xCreateTokenizer()).
+** The second and third arguments are an array of nul-terminated strings
+** containing the tokenizer arguments, if any, specified following the
+** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
+** to create the FTS5 table.
+**
+** The final argument is an output variable. If successful, (*ppOut)
+** should be set to point to the new tokenizer handle and SQLITE_OK
+** returned. If an error occurs, some value other than SQLITE_OK should
+** be returned. In this case, fts5 assumes that the final value of *ppOut
+** is undefined.
+**
+** xDelete:
+** This function is invoked to delete a tokenizer handle previously
+** allocated using xCreate(). Fts5 guarantees that this function will
+** be invoked exactly once for each successful call to xCreate().
+**
+** xTokenize:
+** This function is expected to tokenize the nText byte string indicated
+** by argument pText. pText may or may not be nul-terminated. The first
+** argument passed to this function is a pointer to an Fts5Tokenizer object
+** returned by an earlier call to xCreate().
+**
+** The second argument indicates the reason that FTS5 is requesting
+** tokenization of the supplied text. This is always one of the following
+** four values:
+**
+** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
+** or removed from the FTS table. The tokenizer is being invoked to
+** determine the set of tokens to add to (or delete from) the
+** FTS index.
+**
+** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
+** against the FTS index. The tokenizer is being called to tokenize
+** a bareword or quoted string specified as part of the query.
+**
+** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
+** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
+** followed by a "*" character, indicating that the last token
+** returned by the tokenizer will be treated as a token prefix.
+**
+** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
+** satisfy an Fts5ExtensionApi.xTokenize() request made by an
+** auxiliary function. Or an Fts5ExtensionApi.xColumnSize() request
+** made by the same on a columnsize=0 database.
+** </ul>
+**
+** For each token in the input string, the supplied callback xToken() must
+** be invoked. The first argument to it should be a copy of the pointer
+** passed as the second argument to xTokenize(). The third and fourth
+** arguments are a pointer to a buffer containing the token text, and the
+** size of the token in bytes. The fifth and sixth arguments are the byte offsets
+** of the first byte of and first byte immediately following the text from
+** which the token is derived within the input.
+**
+** The second argument passed to the xToken() callback ("tflags") should
+** normally be set to 0. The exception is if the tokenizer supports
+** synonyms. In this case see the discussion below for details.
+**
+** FTS5 assumes the xToken() callback is invoked for each token in the
+** order that they occur within the input text.
+**
+** If an xToken() callback returns any value other than SQLITE_OK, then
+** the tokenization should be abandoned and the xTokenize() method should
+** immediately return a copy of the xToken() return value. Or, if the
+** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
+** if an error occurs with the xTokenize() implementation itself, it
+** may abandon the tokenization and return any error code other than
+** SQLITE_OK or SQLITE_DONE.
+**
+** SYNONYM SUPPORT
+**
+** Custom tokenizers may also support synonyms. Consider a case in which a
+** user wishes to query for a phrase such as "first place". Using the
+** built-in tokenizers, the FTS5 query 'first + place' will match instances
+** of "first place" within the document set, but not alternative forms
+** such as "1st place". In some applications, it would be better to match
+** all instances of "first place" or "1st place" regardless of which form
+** the user specified in the MATCH query text.
+**
+** There are several ways to approach this in FTS5:
+**
+** <ol><li> By mapping all synonyms to a single token. In this case, using
+** the above example, this means that the tokenizer returns the
+** same token for inputs "first" and "1st". Say that token is in
+** fact "first", so that when the user inserts the document "I won
+** 1st place" entries are added to the index for tokens "i", "won",
+** "first" and "place". If the user then queries for '1st + place',
+** the tokenizer substitutes "first" for "1st" and the query works
+** as expected.
+**
+** <li> By querying the index for all synonyms of each query term separately.
+** In this case, when tokenizing query text, the tokenizer may
+** provide multiple synonyms for a single term within the document.
+** FTS5 then queries the index for each synonym individually. For
+** example, faced with the query:
+**
+** <codeblock>
+** ... MATCH 'first place'</codeblock>
+**
+** the tokenizer offers both "1st" and "first" as synonyms for the
+** first token in the MATCH query and FTS5 effectively runs a query
+** similar to:
+**
+** <codeblock>
+** ... MATCH '(first OR 1st) place'</codeblock>
+**
+** except that, for the purposes of auxiliary functions, the query
+** still appears to contain just two phrases - "(first OR 1st)"
+** being treated as a single phrase.
+**
+** <li> By adding multiple synonyms for a single term to the FTS index.
+** Using this method, when tokenizing document text, the tokenizer
+** provides multiple synonyms for each token. So that when a
+** document such as "I won first place" is tokenized, entries are
+** added to the FTS index for "i", "won", "first", "1st" and
+** "place".
+**
+** This way, even if the tokenizer does not provide synonyms
+** when tokenizing query text (it should not - to do so would be
+** inefficient), it doesn't matter if the user queries for
+** 'first + place' or '1st + place', as there are entries in the
+** FTS index corresponding to both forms of the first token.
+** </ol>
+**
+** Whether it is parsing document or query text, any call to xToken that
+** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
+** is considered to supply a synonym for the previous token. For example,
+** when parsing the document "I won first place", a tokenizer that supports
+** synonyms would call xToken() 5 times, as follows:
+**
+** <codeblock>
+** xToken(pCtx, 0, "i", 1, 0, 1);
+** xToken(pCtx, 0, "won", 3, 2, 5);
+** xToken(pCtx, 0, "first", 5, 6, 11);
+** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
+** xToken(pCtx, 0, "place", 5, 12, 17);
+**</codeblock>
+**
+** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
+** xToken() is called. Multiple synonyms may be specified for a single token
+** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
+** There is no limit to the number of synonyms that may be provided for a
+** single token.
+**
+** In many cases, method (1) above is the best approach. It does not add
+** extra data to the FTS index or require FTS5 to query for multiple terms,
+** so it is efficient in terms of disk space and query speed. However, it
+** does not support prefix queries very well. If, as suggested above, the
+** token "first" is substituted for "1st" by the tokenizer, then the query:
+**
+** <codeblock>
+** ... MATCH '1s*'</codeblock>
+**
+** will not match documents that contain the token "1st" (as the tokenizer
+** will probably not map "1s" to any prefix of "first").
+**
+** For full prefix support, method (3) may be preferred. In this case,
+** because the index contains entries for both "first" and "1st", prefix
+** queries such as 'fi*' or '1s*' will match correctly. However, because
+** extra entries are added to the FTS index, this method uses more space
+** within the database.
+**
+** Method (2) offers a midpoint between (1) and (3). Using this method,
+** a query such as '1s*' will match documents that contain the literal
+** token "1st", but not "first" (assuming the tokenizer is not able to
+** provide synonyms for prefixes). However, a non-prefix query like '1st'
+** will match against "1st" and "first". This method does not require
+** extra disk space, as no extra entries are added to the FTS index.
+** On the other hand, it may require more CPU cycles to run MATCH queries,
+** as separate queries of the FTS index are required for each synonym.
+**
+** When using methods (2) or (3), it is important that the tokenizer only
+** provide synonyms when tokenizing document text (method (3)) or query
+** text (method (2)), not both. Doing so will not cause any errors, but is
+** inefficient.
+*/
+typedef struct Fts5Tokenizer Fts5Tokenizer; /* Tokenizer handle type; produced by xCreate, consumed by xDelete/xTokenize */
+typedef struct fts5_tokenizer fts5_tokenizer;
+struct fts5_tokenizer { /* Methods of a custom tokenizer; all three must be non-NULL */
+ int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); /* Allocate a handle; first arg is the registration context */
+ void (*xDelete)(Fts5Tokenizer*); /* Free a handle returned by xCreate */
+ int (*xTokenize)(Fts5Tokenizer*, /* Tokenize pText, calling xToken once per token in input order */
+ void *pCtx,
+ int flags, /* Mask of FTS5_TOKENIZE_* flags */
+ const char *pText, int nText,
+ int (*xToken)(
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Pointer to buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStart, /* Byte offset of token within input text */
+ int iEnd /* Byte offset of end of token within input text */
+ )
+ );
+};
+
+/* Flags that may be passed as the third argument to xTokenize() */
+#define FTS5_TOKENIZE_QUERY 0x0001
+#define FTS5_TOKENIZE_PREFIX 0x0002
+#define FTS5_TOKENIZE_DOCUMENT 0x0004
+#define FTS5_TOKENIZE_AUX 0x0008
+
+/* Flags that may be passed by the tokenizer implementation back to FTS5
+** as the second argument ("tflags") to the supplied xToken callback. */
+#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
+
+/*
+** END OF CUSTOM TOKENIZERS
+*************************************************************************/
+
+/*************************************************************************
+** FTS5 EXTENSION REGISTRATION API
+*/
+typedef struct fts5_api fts5_api;
+struct fts5_api { /* Registration entry points; each method takes the fts5_api* itself as its first argument */
+ int iVersion; /* Currently always set to 2 */
+
+ /* Create a new tokenizer */
+ int (*xCreateTokenizer)(
+ fts5_api *pApi,
+ const char *zName,
+ void *pContext, /* Handed to fts5_tokenizer.xCreate() as its first argument */
+ fts5_tokenizer *pTokenizer,
+ void (*xDestroy)(void*)
+ );
+
+ /* Find an existing tokenizer */
+ int (*xFindTokenizer)(
+ fts5_api *pApi,
+ const char *zName,
+ void **ppContext,
+ fts5_tokenizer *pTokenizer
+ );
+
+ /* Create a new auxiliary function */
+ int (*xCreateFunction)(
+ fts5_api *pApi,
+ const char *zName,
+ void *pContext, /* Returned to the function by Fts5ExtensionApi.xUserData() */
+ fts5_extension_function xFunction,
+ void (*xDestroy)(void*)
+ );
+};
+
+/*
+** END OF REGISTRATION API
+*************************************************************************/
+
+#ifdef __cplusplus
+} /* end of the 'extern "C"' block */
+#endif
+
+#endif /* _FTS5_H */
+
diff --git a/src/libtracker-fts/tracker-fts-tokenizer.c b/src/libtracker-fts/tracker-fts-tokenizer.c
index c45d73d9a..26764aaf3 100644
--- a/src/libtracker-fts/tracker-fts-tokenizer.c
+++ b/src/libtracker-fts/tracker-fts-tokenizer.c
@@ -19,7 +19,7 @@
* 02110-1301 USA
*/
-/* FTS3/4 Tokenizer using TrackerParser */
+/* FTS5 Tokenizer using TrackerParser */
#include "config.h"
@@ -27,209 +27,392 @@
#include <string.h>
#include <libtracker-common/tracker-parser.h>
+#include <libtracker-data/tracker-ontologies.h>
#include "tracker-fts-tokenizer.h"
#include "tracker-fts-config.h"
-#include "fts3_tokenizer.h"
+#include "fts5.h"
+typedef struct TrackerTokenizerData TrackerTokenizerData;
typedef struct TrackerTokenizer TrackerTokenizer;
-typedef struct TrackerCursor TrackerCursor;
+
+struct TrackerTokenizerData { /* Settings shared by every TrackerTokenizer instance; built once by tracker_tokenizer_data_new() */
+ TrackerLanguage *language; /* handed to tracker_parser_new(); unreffed in tracker_tokenizer_data_free() */
+ int max_word_length; /* forwarded to tracker_parser_reset() */
+ int max_words; /* upper bound on tokens emitted per xTokenize call */
+ gboolean enable_stemmer; /* forwarded to tracker_parser_reset() */
+ gboolean enable_unaccent; /* forwarded to tracker_parser_reset() */
+ gboolean ignore_numbers; /* forwarded to tracker_parser_reset() */
+ gboolean ignore_stop_words; /* forwarded to the parser and re-checked per token; forced FALSE when TRACKER_FTS_STOP_WORDS=0 */
+};
struct TrackerTokenizer {
- sqlite3_tokenizer base;
- TrackerLanguage *language;
- int max_word_length;
- int max_words;
- gboolean enable_stemmer;
- gboolean enable_unaccent;
- gboolean ignore_numbers;
- gboolean ignore_stop_words;
+ TrackerTokenizerData *data;
+ TrackerParser *parser;
};
-struct TrackerCursor {
- sqlite3_tokenizer_cursor base;
+/*
+ * FTS5 xCreate hook.
+ *
+ * Allocates a TrackerTokenizer that borrows the TrackerTokenizerData
+ * passed as registration context (@data) and owns a TrackerParser created
+ * for that data's language. @argv/@argc (tokenizer arguments from the
+ * CREATE VIRTUAL TABLE statement) are not used.
+ *
+ * Always returns SQLITE_OK; the new instance is stored in @tokenizer_out.
+ */
+static int
+tracker_tokenizer_create (void           *data,
+                          const char    **argv,
+                          int             argc,
+                          Fts5Tokenizer **tokenizer_out)
+{
+	TrackerTokenizerData *shared = data;
+	TrackerTokenizer *self = g_new0 (TrackerTokenizer, 1);
+
+	self->data = shared;
+	self->parser = tracker_parser_new (shared->language);
+	*tokenizer_out = (Fts5Tokenizer *) self;
+
+	return SQLITE_OK;
+}
+
+/*
+ * FTS5 xDelete hook: releases the parser owned by the instance and then
+ * the instance itself. The shared TrackerTokenizerData is not touched.
+ */
+static void
+tracker_tokenizer_destroy (Fts5Tokenizer *fts5_tokenizer)
+{
+	TrackerTokenizer *self = (TrackerTokenizer *) fts5_tokenizer;
+	TrackerParser *parser = self->parser;
+
+	tracker_parser_free (parser);
+	g_free (self);
+}
- TrackerTokenizer *tokenizer;
- TrackerParser *parser;
- guint n_words;
+/*
+ * Shape of the xToken callback FTS5 hands to xTokenize():
+ *   pCtx    - copy of the 2nd argument to xTokenize()
+ *   flags   - mask of FTS5_TOKEN_* flags
+ *   token   - pointer to buffer containing the token
+ *   n_token - size of the token in bytes
+ *   start   - byte offset of the token within the input text
+ *   end     - byte offset of the end of the token within the input text
+ */
+typedef int (*TokenFunc) (void       *pCtx,
+                          int         flags,
+                          const char *token,
+                          int         n_token,
+                          int         start,
+                          int         end);
+
+/*
+ * FTS5 xTokenize hook.
+ *
+ * Feeds @text (@length bytes) through the instance's TrackerParser and
+ * reports each resulting word to @token_func together with @ctx. At most
+ * max_words tokens are reported; stop words are skipped (and do not count
+ * towards that limit) when ignore_stop_words is set. @flags (a mask of
+ * FTS5_TOKENIZE_* values) is not consulted.
+ *
+ * Returns SQLITE_OK, or the first non-OK value returned by @token_func.
+ * Empty or negative-length input yields SQLITE_OK without any callback.
+ */
+static int
+tracker_tokenizer_tokenize (Fts5Tokenizer *fts5_tokenizer,
+                            void          *ctx,
+                            int            flags,
+                            const char    *text,
+                            int            length,
+                            TokenFunc      token_func)
+{
+	TrackerTokenizer *self = (TrackerTokenizer *) fts5_tokenizer;
+	TrackerTokenizerData *settings = self->data;
+	int status = SQLITE_OK;
+	int emitted;
+
+	if (length <= 0)
+		return SQLITE_OK;
+
+	tracker_parser_reset (self->parser, text, length,
+	                      settings->max_word_length,
+	                      settings->enable_stemmer,
+	                      settings->enable_unaccent,
+	                      settings->ignore_stop_words,
+	                      TRUE,
+	                      settings->ignore_numbers);
+
+	/* The counter only advances for tokens actually handed to FTS5 */
+	for (emitted = 0; emitted < settings->max_words; ) {
+		const gchar *word;
+		gboolean is_stop_word;
+		int position, byte_start, byte_end, word_len;
+
+		word = tracker_parser_next (self->parser,
+		                            &position,
+		                            &byte_start, &byte_end,
+		                            &is_stop_word,
+		                            &word_len);
+		if (word == NULL)
+			break;
+
+		if (settings->ignore_stop_words && is_stop_word)
+			continue;
+
+		status = token_func (ctx, 0, word, word_len, byte_start, byte_end);
+		if (status != SQLITE_OK)
+			break;
+
+		emitted++;
+	}
+
+	return status;
+}
+
+/* Callback table registered with FTS5 under the name "TrackerTokenizer" */
+static const fts5_tokenizer tracker_tokenizer_module = {
+	.xCreate   = tracker_tokenizer_create,
+	.xDelete   = tracker_tokenizer_destroy,
+	.xTokenize = tracker_tokenizer_tokenize,
 };
-/*
-** Create a new tokenizer instance.
-*/
-static int trackerCreate(
- int argc, /* Number of entries in argv[] */
- const char * const *argv, /* Tokenizer creation arguments */
- sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
-){
- TrackerTokenizer *p;
- TrackerFTSConfig *config;
-
- p = (TrackerTokenizer *)sqlite3_malloc(sizeof(TrackerTokenizer));
- if( !p ){
- return SQLITE_NOMEM;
- }
- memset(p, 0, sizeof(TrackerTokenizer));
- p->language = tracker_language_new (NULL);
-
- config = tracker_fts_config_new ();
-
- p->max_word_length = tracker_fts_config_get_max_word_length (config);
- p->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
- p->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
- p->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
-
- /* disable stop words if TRACKER_FTS_STOP_WORDS is set to 0 - used by tests
- * otherwise, get value from the conf file */
- p->ignore_stop_words = (g_strcmp0 (g_getenv ("TRACKER_FTS_STOP_WORDS"), "0") == 0 ?
- FALSE : tracker_fts_config_get_ignore_stop_words (config));
-
- p->max_words = tracker_fts_config_get_max_words_to_index (config);
-
- g_object_unref (config);
-
- *ppTokenizer = (sqlite3_tokenizer *)p;
-
- return SQLITE_OK;
+/*
+ * Builds the settings block shared by all tokenizer instances from the
+ * current TrackerFTSConfig and the default TrackerLanguage.
+ *
+ * Returns a newly allocated TrackerTokenizerData; release it with
+ * tracker_tokenizer_data_free().
+ */
+static TrackerTokenizerData *
+tracker_tokenizer_data_new (void)
+{
+	TrackerFTSConfig *config = tracker_fts_config_new ();
+	TrackerTokenizerData *settings = g_new0 (TrackerTokenizerData, 1);
+
+	settings->language = tracker_language_new (NULL);
+
+	settings->max_words = tracker_fts_config_get_max_words_to_index (config);
+	settings->max_word_length = tracker_fts_config_get_max_word_length (config);
+	settings->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
+	settings->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
+	settings->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
+
+	/* Tests set TRACKER_FTS_STOP_WORDS=0 to switch stop word filtering
+	 * off; in every other case the configuration file decides. */
+	if (g_strcmp0 (g_getenv ("TRACKER_FTS_STOP_WORDS"), "0") == 0) {
+		settings->ignore_stop_words = FALSE;
+	} else {
+		settings->ignore_stop_words = tracker_fts_config_get_ignore_stop_words (config);
+	}
+
+	g_object_unref (config);
+
+	return settings;
 }
-/*
-** Destroy a tokenizer
-*/
-static int trackerDestroy(sqlite3_tokenizer *pTokenizer){
- TrackerTokenizer *p = (TrackerTokenizer *)pTokenizer;
- g_object_unref (p->language);
- sqlite3_free(p);
- return SQLITE_OK;
+/*
+ * Destroy notify for the tokenizer registration context: drops the
+ * TrackerLanguage reference and frees the TrackerTokenizerData itself.
+ */
+static void
+tracker_tokenizer_data_free (gpointer user_data)
+{
+	TrackerTokenizerData *settings = (TrackerTokenizerData *) user_data;
+	TrackerLanguage *language = settings->language;
+
+	g_object_unref (language);
+	g_free (settings);
 }
-/*
-** Prepare to begin tokenizing a particular string. The input
-** string to be tokenized is pInput[0..nBytes-1]. A cursor
-** used to incrementally tokenize this string is returned in
-** *ppCursor.
-*/
-static int trackerOpen(
- sqlite3_tokenizer *pTokenizer, /* The tokenizer */
- const char *zInput, /* Input string */
- int nInput, /* Length of zInput in bytes */
- sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
-){
- TrackerTokenizer *p = (TrackerTokenizer *)pTokenizer;
- TrackerParser *parser;
- TrackerCursor *pCsr;
-
- if ( nInput<0 ){
- nInput = strlen(zInput);
- }
-
- parser = tracker_parser_new (p->language);
- tracker_parser_reset (parser, zInput, nInput,
- p->max_word_length,
- p->enable_stemmer,
- p->enable_unaccent,
- p->ignore_stop_words,
- TRUE,
- p->ignore_numbers);
-
- pCsr = (TrackerCursor *)sqlite3_malloc(sizeof(TrackerCursor));
- memset(pCsr, 0, sizeof(TrackerCursor));
- pCsr->tokenizer = p;
- pCsr->parser = parser;
-
- *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
- return SQLITE_OK;
+/*
+ * xToken callback used by tracker_offsets_function(): @data is a GArray
+ * of gint to which the start byte offset of every reported token is
+ * appended. All other arguments are ignored. Always returns SQLITE_OK.
+ */
+static int
+offsets_tokenizer_func (void       *data,
+                        int         flags,
+                        const char *token,
+                        int         n_token,
+                        int         start,
+                        int         end)
+{
+	GArray *byte_offsets = (GArray *) data;
+
+	g_array_append_vals (byte_offsets, &start, 1);
+
+	return SQLITE_OK;
 }
-/*
-** Close a tokenization cursor.
-*/
-static int trackerClose(sqlite3_tokenizer_cursor *pCursor){
- TrackerCursor *pCsr = (TrackerCursor *)pCursor;
-
- tracker_parser_free (pCsr->parser);
- sqlite3_free(pCsr);
- return SQLITE_OK;
+/*
+ * FTS5 auxiliary function "tracker_offsets".
+ *
+ * For every phrase hit in the current row it emits a
+ * "<property name>,<byte offset>" pair; pairs are joined with ','. The
+ * property names come from the registration context (xUserData), indexed
+ * by FTS column. Byte offsets are obtained by re-tokenizing the column
+ * text and looking up the token index reported by xInst().
+ *
+ * Takes no SQL arguments. Result:
+ *   - text (possibly empty) on success,
+ *   - NULL if the hit count cannot be obtained,
+ *   - an error code if any later FTS5 API call fails.
+ */
+static void
+tracker_offsets_function (const Fts5ExtensionApi  *api,
+                          Fts5Context             *fts_ctx,
+                          sqlite3_context         *ctx,
+                          int                      n_args,
+                          sqlite3_value          **args)
+{
+	GString *str;
+	int rc, n_hits, i;
+	GArray *offsets = NULL;
+	const gchar * const *property_names;
+	gint cur_col = -1;
+
+	if (n_args > 0) {
+		sqlite3_result_error (ctx, "Invalid argument count", -1);
+		return;
+	}
+
+	property_names = api->xUserData (fts_ctx);
+	rc = api->xInstCount (fts_ctx, &n_hits);
+
+	if (rc != SQLITE_OK) {
+		sqlite3_result_null (ctx);
+		return;
+	}
+
+	str = g_string_new (NULL);
+
+	for (i = 0; i < n_hits; i++) {
+		int phrase, col, token_idx;
+
+		/* col/token_idx are only defined when xInst() succeeds, so
+		 * bail out before looking at them. */
+		rc = api->xInst (fts_ctx, i, &phrase, &col, &token_idx);
+		if (rc != SQLITE_OK)
+			break;
+
+		if (cur_col != col) {
+			const char *text;
+			int length;
+
+			/* New column: rebuild the token index -> byte offset map */
+			if (offsets)
+				g_array_free (offsets, TRUE);
+
+			offsets = g_array_new (FALSE, FALSE, sizeof (gint));
+			cur_col = col;
+
+			rc = api->xColumnText (fts_ctx, col, &text, &length);
+			if (rc == SQLITE_OK)
+				rc = api->xTokenize (fts_ctx, text, length,
+				                     offsets, &offsets_tokenizer_func);
+			if (rc != SQLITE_OK)
+				break;
+		}
+
+		/* The index and the re-tokenized content can disagree (e.g.
+		 * stale external content). Skip hits we cannot map instead
+		 * of reading past the end of the array. */
+		if (token_idx < 0 || (guint) token_idx >= offsets->len)
+			continue;
+
+		if (str->len != 0)
+			g_string_append_c (str, ',');
+
+		g_string_append_printf (str, "%s,%d",
+		                        property_names[col],
+		                        g_array_index (offsets, gint, token_idx));
+	}
+
+	if (offsets)
+		g_array_free (offsets, TRUE);
+
+	if (rc == SQLITE_OK) {
+		/* SQLite takes ownership of the character data and releases
+		 * it with g_free(); only the GString wrapper is freed here. */
+		sqlite3_result_text (ctx, str->str, str->len, g_free);
+		g_string_free (str, FALSE);
+	} else {
+		sqlite3_result_error_code (ctx, rc);
+		g_string_free (str, TRUE);
+	}
 }
-/*
-** Extract the next token from a tokenization cursor.
-*/
-static int trackerNext(
- sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
- const char **ppToken, /* OUT: *ppToken is the token text */
- int *pnBytes, /* OUT: Number of bytes in token */
- int *piStartOffset, /* OUT: Starting offset of token */
- int *piEndOffset, /* OUT: Ending offset of token */
- int *piPosition /* OUT: Position integer of token */
-){
- TrackerCursor *cursor = (TrackerCursor *) pCursor;
- TrackerTokenizer *p;
- const gchar *pToken;
- gboolean stop_word;
- int pos, start, end, len;
-
- p = cursor->tokenizer;
-
- if (cursor->n_words > p->max_words){
- return SQLITE_DONE;
- }
-
- do {
- pToken = tracker_parser_next (cursor->parser,
- &pos,
- &start, &end,
- &stop_word,
- &len);
-
- if (!pToken){
- return SQLITE_DONE;
- }
- } while (stop_word && p->ignore_stop_words);
-
- *ppToken = pToken;
- *piStartOffset = start;
- *piEndOffset = end;
- *piPosition = pos;
- *pnBytes = len;
-
- cursor->n_words++;
-
- return SQLITE_OK;
+/*
+ * Returns the process-wide map from FTS property name to its
+ * tracker:weight, loading it on first use from the "rdf:Property" table
+ * of the database behind @context. Access is serialized with a static
+ * mutex.
+ *
+ * Keys are the strings returned by tracker_property_get_name() and are
+ * not copied; values are the weights packed with GUINT_TO_POINTER().
+ *
+ * Returns NULL if the weights could not be read; the next call retries.
+ */
+static GHashTable *
+get_fts_weights (sqlite3_context *context)
+{
+	static GHashTable *weights = NULL;
+	static GMutex mutex;
+
+	g_mutex_lock (&mutex);
+
+	if (G_UNLIKELY (weights == NULL)) {
+		sqlite3_stmt *stmt = NULL;
+		sqlite3 *db;
+		int rc;
+
+		db = sqlite3_context_db_handle (context);
+		rc = sqlite3_prepare_v2 (db,
+		                         "SELECT \"rdf:Property\".\"tracker:weight\", "
+		                         "(SELECT Uri FROM Resource where Resource.ID=\"rdf:Property\".ID) "
+		                         "FROM \"rdf:Property\" "
+		                         "WHERE \"rdf:Property\".\"tracker:fulltextIndexed\" = 1 ",
+		                         -1, &stmt, NULL);
+
+		if (rc == SQLITE_OK) {
+			weights = g_hash_table_new (g_str_hash, g_str_equal);
+
+			while ((rc = sqlite3_step (stmt)) != SQLITE_DONE) {
+				if (rc == SQLITE_ROW) {
+					TrackerProperty *property;
+					const gchar *uri;
+					guint weight;
+
+					weight = sqlite3_column_int (stmt, 0);
+					uri = (const gchar *) sqlite3_column_text (stmt, 1);
+
+					/* The subselect yields NULL when the
+					 * resource row is missing. */
+					if (!uri)
+						continue;
+
+					/* A NULL property would end up as a
+					 * NULL key fed to g_str_hash(). */
+					property = tracker_ontologies_get_property_by_uri (uri);
+					if (!property)
+						continue;
+
+					g_hash_table_insert (weights,
+					                     (gpointer) tracker_property_get_name (property),
+					                     GUINT_TO_POINTER (weight));
+				} else if (rc != SQLITE_BUSY) {
+					break;
+				}
+			}
+		}
+
+		/* sqlite3_finalize() accepts NULL, so this also covers a
+		 * failed prepare. */
+		sqlite3_finalize (stmt);
+
+		if (rc != SQLITE_DONE && weights != NULL) {
+			g_hash_table_destroy (weights);
+			weights = NULL;
+		}
+	}
+
+	g_mutex_unlock (&mutex);
+
+	return weights;
 }
-/*
-** The set of routines that implement the simple tokenizer
-*/
-static const sqlite3_tokenizer_module trackerTokenizerModule = {
- 0, /* iVersion */
- trackerCreate, /* xCreate */
- trackerDestroy, /* xDestroy */
- trackerOpen, /* xOpen */
- trackerClose, /* xClose */
- trackerNext, /* xNext */
-};
+/*
+ * FTS5 auxiliary function "tracker_rank".
+ *
+ * The rank of the current row is the sum of the tracker:weight of every
+ * FTS column that holds at least one token in that row. Column names come
+ * from the registration context (xUserData); weights from
+ * get_fts_weights(). Columns without a known weight contribute 0.
+ *
+ * Takes no SQL arguments. Result: a double on success, an error if the
+ * weights cannot be loaded or xColumnSize() fails.
+ */
+static void
+tracker_rank_function (const Fts5ExtensionApi  *api,
+                       Fts5Context             *fts_ctx,
+                       sqlite3_context         *ctx,
+                       int                      n_args,
+                       sqlite3_value          **args)
+{
+	const gchar * const *property_names;
+	/* rc starts as SQLITE_OK so that a table with no columns yields a
+	 * rank of 0 rather than a read of an uninitialized variable. */
+	int i, rc = SQLITE_OK, n_columns, n_tokens;
+	GHashTable *weights;
+	gdouble rank = 0;
+
+	if (n_args != 0) {
+		sqlite3_result_error (ctx, "Invalid argument count", -1);
+		return;
+	}
+
+	n_columns = api->xColumnCount (fts_ctx);
+	property_names = api->xUserData (fts_ctx);
+	weights = get_fts_weights (ctx);
+
+	if (!weights) {
+		sqlite3_result_error (ctx, "Could not read FTS weights", -1);
+		return;
+	}
+
+	for (i = 0; i < n_columns; i++) {
+		const gchar *property;
+		guint weight;
+
+		rc = api->xColumnSize (fts_ctx, i, &n_tokens);
+		if (rc != SQLITE_OK)
+			break;
+
+		/* Columns with no tokens in this row do not contribute */
+		if (n_tokens <= 0)
+			continue;
+
+		property = property_names[i];
+		weight = GPOINTER_TO_UINT (g_hash_table_lookup (weights, property));
+		rank += weight;
+	}
+
+	if (rc == SQLITE_OK) {
+		sqlite3_result_double (ctx, rank);
+	} else {
+		sqlite3_result_error_code (ctx, rc);
+	}
+}
-/*
-** Set *ppModule to point at the implementation of the tracker tokenizer.
-*/
-gboolean tracker_tokenizer_initialize (sqlite3 *db) {
- const sqlite3_tokenizer_module *pTokenizer;
- int rc = SQLITE_OK;
- sqlite3_stmt *stmt;
-
- pTokenizer = &trackerTokenizerModule;
- rc = sqlite3_prepare_v2(db, "SELECT fts3_tokenizer(?, ?)",
- -1, &stmt, 0);
-
- if (rc != SQLITE_OK) {
- return FALSE;
- }
-
- sqlite3_bind_text(stmt, 1, "TrackerTokenizer", -1, SQLITE_STATIC);
- sqlite3_bind_blob(stmt, 2, &pTokenizer, sizeof(pTokenizer), SQLITE_STATIC);
- sqlite3_step(stmt);
- rc = sqlite3_finalize(stmt);
-
- return (rc == SQLITE_OK);
+/*
+ * Fetches the fts5_api pointer of @db by evaluating "SELECT fts5()",
+ * which returns the pointer value as a blob.
+ *
+ * Returns NULL if the statement cannot be prepared, yields no row, or the
+ * blob does not have the size of a pointer. The statement is finalized on
+ * every path once it has been prepared.
+ */
+static fts5_api *
+get_fts5_api (sqlite3 *db)
+{
+	sqlite3_stmt *stmt = NULL;
+	fts5_api *api = NULL;
+	int rc;
+
+	rc = sqlite3_prepare_v2 (db, "SELECT fts5()", -1, &stmt, NULL);
+
+	if (rc != SQLITE_OK)
+		return NULL;
+
+	if (sqlite3_step (stmt) == SQLITE_ROW) {
+		const void *blob = sqlite3_column_blob (stmt, 0);
+
+		/* Only copy when the blob really holds one pointer */
+		if (blob != NULL &&
+		    sqlite3_column_bytes (stmt, 0) == (int) sizeof (api))
+			memcpy (&api, blob, sizeof (api));
+	}
+
+	sqlite3_finalize (stmt);
+
+	return api;
+}
+
+/*
+ * Registers Tracker's FTS5 extensions on @db:
+ *   - the "TrackerTokenizer" tokenizer,
+ *   - the "tracker_offsets" auxiliary function,
+ *   - the "tracker_rank" auxiliary function.
+ *
+ * @property_names is a NULL-terminated array of FTS column names; each
+ * auxiliary function gets its own copy, so the caller keeps ownership of
+ * the array it passed in.
+ *
+ * Returns TRUE only if the FTS5 API is available and all three
+ * registrations succeeded.
+ *
+ * NOTE(review): when a registration fails the context passed to it is not
+ * released here, because it is unclear from this file whether FTS5 invokes
+ * xDestroy for a failed registration; confirm against the SQLite FTS5
+ * docs before adding an explicit free.
+ */
+gboolean
+tracker_tokenizer_initialize (sqlite3      *db,
+                              const gchar **property_names)
+{
+	TrackerTokenizerData *data;
+	fts5_tokenizer *tokenizer;
+	fts5_api *api;
+	int rc;
+
+	api = get_fts5_api (db);
+
+	if (!api)
+		return FALSE;
+
+	data = tracker_tokenizer_data_new ();
+	tokenizer = (fts5_tokenizer *) &tracker_tokenizer_module;
+	rc = api->xCreateTokenizer (api, "TrackerTokenizer", data, tokenizer,
+	                            tracker_tokenizer_data_free);
+	if (rc != SQLITE_OK)
+		return FALSE;
+
+	/* Offsets */
+	rc = api->xCreateFunction (api, "tracker_offsets",
+	                           g_strdupv ((gchar **) property_names),
+	                           &tracker_offsets_function,
+	                           (GDestroyNotify) g_strfreev);
+	if (rc != SQLITE_OK)
+		return FALSE;
+
+	/* Rank */
+	rc = api->xCreateFunction (api, "tracker_rank",
+	                           g_strdupv ((gchar **) property_names),
+	                           &tracker_rank_function,
+	                           (GDestroyNotify) g_strfreev);
+
+	return rc == SQLITE_OK;
 }
diff --git a/src/libtracker-fts/tracker-fts-tokenizer.h b/src/libtracker-fts/tracker-fts-tokenizer.h
index 3e86295a5..30843d23e 100644
--- a/src/libtracker-fts/tracker-fts-tokenizer.h
+++ b/src/libtracker-fts/tracker-fts-tokenizer.h
@@ -21,11 +21,11 @@
#include <sqlite3.h>
#include <glib.h>
-#include "fts3_tokenizer.h"
#ifndef __TRACKER_FTS_TOKENIZER_H__
#define __TRACKER_FTS_TOKENIZER_H__
-gboolean tracker_tokenizer_initialize (sqlite3 *db);
+gboolean tracker_tokenizer_initialize (sqlite3 *db,
+ const gchar **property_names);
#endif /* __TRACKER_FTS_TOKENIZER_H__ */
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index 0571fb0f4..deb951a23 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -28,13 +28,16 @@
#ifndef HAVE_BUILTIN_FTS
-#include "fts3.h"
+#include "sqlite3.h"
+#include "fts5.h"
+
static gsize module_initialized = 0;
+int sqlite3_fts5_init ();
+
#endif
static gboolean initialized = FALSE;
-static GPrivate property_names_key = G_PRIVATE_INIT ((GDestroyNotify) g_strfreev);
gboolean
@@ -47,13 +50,13 @@ tracker_fts_init (void)
#ifdef HAVE_BUILTIN_FTS
initialized = TRUE;
- /* SQLite has all needed FTS4 features compiled in */
+ /* SQLite has all needed FTS5 features compiled in */
return TRUE;
#else
int rc = SQLITE_OK;
if (g_once_init_enter (&module_initialized)) {
- rc = sqlite3_auto_extension ((void (*) (void)) fts4_extension_init);
+ rc = sqlite3_auto_extension ((void (*) (void)) sqlite3_fts5_init);
g_once_init_leave (&module_initialized, (rc == SQLITE_OK));
}
@@ -70,202 +73,45 @@ tracker_fts_shutdown (void)
return TRUE;
}
- /* Nothing to do, there is no fts4_extension_shutdown() */
initialized = FALSE;
return TRUE;
}
-static void
-function_rank (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- guint *matchinfo, *weights;
- gdouble rank = 0;
- gint i, n_columns;
-
- if (argc != 2) {
- sqlite3_result_error(context,
- "wrong number of arguments to function rank()",
- -1);
- return;
- }
-
- matchinfo = (unsigned int *) sqlite3_value_blob (argv[0]);
- weights = (unsigned int *) sqlite3_value_blob (argv[1]);
- n_columns = matchinfo[0];
-
- for (i = 0; i < n_columns; i++) {
- if (matchinfo[i + 1] != 0) {
- rank += (gdouble) weights[i];
- }
- }
-
- sqlite3_result_double(context, rank);
-}
-
-static void
-function_offsets (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
+static gchar **
+get_fts_properties (GHashTable *tables)
{
- gchar *offsets;
- const gchar * const * names;
- gint offset_values[4];
- GString *result = NULL;
- gint i = 0;
-
- if (argc != 2) {
- sqlite3_result_error(context,
- "wrong number of arguments to function tracker_offsets()",
- -1);
- return;
- }
-
- offsets = (gchar *) sqlite3_value_text (argv[0]);
- names = sqlite3_value_blob (argv[1]);
-
- while (offsets && *offsets) {
- offset_values[i] = g_strtod (offsets, &offsets);
-
- /* All 4 values from the quartet have been gathered */
- if (i == 3) {
- if (!result) {
- result = g_string_new ("");
- } else {
- g_string_append_c (result, ',');
- }
-
- g_string_append_printf (result,
- "%s,%d",
- names[offset_values[0]],
- offset_values[2]);
-
- }
-
- i = (i + 1) % 4;
- }
-
- sqlite3_result_text (context,
- (result) ? g_string_free (result, FALSE) : NULL,
- -1, g_free);
-}
-
-static void
-function_weights (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- static guint *weights = NULL;
- static GMutex mutex;
- static gint length;
- int rc = SQLITE_DONE;
-
- g_mutex_lock (&mutex);
-
- if (G_UNLIKELY (weights == NULL)) {
- GArray *weight_array;
- sqlite3_stmt *stmt;
- sqlite3 *db;
-
- weight_array = g_array_new (FALSE, FALSE, sizeof (guint));
- db = sqlite3_context_db_handle (context);
- rc = sqlite3_prepare_v2 (db,
- "SELECT \"rdf:Property\".\"tracker:weight\" "
- "FROM \"rdf:Property\" "
- "WHERE \"rdf:Property\".\"tracker:fulltextIndexed\" = 1 "
- "ORDER BY \"rdf:Property\".ID ",
- -1, &stmt, NULL);
-
- while ((rc = sqlite3_step (stmt)) != SQLITE_DONE) {
- if (rc == SQLITE_ROW) {
- guint weight;
- weight = sqlite3_column_int (stmt, 0);
- g_array_append_val (weight_array, weight);
- } else if (rc != SQLITE_BUSY) {
- break;
- }
- }
+ GList *table_columns, *columns;
+ gchar **property_names;
+ GHashTableIter iter;
- sqlite3_finalize (stmt);
+ columns = NULL;
+ g_hash_table_iter_init (&iter, tables);
- if (rc == SQLITE_DONE) {
- length = weight_array->len * g_array_get_element_size (weight_array);
- weights = (guint *) g_array_free (weight_array, FALSE);
- } else {
- g_array_free (weight_array, TRUE);
- }
+ while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &table_columns)) {
+ columns = g_list_concat (columns, g_list_copy (table_columns));
}
- g_mutex_unlock (&mutex);
-
- if (rc == SQLITE_DONE)
- sqlite3_result_blob (context, weights, length, NULL);
- else
- sqlite3_result_error_code (context, rc);
-}
-
-static void
-function_property_names (sqlite3_context *context,
- int argc,
- sqlite3_value *argv[])
-{
- gchar **property_names;
- gint length;
-
- property_names = g_private_get (&property_names_key);
- length = g_strv_length (property_names) * sizeof (gchar *);
- sqlite3_result_blob (context, property_names, length, NULL);
-}
+ property_names = tracker_glist_to_string_list (columns);
+ g_list_free (columns);
-static void
-fts_register_functions (sqlite3 *db)
-{
- sqlite3_create_function (db, "tracker_rank", 2, SQLITE_ANY,
- NULL, &function_rank,
- NULL, NULL);
- sqlite3_create_function (db, "tracker_offsets", 2, SQLITE_ANY,
- NULL, &function_offsets,
- NULL, NULL);
- sqlite3_create_function (db, "fts_column_weights", 0, SQLITE_ANY,
- NULL, &function_weights,
- NULL, NULL);
- sqlite3_create_function (db, "fts_property_names", 0, SQLITE_ANY,
- NULL, &function_property_names,
- NULL, NULL);
+ return property_names;
}
gboolean
tracker_fts_init_db (sqlite3 *db,
GHashTable *tables)
{
- GHashTableIter iter;
- GList *columns;
- GList *table_columns;
gchar **property_names;
+ gboolean retval;
g_return_val_if_fail (initialized == TRUE, FALSE);
- if (!tracker_tokenizer_initialize (db)) {
- return FALSE;
- }
-
- /* Set up GStrv 'property_names' */
- columns = NULL;
- g_hash_table_iter_init (&iter, tables);
- while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &table_columns)) {
- columns = g_list_concat (columns, g_list_copy (table_columns));
- }
-
- property_names = tracker_glist_to_string_list (columns);
- g_private_replace (&property_names_key, property_names);
- g_list_free (columns);
-
- /* Register functions with the database, including one to get property names */
- fts_register_functions (db);
+ property_names = get_fts_properties (tables);
+ retval = tracker_tokenizer_initialize (db, (const gchar **) property_names);
+ g_strfreev (property_names);
- return TRUE;
+ return retval;
}
gboolean
@@ -282,13 +128,16 @@ tracker_fts_create_table (sqlite3 *db,
g_return_val_if_fail (initialized == TRUE, FALSE);
+ if (g_hash_table_size (tables) == 0)
+ return TRUE;
+
/* Create view on tables/columns marked as FTS-indexed */
g_hash_table_iter_init (&iter, tables);
str = g_string_new ("CREATE VIEW fts_view AS SELECT Resource.ID as rowid ");
from = g_string_new ("FROM Resource ");
fts = g_string_new ("CREATE VIRTUAL TABLE ");
- g_string_append_printf (fts, "%s USING fts4(content=\"fts_view\", ",
+ g_string_append_printf (fts, "%s USING fts5(content=\"fts_view\", ",
table_name);
while (g_hash_table_iter_next (&iter, (gpointer *) &index_table,
@@ -321,17 +170,28 @@ tracker_fts_create_table (sqlite3 *db,
g_string_append (str, from->str);
g_string_free (from, TRUE);
- rc = sqlite3_exec(db, str->str, NULL, 0, NULL);
+ rc = sqlite3_exec(db, str->str, NULL, NULL, NULL);
g_string_free (str, TRUE);
if (rc != SQLITE_OK) {
+ g_assert_not_reached();
return FALSE;
}
g_string_append (fts, "tokenize=TrackerTokenizer)");
- rc = sqlite3_exec(db, fts->str, NULL, 0, NULL);
+ rc = sqlite3_exec(db, fts->str, NULL, NULL, NULL);
g_string_free (fts, TRUE);
+ if (rc != SQLITE_OK)
+ return FALSE;
+
+ str = g_string_new (NULL);
+ g_string_append_printf (str,
+ "INSERT INTO %s(%s, rank) VALUES('rank', 'tracker_rank()')",
+ table_name, table_name);
+ rc = sqlite3_exec (db, str->str, NULL, NULL, NULL);
+ g_string_free (str, TRUE);
+
return (rc == SQLITE_OK);
}
@@ -349,17 +209,21 @@ tracker_fts_alter_table (sqlite3 *db,
tmp_name = g_strdup_printf ("%s_TMP", table_name);
query = g_strdup_printf ("DROP VIEW fts_view");
- rc = sqlite3_prepare_v2 (db, query, -1, NULL, NULL);
+ rc = sqlite3_exec (db, query, NULL, NULL, NULL);
+ g_free (query);
+
+ query = g_strdup_printf ("DROP TABLE fts5");
+ rc = sqlite3_exec (db, query, NULL, NULL, NULL);
+ g_free (query);
if (!tracker_fts_create_table (db, tmp_name, tables, grouped_columns)) {
g_free (tmp_name);
- g_free (query);
return FALSE;
}
- query = g_strdup_printf ("INSERT INTO %s (docid) SELECT docid FROM %s",
- tmp_name, table_name);
- rc = sqlite3_prepare_v2 (db, query, -1, NULL, NULL);
+ query = g_strdup_printf ("INSERT INTO %s (rowid) SELECT rowid FROM fts_view",
+ tmp_name);
+ rc = sqlite3_exec (db, query, NULL, NULL, NULL);
g_free (query);
if (rc != SQLITE_OK) {
@@ -369,7 +233,7 @@ tracker_fts_alter_table (sqlite3 *db,
query = g_strdup_printf ("INSERT INTO %s(%s) VALUES('rebuild')",
tmp_name, tmp_name);
- rc = sqlite3_prepare_v2 (db, query, -1, NULL, NULL);
+ rc = sqlite3_exec (db, query, NULL, NULL, NULL);
g_free (query);
if (rc != SQLITE_OK) {
@@ -379,17 +243,11 @@ tracker_fts_alter_table (sqlite3 *db,
query = g_strdup_printf ("ALTER TABLE %s RENAME TO %s",
tmp_name, table_name);
- rc = sqlite3_prepare_v2 (db, query, -1, NULL, NULL);
+ rc = sqlite3_exec (db, query, NULL, NULL, NULL);
g_free (query);
g_free (tmp_name);
-
- if (rc != SQLITE_OK) {
- g_free (tmp_name);
- return FALSE;
- }
-
- return TRUE;
+ return rc == SQLITE_OK;
}
void
@@ -401,6 +259,6 @@ tracker_fts_rebuild_tokens (sqlite3 *db,
/* This special query rebuilds the tokens in the given FTS table */
query = g_strdup_printf ("INSERT INTO %s(%s) VALUES('rebuild')",
table_name, table_name);
- sqlite3_exec(db, query, NULL, 0, NULL);
+ sqlite3_exec(db, query, NULL, NULL, NULL);
g_free (query);
}
diff --git a/src/ontologies/32-nco.ontology b/src/ontologies/32-nco.ontology
index a7c80f5ba..6e7abb5a9 100644
--- a/src/ontologies/32-nco.ontology
+++ b/src/ontologies/32-nco.ontology
@@ -11,7 +11,7 @@
nco: a tracker:Namespace, tracker:Ontology ;
tracker:prefix "nco" ;
- nao:lastModified "2011-09-29T12:20:00Z" .
+ nao:lastModified "2016-02-28T21:30:00Z" .
nco:Role a rdfs:Class ;
rdfs:label "Role" ;
@@ -599,7 +599,6 @@ nco:hobby a rdf:Property ;
nrl:maxCardinality 1 ;
rdfs:domain nco:PersonContact ;
rdfs:range xsd:string ;
- tracker:fulltextIndexed true ;
tracker:weight 2 .
# Same remarks as in url