diff options
author | Richard Hughes <richard@hughsie.com> | 2016-11-17 22:44:15 +0000 |
---|---|---|
committer | Richard Hughes <richard@hughsie.com> | 2016-11-21 12:31:35 +0000 |
commit | c21087431fa22214057e53c3a5ac032722f415e0 (patch) | |
tree | 003cfbac994fa0a156cea93a4271354212827a5b | |
parent | 871cef89deba05bce68d6797dbc1fd3cefa575a6 (diff) | |
download | appstream-glib-c21087431fa22214057e53c3a5ac032722f415e0.tar.gz |
Cache the stemmer results in a hash table
This reduces the time it takes to populate the token cache by 60%.
-rw-r--r-- | libappstream-glib/as-app.c | 18 | ||||
-rw-r--r-- | libappstream-glib/as-self-test.c | 2 | ||||
-rw-r--r-- | libappstream-glib/as-stemmer.c | 46 | ||||
-rw-r--r-- | libappstream-glib/as-stemmer.h | 2 | ||||
-rw-r--r-- | libappstream-glib/as-store.c | 4 |
5 files changed, 50 insertions, 22 deletions
```diff
diff --git a/libappstream-glib/as-app.c b/libappstream-glib/as-app.c
index df702f5..3aed890 100644
--- a/libappstream-glib/as-app.c
+++ b/libappstream-glib/as-app.c
@@ -110,8 +110,8 @@ typedef struct
 	AsRefString	*branch;
 	gint		 priority;
 	gsize		 token_cache_valid;
-	GHashTable	*token_cache;		/* of string:AsAppTokenType* */
-	GHashTable	*search_blacklist;	/* of stemmed-string:1 */
+	GHashTable	*token_cache;		/* of AsRefString:AsAppTokenType* */
+	GHashTable	*search_blacklist;	/* of AsRefString:1 */
 } AsAppPrivate;
 
 G_DEFINE_TYPE_WITH_PRIVATE (AsApp, as_app, G_TYPE_OBJECT)
@@ -566,7 +566,9 @@ as_app_init (AsApp *app)
 	priv->urls = g_hash_table_new_full (g_str_hash, g_str_equal,
 					    (GDestroyNotify) as_ref_string_unref,
 					    (GDestroyNotify) as_ref_string_unref);
-	priv->token_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
+	priv->token_cache = g_hash_table_new_full (g_str_hash, g_str_equal,
+						   (GDestroyNotify) as_ref_string_unref,
+						   g_free);
 	priv->search_match = AS_APP_SEARCH_MATCH_LAST;
 }
 
@@ -5128,7 +5130,7 @@ as_app_add_token_internal (AsApp *app,
 {
 	AsAppPrivate *priv = GET_PRIVATE (app);
 	AsAppTokenType *match_pval;
-	g_autofree gchar *value_stem = NULL;
+	g_autoptr(AsRefString) value_stem = NULL;
 
 	/* invalid */
 	if (!as_utils_search_token_valid (value))
@@ -5138,7 +5140,7 @@ as_app_add_token_internal (AsApp *app,
 	if (priv->stemmer != NULL)
 		value_stem = as_stemmer_process (priv->stemmer, value);
 	if (value_stem == NULL)
-		value_stem = g_utf8_strdown (value, -1);
+		return;
 
 	/* blacklisted */
 	if (priv->search_blacklist != NULL &&
@@ -5156,7 +5158,7 @@ as_app_add_token_internal (AsApp *app,
 	match_pval = g_new0 (AsAppTokenType, 1);
 	*match_pval = match_flag;
 	g_hash_table_insert (priv->token_cache,
-			     g_steal_pointer (&value_stem),
+			     as_ref_string_ref (value_stem),
 			     match_pval);
 }
 
@@ -5326,7 +5328,7 @@ as_app_search_matches (AsApp *app, const gchar *search)
 	GList *l;
 	AsAppSearchMatch result = 0;
 	g_autoptr(GList) keys = NULL;
-	g_autofree gchar *search_stem = NULL;
+	g_autoptr(AsRefString) search_stem = NULL;
 
 	/* ensure the token cache is created */
 	if (g_once_init_enter (&priv->token_cache_valid)) {
@@ -5342,7 +5344,7 @@ as_app_search_matches (AsApp *app, const gchar *search)
 	if (priv->stemmer != NULL)
 		search_stem = as_stemmer_process (priv->stemmer, search);
 	if (search_stem == NULL)
-		search_stem = g_utf8_strdown (search, -1);
+		return 0;
 	match_pval = g_hash_table_lookup (priv->token_cache, search_stem);
 	if (match_pval != NULL)
 		return (guint) *match_pval << 2;
diff --git a/libappstream-glib/as-self-test.c b/libappstream-glib/as-self-test.c
index 09c9619..49be359 100644
--- a/libappstream-glib/as-self-test.c
+++ b/libappstream-glib/as-self-test.c
@@ -2706,8 +2706,10 @@ as_test_app_search_func (void)
 	const gchar *mime[] = { "application/vnd.oasis.opendocument.text", NULL };
 	g_autoptr(AsApp) app = NULL;
 	g_autoptr(GHashTable) search_blacklist = NULL;
+	g_autoptr(AsStemmer) stemmer = as_stemmer_new ();
 
 	app = as_app_new ();
+	as_app_set_stemmer (app, stemmer);
 	as_app_set_id (app, "gnome-software");
 	as_app_add_pkgname (app, "gnome-software");
 	as_app_set_name (app, NULL, "GNOME Software X-Plane");
diff --git a/libappstream-glib/as-stemmer.c b/libappstream-glib/as-stemmer.c
index b5017ad..af043c7 100644
--- a/libappstream-glib/as-stemmer.c
+++ b/libappstream-glib/as-stemmer.c
@@ -28,11 +28,13 @@
 #endif
 
 #include "as-stemmer.h"
+#include "as-ref-string.h"
 
 struct _AsStemmer
 {
 	GObject			 parent_instance;
 	gboolean		 enabled;
+	GHashTable		*hash;
 	struct sb_stemmer	*ctx;
 	GMutex			 ctx_mutex;
 };
@@ -48,35 +50,54 @@ G_DEFINE_TYPE (AsStemmer, as_stemmer, G_TYPE_OBJECT)
  *
  * Since: 0.2.2
  *
- * Returns: A new string
+ * Returns: A new refcounted string
  **/
-gchar *
+const gchar *
 as_stemmer_process (AsStemmer *stemmer, const gchar *value)
 {
 #ifdef HAVE_LIBSTEMMER
-	gchar *new;
+	AsRefString *new;
+	const gchar *tmp;
+	gsize value_len;
 	g_autoptr(GMutexLocker) locker = g_mutex_locker_new (&stemmer->ctx_mutex);
+
+	/* look for word in the cache */
+	new = g_hash_table_lookup (stemmer->hash, value);
+	if (new != NULL)
+		return as_ref_string_ref (new);
+
+	/* not enabled */
 	if (stemmer->ctx == NULL || !stemmer->enabled)
-		return g_strdup (value);
-	new = g_strdup ((gchar *) sb_stemmer_stem (stemmer->ctx,
-						   (guchar *) value,
-						   strlen (value)));
-//	if (g_strcmp0 (value, new) != 0)
-//		g_debug ("stemmed %s->%s", value, new);
+		return as_ref_string_new (value);
+
+	/* stem, then add to the cache */
+	value_len = strlen (value);
+	tmp = (const gchar *) sb_stemmer_stem (stemmer->ctx,
+					       (guchar *) value,
+					       (gint) value_len);
+	if (value_len == (gsize) sb_stemmer_length (stemmer->ctx)) {
+		new = as_ref_string_new_with_length (value, value_len);
+	} else {
+		new = as_ref_string_new_copy (tmp);
+	}
+	g_hash_table_insert (stemmer->hash,
+			     as_ref_string_new (value),
+			     as_ref_string_ref (new));
 	return new;
 #else
-	return g_strdup (value);
+	return as_ref_string_new (value);
 #endif
 }
 
 static void
 as_stemmer_finalize (GObject *object)
 {
-#ifdef HAVE_LIBSTEMMER
 	AsStemmer *stemmer = AS_STEMMER (object);
+#ifdef HAVE_LIBSTEMMER
 	sb_stemmer_delete (stemmer->ctx);
 	g_mutex_clear (&stemmer->ctx_mutex);
 #endif
+	g_hash_table_unref (stemmer->hash);
 	G_OBJECT_CLASS (as_stemmer_parent_class)->finalize (object);
 }
 
@@ -96,6 +117,9 @@ as_stemmer_init (AsStemmer *stemmer)
 	g_mutex_init (&stemmer->ctx_mutex);
 #endif
 	stemmer->enabled = g_getenv ("APPSTREAM_GLIB_DISABLE_STEMMER") == NULL;
+	stemmer->hash = g_hash_table_new_full (g_str_hash, g_str_equal,
+					       (GDestroyNotify) as_ref_string_unref,
+					       (GDestroyNotify) as_ref_string_unref);
 }
 
 /**
diff --git a/libappstream-glib/as-stemmer.h b/libappstream-glib/as-stemmer.h
index f277d84..1b77e3b 100644
--- a/libappstream-glib/as-stemmer.h
+++ b/libappstream-glib/as-stemmer.h
@@ -31,7 +31,7 @@ G_BEGIN_DECLS
 G_DECLARE_FINAL_TYPE (AsStemmer, as_stemmer, AS, STEMMER, GObject)
 
 AsStemmer	*as_stemmer_new			(void);
-gchar		*as_stemmer_process		(AsStemmer	*stemmer,
+const gchar	*as_stemmer_process		(AsStemmer	*stemmer,
 						 const gchar	*value);
 
 G_END_DECLS
diff --git a/libappstream-glib/as-store.c b/libappstream-glib/as-store.c
index b46e481..1a4a594 100644
--- a/libappstream-glib/as-store.c
+++ b/libappstream-glib/as-store.c
@@ -73,7 +73,7 @@ typedef struct
 	AsMonitor		*monitor;
 	GHashTable		*metadata_indexes;	/* GHashTable{key} */
 	GHashTable		*appinfo_dirs;		/* GHashTable{path:AsStorePathData} */
-	GHashTable		*search_blacklist;	/* GHashTable{string:1} */
+	GHashTable		*search_blacklist;	/* GHashTable{AsRefString:1} */
 	AsStoreAddFlags		 add_flags;
 	AsStoreWatchFlags	 watch_flags;
 	AsStoreProblems		 problems;
@@ -3389,7 +3389,7 @@ as_store_init (AsStore *store)
 	priv->search_match = AS_APP_SEARCH_MATCH_LAST;
 	priv->search_blacklist = g_hash_table_new_full (g_str_hash,
 							g_str_equal,
-							g_free,
+							(GDestroyNotify) as_ref_string_unref,
 							NULL);
 	priv->hash_id = g_hash_table_new_full (g_str_hash,
 					       g_str_equal,
```