summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Hughes <richard@hughsie.com> 2016-11-17 22:44:15 +0000
committer Richard Hughes <richard@hughsie.com> 2016-11-21 15:19:55 +0000
commit 3c7b5441bcbd35e5a8f195359653ae5d7af3062a (patch)
tree f2e3a92a288541d1bf1049b9b13012811e96a506
parent 67cc566b0b16f89a32349c0e3f348680970ede24 (diff)
download appstream-glib-3c7b5441bcbd35e5a8f195359653ae5d7af3062a.tar.gz
Cache the stemmer results in a hash table
This reduces the time it takes to populate the token cache by 60%.
-rw-r--r-- libappstream-glib/as-app.c 18
-rw-r--r-- libappstream-glib/as-self-test.c 2
-rw-r--r-- libappstream-glib/as-stemmer.c 46
-rw-r--r-- libappstream-glib/as-stemmer.h 2
-rw-r--r-- libappstream-glib/as-store.c 4
5 files changed, 50 insertions, 22 deletions
diff --git a/libappstream-glib/as-app.c b/libappstream-glib/as-app.c
index df702f5..3aed890 100644
--- a/libappstream-glib/as-app.c
+++ b/libappstream-glib/as-app.c
@@ -110,8 +110,8 @@ typedef struct
AsRefString *branch;
gint priority;
gsize token_cache_valid;
- GHashTable *token_cache; /* of string:AsAppTokenType* */
- GHashTable *search_blacklist; /* of stemmed-string:1 */
+ GHashTable *token_cache; /* of AsRefString:AsAppTokenType* */
+ GHashTable *search_blacklist; /* of AsRefString:1 */
} AsAppPrivate;
G_DEFINE_TYPE_WITH_PRIVATE (AsApp, as_app, G_TYPE_OBJECT)
@@ -566,7 +566,9 @@ as_app_init (AsApp *app)
priv->urls = g_hash_table_new_full (g_str_hash, g_str_equal,
(GDestroyNotify) as_ref_string_unref,
(GDestroyNotify) as_ref_string_unref);
- priv->token_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
+ priv->token_cache = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify) as_ref_string_unref,
+ g_free);
priv->search_match = AS_APP_SEARCH_MATCH_LAST;
}
@@ -5128,7 +5130,7 @@ as_app_add_token_internal (AsApp *app,
{
AsAppPrivate *priv = GET_PRIVATE (app);
AsAppTokenType *match_pval;
- g_autofree gchar *value_stem = NULL;
+ g_autoptr(AsRefString) value_stem = NULL;
/* invalid */
if (!as_utils_search_token_valid (value))
@@ -5138,7 +5140,7 @@ as_app_add_token_internal (AsApp *app,
if (priv->stemmer != NULL)
value_stem = as_stemmer_process (priv->stemmer, value);
if (value_stem == NULL)
- value_stem = g_utf8_strdown (value, -1);
+ return;
/* blacklisted */
if (priv->search_blacklist != NULL &&
@@ -5156,7 +5158,7 @@ as_app_add_token_internal (AsApp *app,
match_pval = g_new0 (AsAppTokenType, 1);
*match_pval = match_flag;
g_hash_table_insert (priv->token_cache,
- g_steal_pointer (&value_stem),
+ as_ref_string_ref (value_stem),
match_pval);
}
@@ -5326,7 +5328,7 @@ as_app_search_matches (AsApp *app, const gchar *search)
GList *l;
AsAppSearchMatch result = 0;
g_autoptr(GList) keys = NULL;
- g_autofree gchar *search_stem = NULL;
+ g_autoptr(AsRefString) search_stem = NULL;
/* ensure the token cache is created */
if (g_once_init_enter (&priv->token_cache_valid)) {
@@ -5342,7 +5344,7 @@ as_app_search_matches (AsApp *app, const gchar *search)
if (priv->stemmer != NULL)
search_stem = as_stemmer_process (priv->stemmer, search);
if (search_stem == NULL)
- search_stem = g_utf8_strdown (search, -1);
+ return 0;
match_pval = g_hash_table_lookup (priv->token_cache, search_stem);
if (match_pval != NULL)
return (guint) *match_pval << 2;
diff --git a/libappstream-glib/as-self-test.c b/libappstream-glib/as-self-test.c
index 09c9619..49be359 100644
--- a/libappstream-glib/as-self-test.c
+++ b/libappstream-glib/as-self-test.c
@@ -2706,8 +2706,10 @@ as_test_app_search_func (void)
const gchar *mime[] = { "application/vnd.oasis.opendocument.text", NULL };
g_autoptr(AsApp) app = NULL;
g_autoptr(GHashTable) search_blacklist = NULL;
+ g_autoptr(AsStemmer) stemmer = as_stemmer_new ();
app = as_app_new ();
+ as_app_set_stemmer (app, stemmer);
as_app_set_id (app, "gnome-software");
as_app_add_pkgname (app, "gnome-software");
as_app_set_name (app, NULL, "GNOME Software X-Plane");
diff --git a/libappstream-glib/as-stemmer.c b/libappstream-glib/as-stemmer.c
index b5017ad..af043c7 100644
--- a/libappstream-glib/as-stemmer.c
+++ b/libappstream-glib/as-stemmer.c
@@ -28,11 +28,13 @@
#endif
#include "as-stemmer.h"
+#include "as-ref-string.h"
struct _AsStemmer
{
GObject parent_instance;
gboolean enabled;
+ GHashTable *hash;
struct sb_stemmer *ctx;
GMutex ctx_mutex;
};
@@ -48,35 +50,54 @@ G_DEFINE_TYPE (AsStemmer, as_stemmer, G_TYPE_OBJECT)
*
* Since: 0.2.2
*
- * Returns: A new string
+ * Returns: A new refcounted string
**/
-gchar *
+const gchar *
as_stemmer_process (AsStemmer *stemmer, const gchar *value)
{
#ifdef HAVE_LIBSTEMMER
- gchar *new;
+ AsRefString *new;
+ const gchar *tmp;
+ gsize value_len;
g_autoptr(GMutexLocker) locker = g_mutex_locker_new (&stemmer->ctx_mutex);
+
+ /* look for word in the cache */
+ new = g_hash_table_lookup (stemmer->hash, value);
+ if (new != NULL)
+ return as_ref_string_ref (new);
+
+ /* not enabled */
if (stemmer->ctx == NULL || !stemmer->enabled)
- return g_strdup (value);
- new = g_strdup ((gchar *) sb_stemmer_stem (stemmer->ctx,
- (guchar *) value,
- strlen (value)));
-// if (g_strcmp0 (value, new) != 0)
-// g_debug ("stemmed %s->%s", value, new);
+ return as_ref_string_new (value);
+
+ /* stem, then add to the cache */
+ value_len = strlen (value);
+ tmp = (const gchar *) sb_stemmer_stem (stemmer->ctx,
+ (guchar *) value,
+ (gint) value_len);
+ if (value_len == (gsize) sb_stemmer_length (stemmer->ctx)) {
+ new = as_ref_string_new_with_length (value, value_len);
+ } else {
+ new = as_ref_string_new_copy (tmp);
+ }
+ g_hash_table_insert (stemmer->hash,
+ as_ref_string_new (value),
+ as_ref_string_ref (new));
return new;
#else
- return g_strdup (value);
+ return as_ref_string_new (value);
#endif
}
static void
as_stemmer_finalize (GObject *object)
{
-#ifdef HAVE_LIBSTEMMER
AsStemmer *stemmer = AS_STEMMER (object);
+#ifdef HAVE_LIBSTEMMER
sb_stemmer_delete (stemmer->ctx);
g_mutex_clear (&stemmer->ctx_mutex);
#endif
+ g_hash_table_unref (stemmer->hash);
G_OBJECT_CLASS (as_stemmer_parent_class)->finalize (object);
}
@@ -96,6 +117,9 @@ as_stemmer_init (AsStemmer *stemmer)
g_mutex_init (&stemmer->ctx_mutex);
#endif
stemmer->enabled = g_getenv ("APPSTREAM_GLIB_DISABLE_STEMMER") == NULL;
+ stemmer->hash = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify) as_ref_string_unref,
+ (GDestroyNotify) as_ref_string_unref);
}
/**
diff --git a/libappstream-glib/as-stemmer.h b/libappstream-glib/as-stemmer.h
index f277d84..1b77e3b 100644
--- a/libappstream-glib/as-stemmer.h
+++ b/libappstream-glib/as-stemmer.h
@@ -31,7 +31,7 @@ G_BEGIN_DECLS
G_DECLARE_FINAL_TYPE (AsStemmer, as_stemmer, AS, STEMMER, GObject)
AsStemmer *as_stemmer_new (void);
-gchar *as_stemmer_process (AsStemmer *stemmer,
+const gchar *as_stemmer_process (AsStemmer *stemmer,
const gchar *value);
G_END_DECLS
diff --git a/libappstream-glib/as-store.c b/libappstream-glib/as-store.c
index b46e481..1a4a594 100644
--- a/libappstream-glib/as-store.c
+++ b/libappstream-glib/as-store.c
@@ -73,7 +73,7 @@ typedef struct
AsMonitor *monitor;
GHashTable *metadata_indexes; /* GHashTable{key} */
GHashTable *appinfo_dirs; /* GHashTable{path:AsStorePathData} */
- GHashTable *search_blacklist; /* GHashTable{string:1} */
+ GHashTable *search_blacklist; /* GHashTable{AsRefString:1} */
AsStoreAddFlags add_flags;
AsStoreWatchFlags watch_flags;
AsStoreProblems problems;
@@ -3389,7 +3389,7 @@ as_store_init (AsStore *store)
priv->search_match = AS_APP_SEARCH_MATCH_LAST;
priv->search_blacklist = g_hash_table_new_full (g_str_hash,
g_str_equal,
- g_free,
+ (GDestroyNotify) as_ref_string_unref,
NULL);
priv->hash_id = g_hash_table_new_full (g_str_hash,
g_str_equal,