summaryrefslogtreecommitdiff
path: root/libappstream-glib/as-stemmer.c
diff options
context:
space:
mode:
author Richard Hughes <richard@hughsie.com> 2016-06-27 17:11:00 +0100
committer Richard Hughes <richard@hughsie.com> 2016-06-28 08:12:01 +0100
commit 63d2d89eca6c185c251c02036328fec5a6692623 (patch)
tree 73e5e7c7892b04ab6bde3c2e2d9f4bfddbdbed08 /libappstream-glib/as-stemmer.c
parent a520edea8874bb0dd914d9e13e9f34aaff8d7e2f (diff)
download appstream-glib-63d2d89eca6c185c251c02036328fec5a6692623.tar.gz
Optionally use libstemmer for keyword stemming
This allows us to search for 'networking', 'networks', or 'networked' and to return results for all network* keywords.
Diffstat (limited to 'libappstream-glib/as-stemmer.c')
-rw-r--r-- libappstream-glib/as-stemmer.c 120
1 files changed, 120 insertions, 0 deletions
diff --git a/libappstream-glib/as-stemmer.c b/libappstream-glib/as-stemmer.c
new file mode 100644
index 0000000..4af4410
--- /dev/null
+++ b/libappstream-glib/as-stemmer.c
@@ -0,0 +1,120 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
+ *
+ * Copyright (C) 2016 Richard Hughes <richard@hughsie.com>
+ *
+ * Licensed under the GNU General Public License Version 2
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+
+#include <glib/gi18n.h>
+
+#ifdef HAVE_LIBSTEMMER
+ #include "libstemmer.h"
+#endif
+
+#include "as-stemmer.h"
+
+/* Instance data for #AsStemmer. */
+struct _AsStemmer
+{
+ GObject parent_instance;
+ /* libstemmer context; only assigned (in as_stemmer_init()) when built with
+  * HAVE_LIBSTEMMER, and as_stemmer_process() copies its input unchanged
+  * when this is NULL */
+ struct sb_stemmer *ctx;
+ /* locked by as_stemmer_process() for the duration of each call */
+ GMutex ctx_mutex;
+};
+
+/* registers the AsStemmer type, with G_TYPE_OBJECT as its parent type */
+G_DEFINE_TYPE (AsStemmer, as_stemmer, G_TYPE_OBJECT)
+
+/* shared instance handed out by as_stemmer_new(), which also registers this
+ * variable as a weak pointer on the object via g_object_add_weak_pointer() */
+static gpointer as_stemmer_object = NULL;
+
+/**
+ * as_stemmer_process:
+ * @stemmer: A #AsStemmer
+ * @value: (nullable): The input string
+ *
+ * Stems a string using the libstemmer context owned by @stemmer.
+ *
+ * If the library was built without libstemmer, or if no stemmer context
+ * could be created, an unmodified copy of @value is returned instead.
+ *
+ * Since: 0.2.2
+ *
+ * Returns: (transfer full) (nullable): A new string to be freed with
+ * g_free(), or %NULL if @value was %NULL
+ **/
+gchar *
+as_stemmer_process (AsStemmer *stemmer, const gchar *value)
+{
+#ifdef HAVE_LIBSTEMMER
+	const guchar *stemmed;
+	g_autoptr(GMutexLocker) locker = NULL;
+
+	/* behave like the non-libstemmer build, where g_strdup(NULL) is NULL,
+	 * rather than crashing in strlen() */
+	if (value == NULL)
+		return NULL;
+
+	/* the buffer returned by sb_stemmer_stem() is owned by the context and
+	 * is invalidated by the next call, so keep the lock held until the
+	 * result has been copied */
+	locker = g_mutex_locker_new (&stemmer->ctx_mutex);
+	if (stemmer->ctx == NULL)
+		return g_strdup (value);
+	stemmed = sb_stemmer_stem (stemmer->ctx,
+				   (const guchar *) value,
+				   (int) strlen (value));
+
+	/* g_strdup() passes a NULL (failed) result through as NULL */
+	return g_strdup ((const gchar *) stemmed);
+#else
+	return g_strdup (value);
+#endif
+}
+
+/* GObject finalize handler: releases the mutex and the stemmer context (when
+ * built with libstemmer), then chains up to the parent class */
+static void
+as_stemmer_finalize (GObject *object)
+{
+	AsStemmer *self = AS_STEMMER (object);
+#ifdef HAVE_LIBSTEMMER
+	g_mutex_clear (&self->ctx_mutex);
+	sb_stemmer_delete (self->ctx);
+#endif
+	G_OBJECT_CLASS (as_stemmer_parent_class)->finalize (object);
+}
+
+/* class initializer: installs the finalize handler for #AsStemmer */
+static void
+as_stemmer_class_init (AsStemmerClass *klass)
+{
+	G_OBJECT_CLASS (klass)->finalize = as_stemmer_finalize;
+}
+
+/* instance initializer: sets up the mutex and the "en" stemmer context when
+ * built with libstemmer; does nothing otherwise */
+static void
+as_stemmer_init (AsStemmer *stemmer)
+{
+#ifdef HAVE_LIBSTEMMER
+	g_mutex_init (&stemmer->ctx_mutex);
+
+	/* FIXME: use as_utils_locale_to_language()? */
+	stemmer->ctx = sb_stemmer_new ("en", NULL);
+#endif
+}
+
+/**
+ * as_stemmer_new:
+ *
+ * Gets the shared #AsStemmer, creating it if it does not currently exist.
+ * Each call hands one reference to the caller.
+ *
+ * Returns: (transfer full): a #AsStemmer
+ *
+ * Since: 0.2.2
+ **/
+AsStemmer *
+as_stemmer_new (void)
+{
+	/* an instance already exists: hand out another reference to it */
+	if (as_stemmer_object != NULL) {
+		g_object_ref (as_stemmer_object);
+		return AS_STEMMER (as_stemmer_object);
+	}
+
+	/* first use: create the instance and track it with a weak pointer */
+	as_stemmer_object = g_object_new (AS_TYPE_STEMMER, NULL);
+	g_object_add_weak_pointer (as_stemmer_object, &as_stemmer_object);
+	return AS_STEMMER (as_stemmer_object);
+}