diff options
author | Richard Hughes <richard@hughsie.com> | 2016-06-27 17:11:00 +0100 |
---|---|---|
committer | Richard Hughes <richard@hughsie.com> | 2016-06-28 08:12:01 +0100 |
commit | 63d2d89eca6c185c251c02036328fec5a6692623 (patch) | |
tree | 73e5e7c7892b04ab6bde3c2e2d9f4bfddbdbed08 /libappstream-glib/as-stemmer.c | |
parent | a520edea8874bb0dd914d9e13e9f34aaff8d7e2f (diff) | |
download | appstream-glib-63d2d89eca6c185c251c02036328fec5a6692623.tar.gz |
Optionally use libstemmer for keyword stemming
This allows us to search for 'networking', 'networks', or 'networked' and to
return results for all network* keywords.
Diffstat (limited to 'libappstream-glib/as-stemmer.c')
-rw-r--r-- | libappstream-glib/as-stemmer.c | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/libappstream-glib/as-stemmer.c b/libappstream-glib/as-stemmer.c new file mode 100644 index 0000000..4af4410 --- /dev/null +++ b/libappstream-glib/as-stemmer.c @@ -0,0 +1,120 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- + * + * Copyright (C) 2016 Richard Hughes <richard@hughsie.com> + * + * Licensed under the GNU General Public License Version 2 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "config.h" + +#include <glib/gi18n.h> + +#ifdef HAVE_LIBSTEMMER + #include "libstemmer.h" +#endif + +#include "as-stemmer.h" + +struct _AsStemmer +{ + GObject parent_instance; + struct sb_stemmer *ctx; + GMutex ctx_mutex; +}; + +G_DEFINE_TYPE (AsStemmer, as_stemmer, G_TYPE_OBJECT) + +static gpointer as_stemmer_object = NULL; + +/** + * as_stemmer_process: + * @stemmer: A #AsStemmer + * @value: The input string + * + * Stems a string using the Porter algorithm. + * + * Since: 0.2.2 + * + * Returns: A new string + **/ +gchar * +as_stemmer_process (AsStemmer *stemmer, const gchar *value) +{ +#ifdef HAVE_LIBSTEMMER + gchar *new; + g_autoptr(GMutexLocker) locker = g_mutex_locker_new (&stemmer->ctx_mutex); + if (stemmer->ctx == NULL) + return g_strdup (value); + new = g_strdup ((gchar *) sb_stemmer_stem (stemmer->ctx, + (guchar *) value, + strlen (value))); +// if (g_strcmp0 (value, new) != 0) +// g_debug ("stemmed %s->%s", value, new); + return new; +#else + return g_strdup (value); +#endif +} + +static void +as_stemmer_finalize (GObject *object) +{ + AsStemmer *stemmer = AS_STEMMER (object); +#ifdef HAVE_LIBSTEMMER + sb_stemmer_delete (stemmer->ctx); + g_mutex_clear (&stemmer->ctx_mutex); +#endif + G_OBJECT_CLASS (as_stemmer_parent_class)->finalize (object); +} + +static void +as_stemmer_class_init (AsStemmerClass *klass) +{ + GObjectClass *object_class = G_OBJECT_CLASS (klass); + object_class->finalize = as_stemmer_finalize; +} + +static void +as_stemmer_init (AsStemmer *stemmer) +{ + /* FIXME: use as_utils_locale_to_language()? */ +#ifdef HAVE_LIBSTEMMER + stemmer->ctx = sb_stemmer_new ("en", NULL); + g_mutex_init (&stemmer->ctx_mutex); +#endif +} + +/** + * as_stemmer_new: + * + * Creates a new #AsStemmer. + * + * Returns: (transfer full): a #AsStemmer + * + * Since: 0.2.2 + **/ +AsStemmer * +as_stemmer_new (void) +{ + if (as_stemmer_object != NULL) { + g_object_ref (as_stemmer_object); + } else { + as_stemmer_object = g_object_new (AS_TYPE_STEMMER, NULL); + g_object_add_weak_pointer (as_stemmer_object, &as_stemmer_object); + } + return AS_STEMMER (as_stemmer_object); +} |