summaryrefslogtreecommitdiff
path: root/libappstream-glib/as-stemmer.c
blob: b3505dd192b6670f23172c2dfa22c8aaca0a74fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
 *
 * Copyright (C) 2016 Richard Hughes <richard@hughsie.com>
 *
 * Licensed under the GNU General Public License Version 2
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "config.h"

#include <glib/gi18n.h>

#ifdef HAVE_LIBSTEMMER
  #include "libstemmer.h"
#endif

#include "as-stemmer.h"

/* Instance data for #AsStemmer, a GObject wrapping a libstemmer context. */
struct _AsStemmer
{
	/* GObject base instance; must remain the first member */
	GObject			 parent_instance;
	/* FALSE when APPSTREAM_GLIB_DISABLE_STEMMER is set in the environment
	 * at instance creation; as_stemmer_process() then copies its input */
	gboolean		 enabled;
	/* libstemmer context; only assigned when built with HAVE_LIBSTEMMER */
	struct sb_stemmer	*ctx;
	/* held by as_stemmer_process() for the duration of each call */
	GMutex			 ctx_mutex;
};

/* Registers the AsStemmer type, deriving directly from GObject. */
G_DEFINE_TYPE (AsStemmer, as_stemmer, G_TYPE_OBJECT)

/* Shared instance handed out by as_stemmer_new(); it is registered there as a
 * weak pointer, so it goes back to NULL once the object is finalized. */
static gpointer as_stemmer_object = NULL;

/**
 * as_stemmer_process:
 * @stemmer: A #AsStemmer
 * @value: (nullable): The input string
 *
 * Stems a string using the Porter algorithm.
 *
 * If the library was built without libstemmer, the stemmer context could not
 * be created, or stemming was disabled for this instance, a plain copy of
 * @value is returned instead.
 *
 * Since: 0.2.2
 *
 * Returns: (transfer full) (nullable): A new string, or %NULL if @value
 * is %NULL
 **/
gchar *
as_stemmer_process (AsStemmer *stemmer, const gchar *value)
{
#ifdef HAVE_LIBSTEMMER
	const guchar *stemmed;
	gsize value_len;
	g_autoptr(GMutexLocker) locker = g_mutex_locker_new (&stemmer->ctx_mutex);

	/* nothing to stem; same result as g_strdup(NULL) on the fallback path,
	 * and avoids calling strlen() on a NULL pointer */
	if (value == NULL)
		return NULL;

	/* not available or not enabled */
	if (stemmer->ctx == NULL || !stemmer->enabled)
		return g_strdup (value);

	/* sb_stemmer_stem() takes the length as an int, so never let an
	 * oversized input wrap around to a bogus or negative size */
	value_len = strlen (value);
	if (value_len > (gsize) G_MAXINT)
		return g_strdup (value);

	/* the returned buffer belongs to the stemmer context and is reused by
	 * the next call, so copy it while the mutex is still held */
	stemmed = (const guchar *) sb_stemmer_stem (stemmer->ctx,
						    (const guchar *) value,
						    (int) value_len);
	return g_strdup ((const gchar *) stemmed);
#else
	return g_strdup (value);
#endif
}

/* GObject finalize handler: releases the libstemmer context and the mutex
 * guarding it (only when built with libstemmer), then chains up. */
static void
as_stemmer_finalize (GObject *object)
{
#ifdef HAVE_LIBSTEMMER
	AsStemmer *self = AS_STEMMER (object);

	/* drop the stemmer context before the mutex that protects it */
	sb_stemmer_delete (self->ctx);
	g_mutex_clear (&self->ctx_mutex);
#endif

	/* chain up to the parent class */
	G_OBJECT_CLASS (as_stemmer_parent_class)->finalize (object);
}

/* Class initializer: installs the finalize handler on the GObject vtable. */
static void
as_stemmer_class_init (AsStemmerClass *klass)
{
	G_OBJECT_CLASS (klass)->finalize = as_stemmer_finalize;
}

/* Instance initializer: creates the "en" libstemmer context and its mutex
 * when built with libstemmer, and records whether stemming is enabled. */
static void
as_stemmer_init (AsStemmer *stemmer)
{
	const gchar *disable_env;

#ifdef HAVE_LIBSTEMMER
	/* FIXME: use as_utils_locale_to_language()? */
	stemmer->ctx = sb_stemmer_new ("en", NULL);
	g_mutex_init (&stemmer->ctx_mutex);
#endif

	/* stemming stays enabled unless the variable is present at all */
	disable_env = g_getenv ("APPSTREAM_GLIB_DISABLE_STEMMER");
	stemmer->enabled = (disable_env == NULL);
}

/**
 * as_stemmer_new:
 *
 * Creates a new #AsStemmer.
 *
 * A single instance is shared: the first call constructs it, and later
 * calls return a new reference to that same object for as long as it is
 * still alive.
 *
 * Returns: (transfer full): a #AsStemmer
 *
 * Since: 0.2.2
 **/
AsStemmer *
as_stemmer_new (void)
{
	/* no live instance: build one and let the global reset itself to
	 * NULL when the object is finalized */
	if (as_stemmer_object == NULL) {
		as_stemmer_object = g_object_new (AS_TYPE_STEMMER, NULL);
		g_object_add_weak_pointer (as_stemmer_object, &as_stemmer_object);
		return AS_STEMMER (as_stemmer_object);
	}

	/* reuse the existing instance, handing the caller its own reference */
	g_object_ref (as_stemmer_object);
	return AS_STEMMER (as_stemmer_object);
}