summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--configure.ac37
-rw-r--r--extensions/fts++/Makefile.am5
-rw-r--r--extensions/fts++/indexer.cpp12
-rw-r--r--extensions/fts++/stringutils.cpp37
-rw-r--r--extensions/fts++/stringutils.h2
-rw-r--r--extensions/fts++/test/Makefile.am5
-rw-r--r--extensions/fts++/test/test-stringutils.cpp29
7 files changed, 124 insertions, 3 deletions
diff --git a/configure.ac b/configure.ac
index 83b06deb..cea365cc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -40,6 +40,30 @@ AC_SUBST(ZEITGEIST_CFLAGS)
AC_SUBST(ZEITGEIST_LIBS)
#################################################
+# Dee-ICU check
+#################################################
+DEE_ICU_REQUIRED=1.0.2
+
+AC_ARG_WITH([dee-icu],
+ AS_HELP_STRING([--with-dee-icu[=@<:@no/auto/yes@:>@]],
+ [Build the FTS extension with dee-icu]),
+ [with_dee_icu=$withval],
+ [with_dee_icu="auto"])
+
+if test "x$with_dee_icu" = "xauto" ; then
+ PKG_CHECK_EXISTS([dee-icu-1.0 >= $DEE_ICU_REQUIRED],
+ with_dee_icu="yes",
+ with_dee_icu="no")
+fi
+
+if test "x$with_dee_icu" = "xyes" ; then
+ PKG_CHECK_MODULES(DEE_ICU, dee-icu-1.0 >= $DEE_ICU_REQUIRED)
+ AC_DEFINE(HAVE_DEE_ICU, 1, [Have dee-icu])
+fi
+
+AM_CONDITIONAL(HAVE_DEE_ICU, test "x$with_dee_icu" = "xyes")
+
+#################################################
# DBus service
#################################################
@@ -88,3 +112,16 @@ else
fi
AC_OUTPUT
+
+cat <<EOF
+
+${PACKAGE}-${VERSION}
+
+ Build Environment
+ Install Prefix: ${prefix}
+
+ Optional dependencies
+ dee-icu: ${with_dee_icu}
+
+EOF
+
diff --git a/extensions/fts++/Makefile.am b/extensions/fts++/Makefile.am
index 931695f7..5869945c 100644
--- a/extensions/fts++/Makefile.am
+++ b/extensions/fts++/Makefile.am
@@ -76,6 +76,11 @@ zeitgeist_fts_LDADD = \
-lxapian \
$(NULL)
+if HAVE_DEE_ICU
+AM_CPPFLAGS += $(DEE_ICU_CFLAGS)
+zeitgeist_fts_LDADD += $(DEE_ICU_LIBS)
+endif
+
BUILT_SOURCES = \
zeitgeist-internal.stamp \
zeitgeist-fts_vala.stamp \
diff --git a/extensions/fts++/indexer.cpp b/extensions/fts++/indexer.cpp
index d97f7ebd..763894fe 100644
--- a/extensions/fts++/indexer.cpp
+++ b/extensions/fts++/indexer.cpp
@@ -358,8 +358,8 @@ void Indexer::AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc)
void Indexer::IndexText (std::string const& text)
{
- // FIXME: ascii folding!
tokenizer->index_text (text, 5);
+ tokenizer->index_text (StringUtils::AsciiFold (text), 5);
}
void Indexer::IndexUri (std::string const& uri, std::string const& origin)
@@ -593,22 +593,28 @@ bool Indexer::IndexActor (std::string const& actor, bool is_subject)
unsigned name_weight = is_subject ? 5 : 2;
unsigned comment_weight = 2;
- // FIXME: ascii folding somewhere
-
val = g_app_info_get_display_name (ai);
if (val && val[0] != '\0')
{
std::string display_name (val);
+ std::string display_name_folded (StringUtils::AsciiFold (display_name));
+
tokenizer->index_text (display_name, name_weight);
tokenizer->index_text (display_name, name_weight, "A");
+ tokenizer->index_text (display_name_folded, name_weight);
+ tokenizer->index_text (display_name_folded, name_weight, "A");
}
val = g_desktop_app_info_get_generic_name (dai);
if (val && val[0] != '\0')
{
std::string generic_name (val);
+ std::string generic_name_folded (StringUtils::AsciiFold (generic_name));
+
tokenizer->index_text (generic_name, name_weight);
tokenizer->index_text (generic_name, name_weight, "A");
+ tokenizer->index_text (generic_name_folded, name_weight);
+ tokenizer->index_text (generic_name_folded, name_weight, "A");
}
if (!is_subject) return true;
diff --git a/extensions/fts++/stringutils.cpp b/extensions/fts++/stringutils.cpp
index 12b0baf8..59a6dd2c 100644
--- a/extensions/fts++/stringutils.cpp
+++ b/extensions/fts++/stringutils.cpp
@@ -21,6 +21,10 @@
#include "stringutils.h"
+#ifdef HAVE_DEE_ICU
+#include <dee-icu.h>
+#endif
+
using namespace std;
namespace ZeitgeistFTS {
@@ -123,6 +127,39 @@ void SplitUri (string const& uri, string &authority,
}
}
+#ifdef HAVE_DEE_ICU
+static DeeICUTermFilter *icu_filter = NULL;
+
+/**
+ * Apply the ASCII-folding ICU term filter to the input text and return the
+ * folded version of the string.
+ *
+ * An empty string is returned when:
+ *  - the folded version is exactly the same as the original,
+ *  - the folding filter could not be created, or
+ *  - the filter did not produce a result (NULL).
+ */
+string AsciiFold (string const& input)
+{
+  // The filter is created lazily on first use and kept in the file-level
+  // icu_filter pointer for subsequent calls.
+  if (icu_filter == NULL)
+  {
+    icu_filter = dee_icu_term_filter_new_ascii_folder ();
+    if (icu_filter == NULL) return "";
+  }
+
+  // FIXME: check first if the input contains any non-ascii chars?
+
+  gchar *folded = dee_icu_term_filter_apply (icu_filter, input.c_str ());
+
+  // Guard against a NULL result: constructing a std::string from a null
+  // pointer is undefined behaviour.
+  if (folded == NULL) return "";
+
+  string result (folded);
+  g_free (folded);
+
+  // Signal "nothing changed" with an empty string instead of a copy.
+  return result == input ? "" : result;
+}
+#else
+/* Fallback used when HAVE_DEE_ICU is not defined: the input is ignored
+ * and an empty string is always returned. */
+string AsciiFold (string const& input)
+{
+  return string ();
+}
+#endif
+
} /* namespace StringUtils */
} /* namespace ZeitgeistFTS */
diff --git a/extensions/fts++/stringutils.h b/extensions/fts++/stringutils.h
index ef011d00..1fbc5a3a 100644
--- a/extensions/fts++/stringutils.h
+++ b/extensions/fts++/stringutils.h
@@ -37,6 +37,8 @@ void SplitUri (std::string const& uri,
std::string &path,
std::string &basename);
+std::string AsciiFold (std::string const& input);
+
} /* namespace StringUtils */
} /* namespace ZeitgeistFTS */
diff --git a/extensions/fts++/test/Makefile.am b/extensions/fts++/test/Makefile.am
index e36cf773..915dd4fd 100644
--- a/extensions/fts++/test/Makefile.am
+++ b/extensions/fts++/test/Makefile.am
@@ -25,3 +25,8 @@ test_fts_LDADD = \
-lxapian \
$(NULL)
+if HAVE_DEE_ICU
+AM_CPPFLAGS += $(DEE_ICU_CFLAGS)
+test_fts_LDADD += $(DEE_ICU_LIBS)
+endif
+
diff --git a/extensions/fts++/test/test-stringutils.cpp b/extensions/fts++/test/test-stringutils.cpp
index 3f9405fa..e83fc22a 100644
--- a/extensions/fts++/test/test-stringutils.cpp
+++ b/extensions/fts++/test/test-stringutils.cpp
@@ -163,6 +163,31 @@ test_split (Fixture *fix, gconstpointer data)
g_assert_cmpstr ("type=A", ==, query.c_str ());
}
+static void
+test_ascii_fold (Fixture *fix, gconstpointer data)
+{
+ std::string folded;
+
+ folded = StringUtils::AsciiFold ("");
+ g_assert_cmpstr ("", ==, folded.c_str ());
+
+ // if the original matches the folded version, AsciiFold returns ""
+ folded = StringUtils::AsciiFold ("a");
+ g_assert_cmpstr ("", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("abcdef");
+ g_assert_cmpstr ("", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("å");
+ g_assert_cmpstr ("a", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("åå");
+ g_assert_cmpstr ("aa", ==, folded.c_str ());
+
+ folded = StringUtils::AsciiFold ("aåaåa");
+ g_assert_cmpstr ("aaaaa", ==, folded.c_str ());
+}
+
G_BEGIN_DECLS
void test_stringutils_create_suite (void)
@@ -173,6 +198,10 @@ void test_stringutils_create_suite (void)
setup, test_mangle, teardown);
g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0,
setup, test_split, teardown);
+#ifdef HAVE_DEE_ICU
+ g_test_add ("/Zeitgeist/FTS/StringUtils/AsciiFold", Fixture, 0,
+ setup, test_ascii_fold, teardown);
+#endif
}
G_END_DECLS