diff options
-rw-r--r-- | configure.ac | 37 | ||||
-rw-r--r-- | extensions/fts++/Makefile.am | 5 | ||||
-rw-r--r-- | extensions/fts++/indexer.cpp | 12 | ||||
-rw-r--r-- | extensions/fts++/stringutils.cpp | 37 | ||||
-rw-r--r-- | extensions/fts++/stringutils.h | 2 | ||||
-rw-r--r-- | extensions/fts++/test/Makefile.am | 5 | ||||
-rw-r--r-- | extensions/fts++/test/test-stringutils.cpp | 29 |
7 files changed, 124 insertions, 3 deletions
diff --git a/configure.ac b/configure.ac index 83b06deb..cea365cc 100644 --- a/configure.ac +++ b/configure.ac @@ -40,6 +40,30 @@ AC_SUBST(ZEITGEIST_CFLAGS) AC_SUBST(ZEITGEIST_LIBS) ################################################# +# Dee-ICU check +################################################# +DEE_ICU_REQUIRED=1.0.2 + +AC_ARG_WITH([dee-icu], + AS_HELP_STRING([--with-dee-icu[=@<:@no/auto/yes@:>@]], + [Build the FTS extension with dee-icu]), + [with_dee_icu=$withval], + [with_dee_icu="auto"]) + +if test "x$with_dee_icu" = "xauto" ; then + PKG_CHECK_EXISTS([dee-icu-1.0 >= $DEE_ICU_REQUIRED], + with_dee_icu="yes", + with_dee_icu="no") +fi + +if test "x$with_dee_icu" = "xyes" ; then + PKG_CHECK_MODULES(DEE_ICU, dee-icu-1.0 >= $DEE_ICU_REQUIRED) + AC_DEFINE(HAVE_DEE_ICU, 1, [Have dee-icu]) +fi + +AM_CONDITIONAL(HAVE_DEE_ICU, test "x$with_dee_icu" = "xyes") + +################################################# # DBus service ################################################# @@ -88,3 +112,16 @@ else fi AC_OUTPUT + +cat <<EOF + +${PACKAGE}-${VERSION} + + Build Environment + Install Prefix: ${prefix} + + Optional dependencies + dee-icu: ${with_dee_icu} + +EOF + diff --git a/extensions/fts++/Makefile.am b/extensions/fts++/Makefile.am index 931695f7..5869945c 100644 --- a/extensions/fts++/Makefile.am +++ b/extensions/fts++/Makefile.am @@ -76,6 +76,11 @@ zeitgeist_fts_LDADD = \ -lxapian \ $(NULL) +if HAVE_DEE_ICU +AM_CPPFLAGS += $(DEE_ICU_CFLAGS) +zeitgeist_fts_LDADD += $(DEE_ICU_LIBS) +endif + BUILT_SOURCES = \ zeitgeist-internal.stamp \ zeitgeist-fts_vala.stamp \ diff --git a/extensions/fts++/indexer.cpp b/extensions/fts++/indexer.cpp index d97f7ebd..763894fe 100644 --- a/extensions/fts++/indexer.cpp +++ b/extensions/fts++/indexer.cpp @@ -358,8 +358,8 @@ void Indexer::AddDocFilters (ZeitgeistEvent *event, Xapian::Document &doc) void Indexer::IndexText (std::string const& text) { - // FIXME: ascii folding! tokenizer->index_text (text, 5); + tokenizer->index_text (StringUtils::AsciiFold (text), 5); } void Indexer::IndexUri (std::string const& uri, std::string const& origin) @@ -593,22 +593,28 @@ bool Indexer::IndexActor (std::string const& actor, bool is_subject) unsigned name_weight = is_subject ? 5 : 2; unsigned comment_weight = 2; - // FIXME: ascii folding somewhere - val = g_app_info_get_display_name (ai); if (val && val[0] != '\0') { std::string display_name (val); + std::string display_name_folded (StringUtils::AsciiFold (display_name)); + tokenizer->index_text (display_name, name_weight); tokenizer->index_text (display_name, name_weight, "A"); + tokenizer->index_text (display_name_folded, name_weight); + tokenizer->index_text (display_name_folded, name_weight, "A"); } val = g_desktop_app_info_get_generic_name (dai); if (val && val[0] != '\0') { std::string generic_name (val); + std::string generic_name_folded (StringUtils::AsciiFold (generic_name)); + tokenizer->index_text (generic_name, name_weight); tokenizer->index_text (generic_name, name_weight, "A"); + tokenizer->index_text (generic_name_folded, name_weight); + tokenizer->index_text (generic_name_folded, name_weight, "A"); } if (!is_subject) return true; diff --git a/extensions/fts++/stringutils.cpp b/extensions/fts++/stringutils.cpp index 12b0baf8..59a6dd2c 100644 --- a/extensions/fts++/stringutils.cpp +++ b/extensions/fts++/stringutils.cpp @@ -21,6 +21,10 @@ #include "stringutils.h" +#ifdef HAVE_DEE_ICU +#include <dee-icu.h> +#endif + using namespace std; namespace ZeitgeistFTS { @@ -123,6 +127,39 @@ void SplitUri (string const& uri, string &authority, } } +#ifdef HAVE_DEE_ICU +static DeeICUTermFilter *icu_filter = NULL; + +/** + * Use ascii folding filter on the input text and return folded version + * of the original string. + * + * Note that if the folded version is exactly the same as the original + * empty string will be returned. + */ +string AsciiFold (string const& input) +{ + if (icu_filter == NULL) + { + icu_filter = dee_icu_term_filter_new_ascii_folder (); + if (icu_filter == NULL) return ""; + } + + // FIXME: check first if the input contains any non-ascii chars? + + gchar *folded = dee_icu_term_filter_apply (icu_filter, input.c_str ()); + string result (folded); + g_free (folded); + + return result == input ? "" : result; +} +#else +string AsciiFold (string const& input) +{ + return ""; +} +#endif + } /* namespace StringUtils */ } /* namespace ZeitgeistFTS */ diff --git a/extensions/fts++/stringutils.h b/extensions/fts++/stringutils.h index ef011d00..1fbc5a3a 100644 --- a/extensions/fts++/stringutils.h +++ b/extensions/fts++/stringutils.h @@ -37,6 +37,8 @@ void SplitUri (std::string const& uri, std::string &path, std::string &basename); +std::string AsciiFold (std::string const& input); + } /* namespace StringUtils */ } /* namespace ZeitgeistFTS */ diff --git a/extensions/fts++/test/Makefile.am b/extensions/fts++/test/Makefile.am index e36cf773..915dd4fd 100644 --- a/extensions/fts++/test/Makefile.am +++ b/extensions/fts++/test/Makefile.am @@ -25,3 +25,8 @@ test_fts_LDADD = \ -lxapian \ $(NULL) +if HAVE_DEE_ICU +AM_CPPFLAGS += $(DEE_ICU_CFLAGS) +test_fts_LDADD += $(DEE_ICU_LIBS) +endif + diff --git a/extensions/fts++/test/test-stringutils.cpp b/extensions/fts++/test/test-stringutils.cpp index 3f9405fa..e83fc22a 100644 --- a/extensions/fts++/test/test-stringutils.cpp +++ b/extensions/fts++/test/test-stringutils.cpp @@ -163,6 +163,31 @@ test_split (Fixture *fix, gconstpointer data) g_assert_cmpstr ("type=A", ==, query.c_str ()); } +static void +test_ascii_fold (Fixture *fix, gconstpointer data) +{ + std::string folded; + + folded = StringUtils::AsciiFold (""); + g_assert_cmpstr ("", ==, folded.c_str ()); + + // if the original matches the folded version, AsciiFold returns "" + folded = StringUtils::AsciiFold ("a"); + g_assert_cmpstr ("", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("abcdef"); + g_assert_cmpstr ("", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("å"); + g_assert_cmpstr ("a", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("åå"); + g_assert_cmpstr ("aa", ==, folded.c_str ()); + + folded = StringUtils::AsciiFold ("aåaåa"); + g_assert_cmpstr ("aaaaa", ==, folded.c_str ()); +} + G_BEGIN_DECLS void test_stringutils_create_suite (void) @@ -173,6 +198,10 @@ void test_stringutils_create_suite (void) setup, test_mangle, teardown); g_test_add ("/Zeitgeist/FTS/StringUtils/SplitUri", Fixture, 0, setup, test_split, teardown); +#ifdef HAVE_DEE_ICU + g_test_add ("/Zeitgeist/FTS/StringUtils/AsciiFold", Fixture, 0, + setup, test_ascii_fold, teardown); +#endif } G_END_DECLS |