summary refs log tree commit diff
diff options
context:
space:
mode:
author Dimitrij Mijoski <dmjpp@hotmail.com> 2022-02-14 01:30:43 +0100
committer Reuben Thomas <rrt@sc3d.org> 2022-02-18 08:20:14 +0000
commit b158b93901b0364b0fad725b613c0e9b6ccbfa05 (patch)
tree 108f2ed0302bf2cd966cb384c9c364dab6acbf1b
parent da55e9635655ec6a9d7c8d848a47df4b5d17526e (diff)
download enchant-b158b93901b0364b0fad725b613c0e9b6ccbfa05.tar.gz
Completely rework provider Nuspell, require Nuspell v5.1.0.
The new provider is much smaller in code size, more correct and uses some new APIs introduced in v5.1.0 for enumerating the dictionaries.
-rw-r--r-- .appveyor.yml 2
-rw-r--r-- configure.ac 2
-rw-r--r-- providers/enchant_nuspell.cpp 420
3 files changed, 88 insertions, 336 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 82cc38b..c413347 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -26,7 +26,7 @@ for:
init:
- sudo apt-get -y update && sudo apt-get -y upgrade
- sudo apt-get -y install libglib2.0-dev libaspell-dev hspell libhunspell-dev libvoikko-dev voikko-fi aspell-en libunittest++-dev hunspell-fr libicu-dev ninja-build
- - wget https://github.com/nuspell/nuspell/archive/refs/tags/v5.0.0.tar.gz -O - | tar -xz
+ - wget https://github.com/nuspell/nuspell/archive/refs/tags/v5.1.0.tar.gz -O - | tar -xz
- cmake -S nuspell-* -B nuspell-build -G Ninja -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=0
- cmake --build nuspell-build
- sudo cmake --install nuspell-build
diff --git a/configure.ac b/configure.ac
index 35e3b80..78df0ce 100644
--- a/configure.ac
+++ b/configure.ac
@@ -209,7 +209,7 @@ build_providers=
dnl Standard providers
ENCHANT_CHECK_PKG_CONFIG_PROVIDER([hunspell], [HUNSPELL])
-ENCHANT_CHECK_PKG_CONFIG_PROVIDER([nuspell], [NUSPELL], [nuspell >= 4.1.0])
+ENCHANT_CHECK_PKG_CONFIG_PROVIDER([nuspell], [NUSPELL], [nuspell >= 5.1.0])
ENCHANT_CHECK_LIB_PROVIDER([aspell], [ASPELL], [get_aspell_dict_info_list])
ENCHANT_CHECK_LIB_PROVIDER([hspell], [HSPELL], [hspell_get_dictionary_path],, [-lz])
ENCHANT_CHECK_PKG_CONFIG_PROVIDER([voikko], [VOIKKO], [libvoikko])
diff --git a/providers/enchant_nuspell.cpp b/providers/enchant_nuspell.cpp
index 01e3edd..91d9502 100644
--- a/providers/enchant_nuspell.cpp
+++ b/providers/enchant_nuspell.cpp
@@ -1,4 +1,5 @@
/* enchant
+ * Copyright (C) 2022 Dimitrij Mijoski
* Copyright (C) 2020 Sander van Geloven
*
* This library is free software; you can redistribute it and/or
@@ -35,12 +36,7 @@
#include "config.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <string>
-#include <vector>
+#include <memory>
#include "enchant-provider.h"
#include "unused-parameter.h"
@@ -51,372 +47,130 @@
#include <glib.h>
using namespace std;
-using namespace nuspell;
-
-/***************************************************************************/
-class NuspellChecker
+// EnchantDict functions
+static int nuspell_dict_check(EnchantDict* me, const char* const word,
+ size_t len)
{
-public:
- bool checkWord (const char *word, size_t len);
- char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
-
- bool requestDictionary (const char * szLang);
-
-private:
- Dictionary nuspell;
-};
+ auto dict = static_cast<nuspell::Dictionary*>(me->user_data);
-/***************************************************************************/
-
-bool
-NuspellChecker::checkWord(const char *utf8Word, size_t len)
-{
- // the 8-bit encodings use precomposed forms
- char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
- auto ret = nuspell.spell(normalizedWord);
- g_free(normalizedWord);
- return ret;
+ using UniquePtr = unique_ptr<char[], decltype(&g_free)>;
+ auto normalized_word =
+ UniquePtr(g_utf8_normalize(word, len, G_NORMALIZE_NFC), g_free);
+ return !dict->spell(normalized_word.get());
}
-char**
-NuspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug)
+static char** nuspell_dict_suggest(EnchantDict* me, const char* const word,
+ size_t len, size_t* out_n_suggs)
{
+ auto dict = static_cast<nuspell::Dictionary*>(me->user_data);
+
+ using UniquePtr = unique_ptr<char[], decltype(&g_free)>;
// the 8-bit encodings use precomposed forms
- char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
+ auto normalized_word =
+ UniquePtr(g_utf8_normalize(word, len, G_NORMALIZE_NFC), g_free);
auto suggestions = vector<string>();
- nuspell.suggest(normalizedWord, suggestions);
- g_free(normalizedWord);
- if (suggestions.empty())
+ dict->suggest(normalized_word.get(), suggestions);
+ if (empty(suggestions)) {
+ *out_n_suggs = 0;
return nullptr;
- *nsug = suggestions.size();
- char **sug = g_new0 (char *, *nsug + 1);
- size_t i = 0;
- for (auto& suggest : suggestions) {
- char *word = g_new0(char, suggest.size() + 1);
- strcpy(word, suggest.c_str());
- sug[i] = word;
- i++;
- }
- return sug;
-}
-
-static void
-s_buildDictionaryDirs (vector<string> & dirs)
-{
- dirs.clear ();
-
- /* 1. personal overrides for Enchant
- * ~/.config/enchant/nuspell
- */
- gchar * tmp;
- char * config_dir = enchant_get_user_config_dir ();
- tmp = g_build_filename (config_dir, "nuspell", nullptr);
- dirs.push_back (tmp);
- free (config_dir);
- g_free(tmp);
-
- /* Dynamically retrieved from Nuspell dictionary finder:
- * 2. personal overrides for Hunspell
- * $XDG_DATA_HOME/hunspell
- * $XDG_DATA_HOME by default is $HOME/.local/share/
- * 3. system installed for Hunspell
- * $XDG_DATA_DIRS/hunspell
- * $XDG_DATA_DIRS/myspell (needed for Fedora)
- * $XDG_DATA_DIRS by default are /usr/local/share and /usr/share
- */
- nuspell::append_default_dir_paths(dirs);
-
- /* 5. system installations by Enchant
- * /usr/local/share/enchant/nuspell
- * /usr/share/enchant/nuspell
- */
- char * enchant_prefix = enchant_get_prefix_dir();
- if (enchant_prefix) {
- tmp = g_build_filename(enchant_prefix, "share", "enchant", "nuspell", nullptr);
- dirs.push_back (tmp);
- g_free(enchant_prefix);
- g_free(tmp);
- }
-
- /* Hunspell paths are used, therefore ENCHANT_NUSPELL_DICT_DIR is
- * irrelevant. Hence, the following paths are not to be considered:
- * /usr/local/share/nuspell and /usr/share/nuspell
- */
-}
-
-static void
-s_buildHashNames (vector<string> & names, const char * dict)
-{
- names.clear ();
-
- vector<string> dirs;
- s_buildDictionaryDirs (dirs);
-
- char *dict_dic = g_strconcat(dict, ".dic", nullptr);
- for (size_t i = 0; i < dirs.size(); i++) {
- char *tmp = g_build_filename (dirs[i].c_str(), dict_dic, nullptr);
- names.push_back (tmp);
- g_free (tmp);
- }
-
- g_free(dict_dic);
-}
-
-static const string
-s_correspondingAffFile(const string & dicFile)
-{
- string aff = dicFile;
- if (aff.size() >= 4 && aff.compare(aff.size() - 4, 4, ".dic") == 0) {
- aff.erase(aff.size() - 3);
- aff += "aff";
}
- return aff;
+ char** sug_list = g_new0(char*, size(suggestions) + 1);
+ transform(begin(suggestions), end(suggestions), sug_list,
+ [](const string& sug) { return g_strdup(sug.c_str()); });
+ *out_n_suggs = size(suggestions);
+ return sug_list;
}
+// End EnchantDict functions
-static bool
-s_fileExists(const string & file)
-{
- return g_file_test(file.c_str(), G_FILE_TEST_EXISTS) != 0;
-}
+// EnchantProvider functions
+static void nuspell_provider_dispose(EnchantProvider* me) { g_free(me); }
-static bool is_plausible_dict_for_tag(const char *dir_entry, const char *tag)
+static EnchantDict*
+nuspell_provider_request_dict([[maybe_unused]] EnchantProvider* me,
+ const char* const tag)
{
- const char *dic_suffix = ".dic";
- size_t dic_suffix_len = strlen(dic_suffix);
- size_t dir_entry_len = strlen(dir_entry);
- size_t tag_len = strlen(tag);
-
- if (dir_entry_len - dic_suffix_len < tag_len)
- return false;
- if (strcmp(dir_entry + dir_entry_len - dic_suffix_len, dic_suffix) != 0)
- return false;
- if (strncmp (dir_entry, tag, tag_len) != 0)
- return false;
- //e.g. requested dict for "fi",
- //reject "fil_PH.dic"
- //allow "fi-FOO.dic", "fi_FOO.dic", "fi.dic", etc.
- if (!ispunct(dir_entry[tag_len]))
- return false;
- return true;
-}
-
-static char *
-nuspell_request_dictionary (const char * tag)
-{
- vector<string> names;
-
- s_buildHashNames (names, tag);
-
- for (size_t i = 0; i < names.size (); i++) {
- if (g_file_test(names[i].c_str(), G_FILE_TEST_EXISTS) &&
- s_fileExists(s_correspondingAffFile(names[i]))) {
- return strdup (names[i].c_str());
- }
- }
-
- vector<string> dirs;
- s_buildDictionaryDirs (dirs);
-
- for (size_t i = 0; i < dirs.size(); i++) {
- GDir *dir = g_dir_open (dirs[i].c_str(), 0, nullptr);
- if (dir) {
- const char *dir_entry;
- while ((dir_entry = g_dir_read_name (dir)) != NULL) {
- if (is_plausible_dict_for_tag(dir_entry, tag)) {
- char *dict = g_build_filename (dirs[i].c_str(),
- dir_entry, nullptr);
- if(s_fileExists(s_correspondingAffFile(dict))) {
- g_dir_close (dir);
- return dict;
- }
- g_free(dict);
- }
- }
-
- g_dir_close (dir);
- }
- }
-
- return NULL;
-}
+ auto dirs = vector<filesystem::path>();
+ nuspell::append_default_dir_paths(dirs);
+ auto dic_path = nuspell::search_dirs_for_one_dict(dirs, tag);
+ if (empty(dic_path))
+ return nullptr;
-bool
-NuspellChecker::requestDictionary(const char *szLang)
-{
- char *dic = nuspell_request_dictionary (szLang);
- if (!dic)
- return false;
- string aff(s_correspondingAffFile(dic));
- if (!s_fileExists(aff))
- return false;
- auto path = string(dic);
- free(dic);
- if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".dic") == 0)
- path.erase(path.size() - 4);
- else
- return false;
+ auto dict_cpp = make_unique<nuspell::Dictionary>();
try {
- nuspell = nuspell::Dictionary::load_from_path(path);
- } catch (const std::runtime_error& e) {
- return false;
+ dict_cpp->load_aff_dic(dic_path);
}
-
- return true;
-}
-
-/*
- * Enchant
- */
-
-static char **
-nuspell_dict_suggest (EnchantDict * me, const char *const word,
- size_t len, size_t * out_n_suggs)
-{
- NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
- return checker->suggestWord (word, len, out_n_suggs);
-}
-
-static int
-nuspell_dict_check (EnchantDict * me, const char *const word, size_t len)
-{
- NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
-
- return !(checker->checkWord(word, len));
-}
-
-static int
-nuspell_dict_is_word_character (EnchantDict * me _GL_UNUSED_PARAMETER,
- uint32_t uc, size_t n _GL_UNUSED_PARAMETER)
-{
- return g_unichar_isalpha(uc);
-}
-
-static void
-nuspell_provider_enum_dicts (const char * const directory,
- vector<string> & out_dicts)
-{
- GDir * dir = g_dir_open (directory, 0, nullptr);
- if (dir) {
- const char * entry;
- while ((entry = g_dir_read_name (dir)) != NULL) {
- char * utf8_entry = g_filename_to_utf8 (entry, -1, nullptr, nullptr, nullptr);
- if (utf8_entry) {
- string dir_entry (utf8_entry);
- g_free (utf8_entry);
-
- int hit = dir_entry.rfind (".dic");
- // don't include hyphenation dictionaries
- if (hit != -1) {
- // require .aff file to be present
- if(dir_entry.compare (0, 5, "hyph_") != 0) {
- char * dic = g_build_filename(directory, dir_entry.c_str(), nullptr);
- if (s_fileExists(s_correspondingAffFile(dic))) {
- out_dicts.push_back (dir_entry.substr (0, hit));
- }
- g_free(dic);
- }
- }
- }
- }
-
- g_dir_close (dir);
- }
-}
-
-extern "C" {
-
-static char **
-nuspell_provider_list_dicts (EnchantProvider * me _GL_UNUSED_PARAMETER,
- size_t * out_n_dicts)
-{
- vector<string> dict_dirs, dicts;
- char ** dictionary_list = NULL;
-
- s_buildDictionaryDirs (dict_dirs);
-
- for (size_t i = 0; i < dict_dirs.size(); i++) {
- nuspell_provider_enum_dicts (dict_dirs[i].c_str(), dicts);
- }
-
- if (dicts.size () > 0) {
- dictionary_list = g_new0 (char *, dicts.size() + 1);
-
- for (size_t i = 0; i < dicts.size(); i++)
- dictionary_list[i] = g_strdup (dicts[i].c_str());
- }
-
- *out_n_dicts = dicts.size ();
- return dictionary_list;
-}
-
-static EnchantDict *
-nuspell_provider_request_dict(EnchantProvider * me _GL_UNUSED_PARAMETER, const char *const tag)
-{
- NuspellChecker * checker = new NuspellChecker();
-
- if (!checker)
- return NULL;
-
- if (!checker->requestDictionary(tag)) {
- delete checker;
- return NULL;
+ catch (const nuspell::Dictionary_Loading_Error&) {
+ return nullptr;
}
- EnchantDict *dict = g_new0(EnchantDict, 1);
- dict->user_data = (void *) checker;
+ EnchantDict* dict = g_new0(EnchantDict, 1);
+ dict->user_data = static_cast<void*>(dict_cpp.release());
dict->check = nuspell_dict_check;
dict->suggest = nuspell_dict_suggest;
- // don't implement personal, session
- dict->is_word_character = nuspell_dict_is_word_character;
-
return dict;
}
-static void
-nuspell_provider_dispose_dict (EnchantProvider * me _GL_UNUSED_PARAMETER, EnchantDict * dict)
+static void nuspell_provider_dispose_dict([[maybe_unused]] EnchantProvider* me,
+ EnchantDict* dict)
{
- NuspellChecker *checker = (NuspellChecker *) dict->user_data;
- delete checker;
-
- g_free (dict);
+ auto dict_cpp = static_cast<nuspell::Dictionary*>(dict->user_data);
+ delete dict_cpp;
+ g_free(dict);
}
static int
-nuspell_provider_dictionary_exists (struct str_enchant_provider * me _GL_UNUSED_PARAMETER,
- const char *const tag)
+nuspell_provider_dictionary_exists([[maybe_unused]] EnchantProvider* me,
+ const char* const tag)
{
- vector <string> names;
- s_buildHashNames (names, tag);
- for (size_t i = 0; i < names.size(); i++) {
- if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS) &&
- s_fileExists(s_correspondingAffFile(names[i]))) {
- return 1;
- }
- }
-
- return 0;
+ auto dirs = vector<filesystem::path>();
+ nuspell::append_default_dir_paths(dirs);
+ auto dic_path = nuspell::search_dirs_for_one_dict(dirs, tag);
+ return !empty(dic_path);
}
-static void
-nuspell_provider_dispose (EnchantProvider * me)
+static const char*
+nuspell_provider_identify([[maybe_unused]] EnchantProvider* me)
{
- g_free (me);
+ return "nuspell";
}
-static const char *
-nuspell_provider_identify (EnchantProvider * me _GL_UNUSED_PARAMETER)
+static const char*
+nuspell_provider_describe([[maybe_unused]] EnchantProvider* me)
{
- return "nuspell";
+ return "Nuspell Provider";
}
-static const char *
-nuspell_provider_describe (EnchantProvider * me _GL_UNUSED_PARAMETER)
+static char**
+nuspell_provider_list_dicts(EnchantProvider* me _GL_UNUSED_PARAMETER,
+ size_t* out_n_dicts)
{
- return "Nuspell Provider";
+ auto dicts = nuspell::search_default_dirs_for_dicts();
+ if (empty(dicts)) {
+ *out_n_dicts = 0;
+ return nullptr;
+ }
+ for (auto& d : dicts)
+ d = d.stem();
+ sort(begin(dicts), end(dicts));
+ auto it = unique(begin(dicts), end(dicts));
+ it = remove_if(begin(dicts), it, [](const filesystem::path& p) {
+ auto& n = p.native();
+ return any_of(begin(n), end(n),
+ [](auto c) { return c < 0 || c > 127; });
+ });
+ dicts.erase(it, end(dicts));
+
+ char** dictionary_list = g_new0(char*, size(dicts) + 1);
+ transform(begin(dicts), end(dicts), dictionary_list,
+ [](const filesystem::path& p) {
+ return g_strdup(p.string().c_str());
+ });
+ *out_n_dicts = size(dicts);
+ return dictionary_list;
}
-EnchantProvider *init_enchant_provider (void);
+extern "C" EnchantProvider* init_enchant_provider(void);
EnchantProvider *
init_enchant_provider (void)
@@ -432,5 +186,3 @@ init_enchant_provider (void)
return provider;
}
-
-} // extern C linkage