diff options
Diffstat (limited to 'providers/enchant_nuspell.cpp')
-rw-r--r-- | providers/enchant_nuspell.cpp | 420 |
1 files changed, 86 insertions, 334 deletions
diff --git a/providers/enchant_nuspell.cpp b/providers/enchant_nuspell.cpp index 01e3edd..91d9502 100644 --- a/providers/enchant_nuspell.cpp +++ b/providers/enchant_nuspell.cpp @@ -1,4 +1,5 @@ /* enchant + * Copyright (C) 2022 Dimitrij Mijoski * Copyright (C) 2020 Sander van Geloven * * This library is free software; you can redistribute it and/or @@ -35,12 +36,7 @@ #include "config.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <string> -#include <vector> +#include <memory> #include "enchant-provider.h" #include "unused-parameter.h" @@ -51,372 +47,130 @@ #include <glib.h> using namespace std; -using namespace nuspell; - -/***************************************************************************/ -class NuspellChecker +// EnchantDict functions +static int nuspell_dict_check(EnchantDict* me, const char* const word, + size_t len) { -public: - bool checkWord (const char *word, size_t len); - char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs); - - bool requestDictionary (const char * szLang); - -private: - Dictionary nuspell; -}; + auto dict = static_cast<nuspell::Dictionary*>(me->user_data); -/***************************************************************************/ - -bool -NuspellChecker::checkWord(const char *utf8Word, size_t len) -{ - // the 8-bit encodings use precomposed forms - char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC); - auto ret = nuspell.spell(normalizedWord); - g_free(normalizedWord); - return ret; + using UniquePtr = unique_ptr<char[], decltype(&g_free)>; + auto normalized_word = + UniquePtr(g_utf8_normalize(word, len, G_NORMALIZE_NFC), g_free); + return !dict->spell(normalized_word.get()); } -char** -NuspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug) +static char** nuspell_dict_suggest(EnchantDict* me, const char* const word, + size_t len, size_t* out_n_suggs) { + auto dict = static_cast<nuspell::Dictionary*>(me->user_data); + + using UniquePtr = unique_ptr<char[], decltype(&g_free)>; // the 8-bit encodings use precomposed forms - char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC); + auto normalized_word = + UniquePtr(g_utf8_normalize(word, len, G_NORMALIZE_NFC), g_free); auto suggestions = vector<string>(); - nuspell.suggest(normalizedWord, suggestions); - g_free(normalizedWord); - if (suggestions.empty()) + dict->suggest(normalized_word.get(), suggestions); + if (empty(suggestions)) { + *out_n_suggs = 0; return nullptr; - *nsug = suggestions.size(); - char **sug = g_new0 (char *, *nsug + 1); - size_t i = 0; - for (auto& suggest : suggestions) { - char *word = g_new0(char, suggest.size() + 1); - strcpy(word, suggest.c_str()); - sug[i] = word; - i++; - } - return sug; -} - -static void -s_buildDictionaryDirs (vector<string> & dirs) -{ - dirs.clear (); - - /* 1. personal overrides for Enchant - * ~/.config/enchant/nuspell - */ - gchar * tmp; - char * config_dir = enchant_get_user_config_dir (); - tmp = g_build_filename (config_dir, "nuspell", nullptr); - dirs.push_back (tmp); - free (config_dir); - g_free(tmp); - - /* Dynamically retrieved from Nuspell dictionary finder: - * 2. personal overrides for Hunspell - * $XDG_DATA_HOME/hunspell - * $XDG_DATA_HOME by default is $HOME/.local/share/ - * 3. system installed for Hunspell - * $XDG_DATA_DIRS/hunspell - * $XDG_DATA_DIRS/myspell (needed for Fedora) - * $XDG_DATA_DIRS by default are /usr/local/share and /usr/share - */ - nuspell::append_default_dir_paths(dirs); - - /* 5. system installations by Enchant - * /usr/local/share/enchant/nuspell - * /usr/share/enchant/nuspell - */ - char * enchant_prefix = enchant_get_prefix_dir(); - if (enchant_prefix) { - tmp = g_build_filename(enchant_prefix, "share", "enchant", "nuspell", nullptr); - dirs.push_back (tmp); - g_free(enchant_prefix); - g_free(tmp); - } - - /* Hunspell paths are used, therefore ENCHANT_NUSPELL_DICT_DIR is - * irrelevant. Hence, the following paths are not to be considered: - * /usr/local/share/nuspell and /usr/share/nuspell - */ -} - -static void -s_buildHashNames (vector<string> & names, const char * dict) -{ - names.clear (); - - vector<string> dirs; - s_buildDictionaryDirs (dirs); - - char *dict_dic = g_strconcat(dict, ".dic", nullptr); - for (size_t i = 0; i < dirs.size(); i++) { - char *tmp = g_build_filename (dirs[i].c_str(), dict_dic, nullptr); - names.push_back (tmp); - g_free (tmp); - } - - g_free(dict_dic); -} - -static const string -s_correspondingAffFile(const string & dicFile) -{ - string aff = dicFile; - if (aff.size() >= 4 && aff.compare(aff.size() - 4, 4, ".dic") == 0) { - aff.erase(aff.size() - 3); - aff += "aff"; } - return aff; + char** sug_list = g_new0(char*, size(suggestions) + 1); + transform(begin(suggestions), end(suggestions), sug_list, + [](const string& sug) { return g_strdup(sug.c_str()); }); + *out_n_suggs = size(suggestions); + return sug_list; } +// End EnchantDict functions -static bool -s_fileExists(const string & file) -{ - return g_file_test(file.c_str(), G_FILE_TEST_EXISTS) != 0; -} +// EnchantProvider functions +static void nuspell_provider_dispose(EnchantProvider* me) { g_free(me); } -static bool is_plausible_dict_for_tag(const char *dir_entry, const char *tag) +static EnchantDict* +nuspell_provider_request_dict([[maybe_unused]] EnchantProvider* me, + const char* const tag) { - const char *dic_suffix = ".dic"; - size_t dic_suffix_len = strlen(dic_suffix); - size_t dir_entry_len = strlen(dir_entry); - size_t tag_len = strlen(tag); - - if (dir_entry_len - dic_suffix_len < tag_len) - return false; - if (strcmp(dir_entry + dir_entry_len - dic_suffix_len, dic_suffix) != 0) - return false; - if (strncmp (dir_entry, tag, tag_len) != 0) - return false; - //e.g. requested dict for "fi", - //reject "fil_PH.dic" - //allow "fi-FOO.dic", "fi_FOO.dic", "fi.dic", etc. - if (!ispunct(dir_entry[tag_len])) - return false; - return true; -} - -static char * -nuspell_request_dictionary (const char * tag) -{ - vector<string> names; - - s_buildHashNames (names, tag); - - for (size_t i = 0; i < names.size (); i++) { - if (g_file_test(names[i].c_str(), G_FILE_TEST_EXISTS) && - s_fileExists(s_correspondingAffFile(names[i]))) { - return strdup (names[i].c_str()); - } - } - - vector<string> dirs; - s_buildDictionaryDirs (dirs); - - for (size_t i = 0; i < dirs.size(); i++) { - GDir *dir = g_dir_open (dirs[i].c_str(), 0, nullptr); - if (dir) { - const char *dir_entry; - while ((dir_entry = g_dir_read_name (dir)) != NULL) { - if (is_plausible_dict_for_tag(dir_entry, tag)) { - char *dict = g_build_filename (dirs[i].c_str(), - dir_entry, nullptr); - if(s_fileExists(s_correspondingAffFile(dict))) { - g_dir_close (dir); - return dict; - } - g_free(dict); - } - } - - g_dir_close (dir); - } - } - - return NULL; -} + auto dirs = vector<filesystem::path>(); + nuspell::append_default_dir_paths(dirs); + auto dic_path = nuspell::search_dirs_for_one_dict(dirs, tag); + if (empty(dic_path)) + return nullptr; -bool -NuspellChecker::requestDictionary(const char *szLang) -{ - char *dic = nuspell_request_dictionary (szLang); - if (!dic) - return false; - string aff(s_correspondingAffFile(dic)); - if (!s_fileExists(aff)) - return false; - auto path = string(dic); - free(dic); - if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".dic") == 0) - path.erase(path.size() - 4); - else - return false; + auto dict_cpp = make_unique<nuspell::Dictionary>(); try { - nuspell = nuspell::Dictionary::load_from_path(path); - } catch (const std::runtime_error& e) { - return false; + dict_cpp->load_aff_dic(dic_path); } - - return true; -} - -/* - * Enchant - */ - -static char ** -nuspell_dict_suggest (EnchantDict * me, const char *const word, - size_t len, size_t * out_n_suggs) -{ - NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data); - return checker->suggestWord (word, len, out_n_suggs); -} - -static int -nuspell_dict_check (EnchantDict * me, const char *const word, size_t len) -{ - NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data); - - return !(checker->checkWord(word, len)); -} - -static int -nuspell_dict_is_word_character (EnchantDict * me _GL_UNUSED_PARAMETER, - uint32_t uc, size_t n _GL_UNUSED_PARAMETER) -{ - return g_unichar_isalpha(uc); -} - -static void -nuspell_provider_enum_dicts (const char * const directory, - vector<string> & out_dicts) -{ - GDir * dir = g_dir_open (directory, 0, nullptr); - if (dir) { - const char * entry; - while ((entry = g_dir_read_name (dir)) != NULL) { - char * utf8_entry = g_filename_to_utf8 (entry, -1, nullptr, nullptr, nullptr); - if (utf8_entry) { - string dir_entry (utf8_entry); - g_free (utf8_entry); - - int hit = dir_entry.rfind (".dic"); - // don't include hyphenation dictionaries - if (hit != -1) { - // require .aff file to be present - if(dir_entry.compare (0, 5, "hyph_") != 0) { - char * dic = g_build_filename(directory, dir_entry.c_str(), nullptr); - if (s_fileExists(s_correspondingAffFile(dic))) { - out_dicts.push_back (dir_entry.substr (0, hit)); - } - g_free(dic); - } - } - } - } - - g_dir_close (dir); - } -} - -extern "C" { - -static char ** -nuspell_provider_list_dicts (EnchantProvider * me _GL_UNUSED_PARAMETER, - size_t * out_n_dicts) -{ - vector<string> dict_dirs, dicts; - char ** dictionary_list = NULL; - - s_buildDictionaryDirs (dict_dirs); - - for (size_t i = 0; i < dict_dirs.size(); i++) { - nuspell_provider_enum_dicts (dict_dirs[i].c_str(), dicts); - } - - if (dicts.size () > 0) { - dictionary_list = g_new0 (char *, dicts.size() + 1); - - for (size_t i = 0; i < dicts.size(); i++) - dictionary_list[i] = g_strdup (dicts[i].c_str()); - } - - *out_n_dicts = dicts.size (); - return dictionary_list; -} - -static EnchantDict * -nuspell_provider_request_dict(EnchantProvider * me _GL_UNUSED_PARAMETER, const char *const tag) -{ - NuspellChecker * checker = new NuspellChecker(); - - if (!checker) - return NULL; - - if (!checker->requestDictionary(tag)) { - delete checker; - return NULL; + catch (const nuspell::Dictionary_Loading_Error&) { + return nullptr; } - EnchantDict *dict = g_new0(EnchantDict, 1); - dict->user_data = (void *) checker; + EnchantDict* dict = g_new0(EnchantDict, 1); + dict->user_data = static_cast<void*>(dict_cpp.release()); dict->check = nuspell_dict_check; dict->suggest = nuspell_dict_suggest; - // don't implement personal, session - dict->is_word_character = nuspell_dict_is_word_character; - return dict; } -static void -nuspell_provider_dispose_dict (EnchantProvider * me _GL_UNUSED_PARAMETER, EnchantDict * dict) +static void nuspell_provider_dispose_dict([[maybe_unused]] EnchantProvider* me, + EnchantDict* dict) { - NuspellChecker *checker = (NuspellChecker *) dict->user_data; - delete checker; - - g_free (dict); + auto dict_cpp = static_cast<nuspell::Dictionary*>(dict->user_data); + delete dict_cpp; + g_free(dict); } static int -nuspell_provider_dictionary_exists (struct str_enchant_provider * me _GL_UNUSED_PARAMETER, - const char *const tag) +nuspell_provider_dictionary_exists([[maybe_unused]] EnchantProvider* me, + const char* const tag) { - vector <string> names; - s_buildHashNames (names, tag); - for (size_t i = 0; i < names.size(); i++) { - if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS) && - s_fileExists(s_correspondingAffFile(names[i]))) { - return 1; - } - } - - return 0; + auto dirs = vector<filesystem::path>(); + nuspell::append_default_dir_paths(dirs); + auto dic_path = nuspell::search_dirs_for_one_dict(dirs, tag); + return !empty(dic_path); } -static void -nuspell_provider_dispose (EnchantProvider * me) +static const char* +nuspell_provider_identify([[maybe_unused]] EnchantProvider* me) { - g_free (me); + return "nuspell"; } -static const char * -nuspell_provider_identify (EnchantProvider * me _GL_UNUSED_PARAMETER) +static const char* +nuspell_provider_describe([[maybe_unused]] EnchantProvider* me) { - return "nuspell"; + return "Nuspell Provider"; } -static const char * -nuspell_provider_describe (EnchantProvider * me _GL_UNUSED_PARAMETER) +static char** +nuspell_provider_list_dicts(EnchantProvider* me _GL_UNUSED_PARAMETER, + size_t* out_n_dicts) { - return "Nuspell Provider"; + auto dicts = nuspell::search_default_dirs_for_dicts(); + if (empty(dicts)) { + *out_n_dicts = 0; + return nullptr; + } + for (auto& d : dicts) + d = d.stem(); + sort(begin(dicts), end(dicts)); + auto it = unique(begin(dicts), end(dicts)); + it = remove_if(begin(dicts), it, [](const filesystem::path& p) { + auto& n = p.native(); + return any_of(begin(n), end(n), + [](auto c) { return c < 0 || c > 127; }); + }); + dicts.erase(it, end(dicts)); + + char** dictionary_list = g_new0(char*, size(dicts) + 1); + transform(begin(dicts), end(dicts), dictionary_list, + [](const filesystem::path& p) { + return g_strdup(p.string().c_str()); + }); + *out_n_dicts = size(dicts); + return dictionary_list; } -EnchantProvider *init_enchant_provider (void); +extern "C" EnchantProvider* init_enchant_provider(void); EnchantProvider * init_enchant_provider (void) @@ -432,5 +186,3 @@ init_enchant_provider (void) return provider; } - -} // extern C linkage |