summary | refs | log | tree | commit | diff
path: root/providers/enchant_nuspell.cpp
diff options
context:
space:
mode:
author: Dimitrij Mijoski <dmjpp@hotmail.com> 2022-02-14 01:30:43 +0100
committer: Reuben Thomas <rrt@sc3d.org> 2022-02-18 08:20:14 +0000
commit: b158b93901b0364b0fad725b613c0e9b6ccbfa05 (patch)
tree: 108f2ed0302bf2cd966cb384c9c364dab6acbf1b /providers/enchant_nuspell.cpp
parent: da55e9635655ec6a9d7c8d848a47df4b5d17526e (diff)
download: enchant-b158b93901b0364b0fad725b613c0e9b6ccbfa05.tar.gz
Completely rework provider Nuspell, require Nuspell v5.1.0.
The new provider is much smaller in code size, more correct and uses some new APIs introduced in v5.1.0 for enumerating the dictionaries.
Diffstat (limited to 'providers/enchant_nuspell.cpp')
-rw-r--r-- providers/enchant_nuspell.cpp 420
1 files changed, 86 insertions, 334 deletions
diff --git a/providers/enchant_nuspell.cpp b/providers/enchant_nuspell.cpp
index 01e3edd..91d9502 100644
--- a/providers/enchant_nuspell.cpp
+++ b/providers/enchant_nuspell.cpp
@@ -1,4 +1,5 @@
/* enchant
+ * Copyright (C) 2022 Dimitrij Mijoski
* Copyright (C) 2020 Sander van Geloven
*
* This library is free software; you can redistribute it and/or
@@ -35,12 +36,7 @@
#include "config.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <string>
-#include <vector>
+#include <memory>
#include "enchant-provider.h"
#include "unused-parameter.h"
@@ -51,372 +47,130 @@
#include <glib.h>
using namespace std;
-using namespace nuspell;
-
-/***************************************************************************/
-class NuspellChecker
+// EnchantDict functions
+static int nuspell_dict_check(EnchantDict* me, const char* const word,
+ size_t len)
{
-public:
- bool checkWord (const char *word, size_t len);
- char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
-
- bool requestDictionary (const char * szLang);
-
-private:
- Dictionary nuspell;
-};
+ auto dict = static_cast<nuspell::Dictionary*>(me->user_data);
-/***************************************************************************/
-
-bool
-NuspellChecker::checkWord(const char *utf8Word, size_t len)
-{
- // the 8-bit encodings use precomposed forms
- char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
- auto ret = nuspell.spell(normalizedWord);
- g_free(normalizedWord);
- return ret;
+ using UniquePtr = unique_ptr<char[], decltype(&g_free)>;
+ auto normalized_word =
+ UniquePtr(g_utf8_normalize(word, len, G_NORMALIZE_NFC), g_free);
+ return !dict->spell(normalized_word.get());
}
-char**
-NuspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsug)
+static char** nuspell_dict_suggest(EnchantDict* me, const char* const word,
+ size_t len, size_t* out_n_suggs)
{
+ auto dict = static_cast<nuspell::Dictionary*>(me->user_data);
+
+ using UniquePtr = unique_ptr<char[], decltype(&g_free)>;
// the 8-bit encodings use precomposed forms
- char *normalizedWord = g_utf8_normalize (utf8Word, len, G_NORMALIZE_NFC);
+ auto normalized_word =
+ UniquePtr(g_utf8_normalize(word, len, G_NORMALIZE_NFC), g_free);
auto suggestions = vector<string>();
- nuspell.suggest(normalizedWord, suggestions);
- g_free(normalizedWord);
- if (suggestions.empty())
+ dict->suggest(normalized_word.get(), suggestions);
+ if (empty(suggestions)) {
+ *out_n_suggs = 0;
return nullptr;
- *nsug = suggestions.size();
- char **sug = g_new0 (char *, *nsug + 1);
- size_t i = 0;
- for (auto& suggest : suggestions) {
- char *word = g_new0(char, suggest.size() + 1);
- strcpy(word, suggest.c_str());
- sug[i] = word;
- i++;
- }
- return sug;
-}
-
-static void
-s_buildDictionaryDirs (vector<string> & dirs)
-{
- dirs.clear ();
-
- /* 1. personal overrides for Enchant
- * ~/.config/enchant/nuspell
- */
- gchar * tmp;
- char * config_dir = enchant_get_user_config_dir ();
- tmp = g_build_filename (config_dir, "nuspell", nullptr);
- dirs.push_back (tmp);
- free (config_dir);
- g_free(tmp);
-
- /* Dynamically retrieved from Nuspell dictionary finder:
- * 2. personal overrides for Hunspell
- * $XDG_DATA_HOME/hunspell
- * $XDG_DATA_HOME by default is $HOME/.local/share/
- * 3. system installed for Hunspell
- * $XDG_DATA_DIRS/hunspell
- * $XDG_DATA_DIRS/myspell (needed for Fedora)
- * $XDG_DATA_DIRS by default are /usr/local/share and /usr/share
- */
- nuspell::append_default_dir_paths(dirs);
-
- /* 5. system installations by Enchant
- * /usr/local/share/enchant/nuspell
- * /usr/share/enchant/nuspell
- */
- char * enchant_prefix = enchant_get_prefix_dir();
- if (enchant_prefix) {
- tmp = g_build_filename(enchant_prefix, "share", "enchant", "nuspell", nullptr);
- dirs.push_back (tmp);
- g_free(enchant_prefix);
- g_free(tmp);
- }
-
- /* Hunspell paths are used, therefore ENCHANT_NUSPELL_DICT_DIR is
- * irrelevant. Hence, the following paths are not to be considered:
- * /usr/local/share/nuspell and /usr/share/nuspell
- */
-}
-
-static void
-s_buildHashNames (vector<string> & names, const char * dict)
-{
- names.clear ();
-
- vector<string> dirs;
- s_buildDictionaryDirs (dirs);
-
- char *dict_dic = g_strconcat(dict, ".dic", nullptr);
- for (size_t i = 0; i < dirs.size(); i++) {
- char *tmp = g_build_filename (dirs[i].c_str(), dict_dic, nullptr);
- names.push_back (tmp);
- g_free (tmp);
- }
-
- g_free(dict_dic);
-}
-
-static const string
-s_correspondingAffFile(const string & dicFile)
-{
- string aff = dicFile;
- if (aff.size() >= 4 && aff.compare(aff.size() - 4, 4, ".dic") == 0) {
- aff.erase(aff.size() - 3);
- aff += "aff";
}
- return aff;
+ char** sug_list = g_new0(char*, size(suggestions) + 1);
+ transform(begin(suggestions), end(suggestions), sug_list,
+ [](const string& sug) { return g_strdup(sug.c_str()); });
+ *out_n_suggs = size(suggestions);
+ return sug_list;
}
+// End EnchantDict functions
-static bool
-s_fileExists(const string & file)
-{
- return g_file_test(file.c_str(), G_FILE_TEST_EXISTS) != 0;
-}
+// EnchantProvider functions
+static void nuspell_provider_dispose(EnchantProvider* me) { g_free(me); }
-static bool is_plausible_dict_for_tag(const char *dir_entry, const char *tag)
+static EnchantDict*
+nuspell_provider_request_dict([[maybe_unused]] EnchantProvider* me,
+ const char* const tag)
{
- const char *dic_suffix = ".dic";
- size_t dic_suffix_len = strlen(dic_suffix);
- size_t dir_entry_len = strlen(dir_entry);
- size_t tag_len = strlen(tag);
-
- if (dir_entry_len - dic_suffix_len < tag_len)
- return false;
- if (strcmp(dir_entry + dir_entry_len - dic_suffix_len, dic_suffix) != 0)
- return false;
- if (strncmp (dir_entry, tag, tag_len) != 0)
- return false;
- //e.g. requested dict for "fi",
- //reject "fil_PH.dic"
- //allow "fi-FOO.dic", "fi_FOO.dic", "fi.dic", etc.
- if (!ispunct(dir_entry[tag_len]))
- return false;
- return true;
-}
-
-static char *
-nuspell_request_dictionary (const char * tag)
-{
- vector<string> names;
-
- s_buildHashNames (names, tag);
-
- for (size_t i = 0; i < names.size (); i++) {
- if (g_file_test(names[i].c_str(), G_FILE_TEST_EXISTS) &&
- s_fileExists(s_correspondingAffFile(names[i]))) {
- return strdup (names[i].c_str());
- }
- }
-
- vector<string> dirs;
- s_buildDictionaryDirs (dirs);
-
- for (size_t i = 0; i < dirs.size(); i++) {
- GDir *dir = g_dir_open (dirs[i].c_str(), 0, nullptr);
- if (dir) {
- const char *dir_entry;
- while ((dir_entry = g_dir_read_name (dir)) != NULL) {
- if (is_plausible_dict_for_tag(dir_entry, tag)) {
- char *dict = g_build_filename (dirs[i].c_str(),
- dir_entry, nullptr);
- if(s_fileExists(s_correspondingAffFile(dict))) {
- g_dir_close (dir);
- return dict;
- }
- g_free(dict);
- }
- }
-
- g_dir_close (dir);
- }
- }
-
- return NULL;
-}
+ auto dirs = vector<filesystem::path>();
+ nuspell::append_default_dir_paths(dirs);
+ auto dic_path = nuspell::search_dirs_for_one_dict(dirs, tag);
+ if (empty(dic_path))
+ return nullptr;
-bool
-NuspellChecker::requestDictionary(const char *szLang)
-{
- char *dic = nuspell_request_dictionary (szLang);
- if (!dic)
- return false;
- string aff(s_correspondingAffFile(dic));
- if (!s_fileExists(aff))
- return false;
- auto path = string(dic);
- free(dic);
- if (path.size() >= 4 && path.compare(path.size() - 4, 4, ".dic") == 0)
- path.erase(path.size() - 4);
- else
- return false;
+ auto dict_cpp = make_unique<nuspell::Dictionary>();
try {
- nuspell = nuspell::Dictionary::load_from_path(path);
- } catch (const std::runtime_error& e) {
- return false;
+ dict_cpp->load_aff_dic(dic_path);
}
-
- return true;
-}
-
-/*
- * Enchant
- */
-
-static char **
-nuspell_dict_suggest (EnchantDict * me, const char *const word,
- size_t len, size_t * out_n_suggs)
-{
- NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
- return checker->suggestWord (word, len, out_n_suggs);
-}
-
-static int
-nuspell_dict_check (EnchantDict * me, const char *const word, size_t len)
-{
- NuspellChecker * checker = static_cast<NuspellChecker *>(me->user_data);
-
- return !(checker->checkWord(word, len));
-}
-
-static int
-nuspell_dict_is_word_character (EnchantDict * me _GL_UNUSED_PARAMETER,
- uint32_t uc, size_t n _GL_UNUSED_PARAMETER)
-{
- return g_unichar_isalpha(uc);
-}
-
-static void
-nuspell_provider_enum_dicts (const char * const directory,
- vector<string> & out_dicts)
-{
- GDir * dir = g_dir_open (directory, 0, nullptr);
- if (dir) {
- const char * entry;
- while ((entry = g_dir_read_name (dir)) != NULL) {
- char * utf8_entry = g_filename_to_utf8 (entry, -1, nullptr, nullptr, nullptr);
- if (utf8_entry) {
- string dir_entry (utf8_entry);
- g_free (utf8_entry);
-
- int hit = dir_entry.rfind (".dic");
- // don't include hyphenation dictionaries
- if (hit != -1) {
- // require .aff file to be present
- if(dir_entry.compare (0, 5, "hyph_") != 0) {
- char * dic = g_build_filename(directory, dir_entry.c_str(), nullptr);
- if (s_fileExists(s_correspondingAffFile(dic))) {
- out_dicts.push_back (dir_entry.substr (0, hit));
- }
- g_free(dic);
- }
- }
- }
- }
-
- g_dir_close (dir);
- }
-}
-
-extern "C" {
-
-static char **
-nuspell_provider_list_dicts (EnchantProvider * me _GL_UNUSED_PARAMETER,
- size_t * out_n_dicts)
-{
- vector<string> dict_dirs, dicts;
- char ** dictionary_list = NULL;
-
- s_buildDictionaryDirs (dict_dirs);
-
- for (size_t i = 0; i < dict_dirs.size(); i++) {
- nuspell_provider_enum_dicts (dict_dirs[i].c_str(), dicts);
- }
-
- if (dicts.size () > 0) {
- dictionary_list = g_new0 (char *, dicts.size() + 1);
-
- for (size_t i = 0; i < dicts.size(); i++)
- dictionary_list[i] = g_strdup (dicts[i].c_str());
- }
-
- *out_n_dicts = dicts.size ();
- return dictionary_list;
-}
-
-static EnchantDict *
-nuspell_provider_request_dict(EnchantProvider * me _GL_UNUSED_PARAMETER, const char *const tag)
-{
- NuspellChecker * checker = new NuspellChecker();
-
- if (!checker)
- return NULL;
-
- if (!checker->requestDictionary(tag)) {
- delete checker;
- return NULL;
+ catch (const nuspell::Dictionary_Loading_Error&) {
+ return nullptr;
}
- EnchantDict *dict = g_new0(EnchantDict, 1);
- dict->user_data = (void *) checker;
+ EnchantDict* dict = g_new0(EnchantDict, 1);
+ dict->user_data = static_cast<void*>(dict_cpp.release());
dict->check = nuspell_dict_check;
dict->suggest = nuspell_dict_suggest;
- // don't implement personal, session
- dict->is_word_character = nuspell_dict_is_word_character;
-
return dict;
}
-static void
-nuspell_provider_dispose_dict (EnchantProvider * me _GL_UNUSED_PARAMETER, EnchantDict * dict)
+static void nuspell_provider_dispose_dict([[maybe_unused]] EnchantProvider* me,
+ EnchantDict* dict)
{
- NuspellChecker *checker = (NuspellChecker *) dict->user_data;
- delete checker;
-
- g_free (dict);
+ auto dict_cpp = static_cast<nuspell::Dictionary*>(dict->user_data);
+ delete dict_cpp;
+ g_free(dict);
}
static int
-nuspell_provider_dictionary_exists (struct str_enchant_provider * me _GL_UNUSED_PARAMETER,
- const char *const tag)
+nuspell_provider_dictionary_exists([[maybe_unused]] EnchantProvider* me,
+ const char* const tag)
{
- vector <string> names;
- s_buildHashNames (names, tag);
- for (size_t i = 0; i < names.size(); i++) {
- if (g_file_test (names[i].c_str(), G_FILE_TEST_EXISTS) &&
- s_fileExists(s_correspondingAffFile(names[i]))) {
- return 1;
- }
- }
-
- return 0;
+ auto dirs = vector<filesystem::path>();
+ nuspell::append_default_dir_paths(dirs);
+ auto dic_path = nuspell::search_dirs_for_one_dict(dirs, tag);
+ return !empty(dic_path);
}
-static void
-nuspell_provider_dispose (EnchantProvider * me)
+static const char*
+nuspell_provider_identify([[maybe_unused]] EnchantProvider* me)
{
- g_free (me);
+ return "nuspell";
}
-static const char *
-nuspell_provider_identify (EnchantProvider * me _GL_UNUSED_PARAMETER)
+static const char*
+nuspell_provider_describe([[maybe_unused]] EnchantProvider* me)
{
- return "nuspell";
+ return "Nuspell Provider";
}
-static const char *
-nuspell_provider_describe (EnchantProvider * me _GL_UNUSED_PARAMETER)
+static char**
+nuspell_provider_list_dicts(EnchantProvider* me _GL_UNUSED_PARAMETER,
+ size_t* out_n_dicts)
{
- return "Nuspell Provider";
+ auto dicts = nuspell::search_default_dirs_for_dicts();
+ if (empty(dicts)) {
+ *out_n_dicts = 0;
+ return nullptr;
+ }
+ for (auto& d : dicts)
+ d = d.stem();
+ sort(begin(dicts), end(dicts));
+ auto it = unique(begin(dicts), end(dicts));
+ it = remove_if(begin(dicts), it, [](const filesystem::path& p) {
+ auto& n = p.native();
+ return any_of(begin(n), end(n),
+ [](auto c) { return c < 0 || c > 127; });
+ });
+ dicts.erase(it, end(dicts));
+
+ char** dictionary_list = g_new0(char*, size(dicts) + 1);
+ transform(begin(dicts), end(dicts), dictionary_list,
+ [](const filesystem::path& p) {
+ return g_strdup(p.string().c_str());
+ });
+ *out_n_dicts = size(dicts);
+ return dictionary_list;
}
-EnchantProvider *init_enchant_provider (void);
+extern "C" EnchantProvider* init_enchant_provider(void);
EnchantProvider *
init_enchant_provider (void)
@@ -432,5 +186,3 @@ init_enchant_provider (void)
return provider;
}
-
-} // extern C linkage