diff options
author | Reuben Thomas <rrt@sc3d.org> | 2021-08-06 22:36:44 +0100 |
---|---|---|
committer | Reuben Thomas <rrt@sc3d.org> | 2021-08-06 22:56:50 +0100 |
commit | 1b044a506dc31c02c9bb9827fc0fdfaea6567a0f (patch) | |
tree | ebe37f8556df75355c7b978a4c2c54ae740df0a9 | |
parent | 5b34d2d98d1929bb4f50894232b22f4e95210873 (diff) | |
download | enchant-1b044a506dc31c02c9bb9827fc0fdfaea6567a0f.tar.gz |
Hunspell: ensure extra word characters are returned as UTF-8 (fix #278)
-rw-r--r-- | providers/enchant_hunspell.cpp | 53 |
1 files changed, 38 insertions, 15 deletions
```diff
diff --git a/providers/enchant_hunspell.cpp b/providers/enchant_hunspell.cpp
index a1c6815..3658e59 100644
--- a/providers/enchant_hunspell.cpp
+++ b/providers/enchant_hunspell.cpp
@@ -1,5 +1,6 @@
 /* enchant
  * Copyright (C) 2003-2004 Joan Moratinos <jmo@softcatala.org>, Dom Lachowicz
+ * Copyright (C) 2016-2021 Reuben Thomas <rrt@sc3d.org>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -16,8 +17,8 @@
  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  * Boston, MA 02110-1301, USA.
  *
- * In addition, as a special exception, Dom Lachowicz
- * gives permission to link the code of this program with
+ * In addition, as a special exception, the copyright holders
+ * give permission to link the code of this program with
  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
  * spell checker backend) and distribute linked combinations including
  * the two. You must obey the GNU General Public License in all
@@ -50,6 +51,8 @@
 
 /***************************************************************************/
 
+static char *empty_string = "";
+
 class HunspellChecker
 {
 public:
@@ -67,6 +70,7 @@ private:
 	GIConv m_translate_in; /* Selected translation from/to Unicode */
 	GIConv m_translate_out;
 	Hunspell *hunspell;
+	char *wordchars; /* Value returned by getWordChars() */
 };
 
 /***************************************************************************/
@@ -78,7 +82,7 @@ g_iconv_is_valid(GIConv i)
 }
 
 HunspellChecker::HunspellChecker()
-: apostropheIsWordChar(false), m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr)
+: apostropheIsWordChar(false), m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr), wordchars(nullptr)
 {
 }
 
@@ -89,6 +93,7 @@ HunspellChecker::~HunspellChecker()
 		g_iconv_close(m_translate_in);
 	if (g_iconv_is_valid(m_translate_out))
 		g_iconv_close(m_translate_out);
+	free(wordchars);
 }
 
 bool
@@ -109,10 +114,7 @@ HunspellChecker::checkWord(const char *utf8Word, size_t len)
 	if (static_cast<size_t>(-1) == result)
 		return false;
 	*out = '\0';
-	if (hunspell->spell(std::string(word8)))
-		return true;
-	else
-		return false;
+	return hunspell->spell(std::string(word8)) != 0;
 }
 
 char**
@@ -155,14 +157,13 @@ HunspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsu
 		}
 		return sug;
 	}
-	else
-		return nullptr;
+	return nullptr;
 }
 
-const char*
+_GL_ATTRIBUTE_PURE const char*
 HunspellChecker::getWordchars()
 {
-	return hunspell->get_wordchars();
+	return static_cast<const char *>(wordchars);
 }
 
 static void
@@ -304,8 +305,11 @@ HunspellChecker::requestDictionary(const char *szLang)
 	std::string aff(s_correspondingAffFile(dic));
 	if (s_fileExists(aff))
 	{
-		if (hunspell)
+		if (hunspell) {
 			delete hunspell;
+			free(wordchars);
+			wordchars = NULL;
+		}
 		hunspell = new Hunspell(aff.c_str(), dic);
 	}
 	free(dic);
@@ -317,9 +321,28 @@ HunspellChecker::requestDictionary(const char *szLang)
 	m_translate_in = g_iconv_open(enc, "UTF-8");
 	m_translate_out = g_iconv_open("UTF-8", enc);
 
-	const char *word_chars = hunspell->get_wordchars();
-	apostropheIsWordChar = g_utf8_strchr(word_chars, -1, g_utf8_get_char("'")) ||
-		g_utf8_strchr(word_chars, -1, g_utf8_get_char("’"));
+	char *native_wordchars = strdup(hunspell->get_wordchars());
+	if (native_wordchars == NULL)
+		return false;
+	char word8[MAXWORDLEN + 1];
+	char *in = native_wordchars;
+	char *out = word8;
+	size_t len_in = strlen(in);
+	size_t len_out = sizeof( word8 ) - 1;
+	size_t result = g_iconv(m_translate_out, &in, &len_in, &out, &len_out);
+	if (static_cast<size_t>(-1) != result) {
+		*out = '\0';
+		wordchars = strdup(word8);
+		if (wordchars == NULL)
+			return false;
+	}
+	free(native_wordchars);
+	if (wordchars == NULL)
+		wordchars = strdup(empty_string);
+	if (wordchars == NULL)
+		return false;
+	apostropheIsWordChar = g_utf8_strchr(wordchars, -1, g_utf8_get_char("'")) ||
+		g_utf8_strchr(wordchars, -1, g_utf8_get_char("’"));
 	return true;
 }
 
```