diff options
author | Reuben Thomas <rrt@sc3d.org> | 2020-09-01 16:34:51 +0100 |
---|---|---|
committer | Reuben Thomas <rrt@sc3d.org> | 2020-09-01 21:32:26 +0100 |
commit | 4f857ec36abaea95652675f5fad4bf3abe7f16bb (patch) | |
tree | 0a78eebf527ead773b257903a47aad7d7f82b695 | |
parent | 13129240dec18f43b30f4e090d43bf1e5471b7a9 (diff) | |
download | enchant-4f857ec36abaea95652675f5fad4bf3abe7f16bb.tar.gz |
Hunspell: implement Hunspell’s rule for treating quotes as word char
-rw-r--r-- | providers/enchant_hunspell.cpp | 13 |
1 file changed, 11 insertions, 2 deletions
```diff
diff --git a/providers/enchant_hunspell.cpp b/providers/enchant_hunspell.cpp
index 91fdce0..d768ce4 100644
--- a/providers/enchant_hunspell.cpp
+++ b/providers/enchant_hunspell.cpp
@@ -65,6 +65,7 @@ public:
 	bool checkWord (const char *word, size_t len);
 	char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
 	const char *getWordchars ();
+	bool apostropheIsWordChar;
 	bool requestDictionary (const char * szLang);
@@ -83,7 +84,7 @@ g_iconv_is_valid(GIConv i)
 }
 HunspellChecker::HunspellChecker()
-: m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr)
+: apostropheIsWordChar(false), m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr)
 {
 }
@@ -328,6 +329,10 @@ HunspellChecker::requestDictionary(const char *szLang)
 	m_translate_in = g_iconv_open(enc, "UTF-8");
 	m_translate_out = g_iconv_open("UTF-8", enc);
+	const char *word_chars = hunspell->get_wordchars();
+	apostropheIsWordChar = g_utf8_strchr(word_chars, -1, g_utf8_get_char("'")) ||
+		g_utf8_strchr(word_chars, -1, g_utf8_get_char("’"));
+
 	return true;
 }
@@ -364,8 +369,12 @@ hunspell_dict_get_extra_word_characters (EnchantDict *me)
 static int
 hunspell_dict_is_word_character (EnchantDict *me, uint32_t uc, size_t n)
 {
-	(void)n;
 	HunspellChecker * checker = static_cast<HunspellChecker *>(me->user_data);
+	/* Accept quote marks anywhere except at the end of a word, as per
+	   hunspell's textparser.cxx/TextParser::next_token */
+	if ((uc == g_utf8_get_char("'") || uc == g_utf8_get_char("’")) && checker->apostropheIsWordChar) {
+		return n < 2;
+	}
 	return g_unichar_isalpha(uc) || g_utf8_strchr(checker->getWordchars(), -1, uc);
 }
```