summary refs log tree commit diff
diff options
context:
space:
mode:
author Reuben Thomas <rrt@sc3d.org> 2020-09-01 16:34:51 +0100
committer Reuben Thomas <rrt@sc3d.org> 2020-09-01 21:32:26 +0100
commit 4f857ec36abaea95652675f5fad4bf3abe7f16bb (patch)
tree 0a78eebf527ead773b257903a47aad7d7f82b695
parent 13129240dec18f43b30f4e090d43bf1e5471b7a9 (diff)
download enchant-4f857ec36abaea95652675f5fad4bf3abe7f16bb.tar.gz
Hunspell: implement Hunspell’s rule for treating quotes as word char
-rw-r--r-- providers/enchant_hunspell.cpp 13
1 files changed, 11 insertions, 2 deletions
diff --git a/providers/enchant_hunspell.cpp b/providers/enchant_hunspell.cpp
index 91fdce0..d768ce4 100644
--- a/providers/enchant_hunspell.cpp
+++ b/providers/enchant_hunspell.cpp
@@ -65,6 +65,7 @@ public:
bool checkWord (const char *word, size_t len);
char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
const char *getWordchars ();
+ bool apostropheIsWordChar;
bool requestDictionary (const char * szLang);
@@ -83,7 +84,7 @@ g_iconv_is_valid(GIConv i)
}
HunspellChecker::HunspellChecker()
-: m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr)
+: apostropheIsWordChar(false), m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr)
{
}
@@ -328,6 +329,10 @@ HunspellChecker::requestDictionary(const char *szLang)
m_translate_in = g_iconv_open(enc, "UTF-8");
m_translate_out = g_iconv_open("UTF-8", enc);
+ const char *word_chars = hunspell->get_wordchars();
+ apostropheIsWordChar = g_utf8_strchr(word_chars, -1, g_utf8_get_char("'")) ||
+ g_utf8_strchr(word_chars, -1, g_utf8_get_char("’"));
+
return true;
}
@@ -364,8 +369,12 @@ hunspell_dict_get_extra_word_characters (EnchantDict *me)
static int
hunspell_dict_is_word_character (EnchantDict *me, uint32_t uc, size_t n)
{
- (void)n;
HunspellChecker * checker = static_cast<HunspellChecker *>(me->user_data);
+ /* Accept quote marks anywhere except at the end of a word, as per
+ hunspell's textparser.cxx/TextParser::next_token */
+ if ((uc == g_utf8_get_char("'") || uc == g_utf8_get_char("’")) && checker->apostropheIsWordChar) {
+ return n < 2;
+ }
return g_unichar_isalpha(uc) || g_utf8_strchr(checker->getWordchars(), -1, uc);
}