diff options
author | Reuben Thomas <rrt@sc3d.org> | 2020-09-01 23:01:36 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-01 23:01:36 +0100 |
commit | 54655a7e50b52dd06745e11a7ff6f70807298e23 (patch) | |
tree | 673b82a780fedfb80ff85a9d0473bfccba4ac133 | |
parent | cb8c758626e87eea0da4e0e89acc02ec4d2b1566 (diff) | |
parent | 092af72d3d950a8cd240c72711025af43f61bb94 (diff) | |
download | enchant-54655a7e50b52dd06745e11a7ff6f70807298e23.tar.gz |
Merge pull request #252 from rrthomas/master
Fix Hunspell behaviour with apostrophes and some code clean-up
-rw-r--r-- | configure.ac | 2 | ||||
m--------- | gnulib | 0 | ||||
-rw-r--r-- | m4/.gitignore | 2 | ||||
-rw-r--r-- | providers/enchant_hunspell.cpp | 31 | ||||
-rw-r--r-- | src/enchant.c | 19 | ||||
-rw-r--r-- | src/lib.c | 2 | ||||
-rw-r--r-- | src/pwl.c | 2 | ||||
-rw-r--r-- | tests/asan-suppressions.txt | 1 | ||||
-rw-r--r-- | tests/enchant_providers/providers.test.cpp | 49 |
9 files changed, 50 insertions, 58 deletions
diff --git a/configure.ac b/configure.ac index af388ca..18d988a 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([enchant],[2.2.9]) +AC_INIT([enchant],[2.2.10]) AC_CONFIG_SRCDIR(src/enchant.h) AC_CONFIG_AUX_DIR([build-aux]) AM_INIT_AUTOMAKE([subdir-objects]) diff --git a/gnulib b/gnulib -Subproject 28098428d3d371a238837f338739283cf19dc65 +Subproject b58bf6ee39a6a114550a6bb68e7db5262c17f8b diff --git a/m4/.gitignore b/m4/.gitignore index fc96477..9df6a16 100644 --- a/m4/.gitignore +++ b/m4/.gitignore @@ -59,3 +59,5 @@ /manywarnings-c++.m4 /gnu-make.m4 /getopt.m4 +/pid_t.m4 +/zzgnulib.m4 diff --git a/providers/enchant_hunspell.cpp b/providers/enchant_hunspell.cpp index 91fdce0..206e1e6 100644 --- a/providers/enchant_hunspell.cpp +++ b/providers/enchant_hunspell.cpp @@ -65,6 +65,7 @@ public: bool checkWord (const char *word, size_t len); char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs); const char *getWordchars (); + bool apostropheIsWordChar; bool requestDictionary (const char * szLang); @@ -83,7 +84,7 @@ g_iconv_is_valid(GIConv i) } HunspellChecker::HunspellChecker() -: m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr) +: apostropheIsWordChar(false), m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr) { } @@ -114,7 +115,7 @@ HunspellChecker::checkWord(const char *utf8Word, size_t len) if (static_cast<size_t>(-1) == result) return false; *out = '\0'; - if (hunspell->spell(word8)) + if (hunspell->spell(std::string(word8))) return true; else return false; @@ -141,29 +142,23 @@ HunspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsu return nullptr; *out = '\0'; - char **sugMS; - *nsug = hunspell->suggest(&sugMS, word8); + std::vector<std::string> sugMS = hunspell->suggest(word8); + *nsug = sugMS.size(); if (*nsug > 0) { char **sug = g_new0 (char *, *nsug + 1); for (size_t i=0; i<*nsug; i++) { - in = sugMS[i]; + in = const_cast<char *>(sugMS[i].c_str()); len_in = strlen(in); len_out = MAXWORDLEN; char *word = g_new0(char, len_out + 1); out = word; if (static_cast<size_t>(-1) == g_iconv(m_translate_out, &in, &len_in, &out, &len_out)) { - for (size_t j = i; j < *nsug; j++) - free(sugMS[j]); - free(sugMS); - *nsug = i; - return sug; + break; } - *(out) = 0; + *out = '\0'; sug[i] = word; - free(sugMS[i]); } - free(sugMS); return sug; } else @@ -328,6 +323,10 @@ HunspellChecker::requestDictionary(const char *szLang) m_translate_in = g_iconv_open(enc, "UTF-8"); m_translate_out = g_iconv_open("UTF-8", enc); + const char *word_chars = hunspell->get_wordchars(); + apostropheIsWordChar = g_utf8_strchr(word_chars, -1, g_utf8_get_char("'")) || + g_utf8_strchr(word_chars, -1, g_utf8_get_char("’")); + return true; } @@ -364,8 +363,12 @@ hunspell_dict_get_extra_word_characters (EnchantDict *me) static int hunspell_dict_is_word_character (EnchantDict *me, uint32_t uc, size_t n) { - (void)n; HunspellChecker * checker = static_cast<HunspellChecker *>(me->user_data); + /* Accept quote marks anywhere except at the end of a word, as per + hunspell's textparser.cxx/TextParser::next_token */ + if ((uc == g_utf8_get_char("'") || uc == g_utf8_get_char("’")) && checker->apostropheIsWordChar) { + return n < 2; + } return g_unichar_isalpha(uc) || g_utf8_strchr(checker->getWordchars(), -1, uc); } diff --git a/src/enchant.c b/src/enchant.c index bf00d8a..579b6a0 100644 --- a/src/enchant.c +++ b/src/enchant.c @@ -1,7 +1,7 @@ /* enchant * Copyright (C) 2003 Dom Lachowicz * 2007 Hannu Väisänen - * 2016-2017 Reuben Thomas + * 2016-2020 Reuben Thomas * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -135,9 +135,6 @@ print_utf (const char * str) static void do_mode_a (EnchantDict * dict, GString * word, size_t start_pos, size_t lineCount, gboolean terse_mode) { - size_t n_suggs; - char ** suggs; - if (word->len <= MIN_WORD_LENGTH || enchant_dict_check (dict, word->str, word->len) == 0) { if (!terse_mode) { if (lineCount) @@ -145,27 +142,23 @@ do_mode_a (EnchantDict * dict, GString * word, size_t start_pos, size_t lineCoun else printf ("*\n"); } - } - else { - suggs = enchant_dict_suggest (dict, word->str, - word->len, &n_suggs); + } else { + size_t n_suggs; + char ** suggs = enchant_dict_suggest (dict, word->str, word->len, &n_suggs); if (!n_suggs || !suggs) { printf ("# "); if (lineCount) printf ("%u ", (unsigned int)lineCount); print_utf (word->str); printf (" %u\n", (unsigned int)start_pos); - } - else { - size_t i = 0; - + } else { printf ("& "); if (lineCount) printf ("%u ", (unsigned int)lineCount); print_utf (word->str); printf (" %u %u:", (unsigned int)n_suggs, (unsigned int)start_pos); - for (i = 0; i < n_suggs; i++) { + for (size_t i = 0; i < n_suggs; i++) { putchar (' '); print_utf (suggs[i]); @@ -1358,7 +1358,7 @@ enchant_dict_is_word_character (EnchantDict * dict, uint32_t uc_in, size_t n) case G_UNICODE_TITLECASE_LETTER: case G_UNICODE_UPPERCASE_LETTER: case G_UNICODE_OTHER_LETTER: - case G_UNICODE_COMBINING_MARK: /* Older name for G_UNICODE_SPACING_MARK; deprecated since glib 2.30 */ + case G_UNICODE_SPACING_MARK: case G_UNICODE_ENCLOSING_MARK: case G_UNICODE_NON_SPACING_MARK: case G_UNICODE_DECIMAL_NUMBER: @@ -461,7 +461,7 @@ static int enchant_is_all_caps(const char*const word, size_t len) case G_UNICODE_SURROGATE: case G_UNICODE_MODIFIER_LETTER: case G_UNICODE_OTHER_LETTER: - case G_UNICODE_COMBINING_MARK: + case G_UNICODE_SPACING_MARK: case G_UNICODE_ENCLOSING_MARK: case G_UNICODE_NON_SPACING_MARK: case G_UNICODE_DECIMAL_NUMBER: diff --git a/tests/asan-suppressions.txt b/tests/asan-suppressions.txt index 89dc7cd..f066ce2 100644 --- a/tests/asan-suppressions.txt +++ b/tests/asan-suppressions.txt @@ -3,3 +3,4 @@ leak:getdelim # gnulib leak:set_this_relocation_prefix +leak:set_relocation_prefix diff --git a/tests/enchant_providers/providers.test.cpp b/tests/enchant_providers/providers.test.cpp index 16f7f49..7d72faf 100644 --- a/tests/enchant_providers/providers.test.cpp +++ b/tests/enchant_providers/providers.test.cpp @@ -95,36 +95,29 @@ int Test(char* path) int TestProvidersInDirectory(char * dir_name)
{
- GDir *dir;
- G_CONST_RETURN char *dir_entry;
- size_t entry_len, g_module_suffix_len;
+ GDir *dir = g_dir_open (dir_name, 0, NULL);
+ if (!dir)
+ return 0;
- char * filename;
- int result = 0;
-
- dir = g_dir_open (dir_name, 0, NULL);
- if (!dir)
- return 0;
-
- g_module_suffix_len = strlen (G_MODULE_SUFFIX);
+ size_t g_module_suffix_len = strlen (G_MODULE_SUFFIX);
- while ((dir_entry = g_dir_read_name (dir)) != NULL)
- {
- entry_len = strlen (dir_entry);
- if ((entry_len > g_module_suffix_len) &&
- !strcmp(dir_entry+(entry_len-g_module_suffix_len), G_MODULE_SUFFIX))
- {
- filename = g_build_filename (dir_name, dir_entry, NULL);
- int resultT = Test(filename);
- if(resultT != 0)
- {
- result = resultT;
- }
- g_free (filename);
- }
- }
-
- g_dir_close (dir);
+ int result = 0;
+ const char *dir_entry;
+ while ((dir_entry = g_dir_read_name (dir)) != NULL)
+ {
+ size_t entry_len = strlen (dir_entry);
+ if ((entry_len > g_module_suffix_len) &&
+ !strcmp(dir_entry + (entry_len - g_module_suffix_len), G_MODULE_SUFFIX))
+ {
+ char *filename = g_build_filename (dir_name, dir_entry, NULL);
+ int resultT = Test(filename);
+ if(resultT != 0)
+ result = resultT;
+ g_free (filename);
+ }
+ }
+
+ g_dir_close (dir);
return result;
}
|