summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Reuben Thomas <rrt@sc3d.org> 2020-09-01 23:01:36 +0100
committer: GitHub <noreply@github.com> 2020-09-01 23:01:36 +0100
commit: 54655a7e50b52dd06745e11a7ff6f70807298e23 (patch)
tree: 673b82a780fedfb80ff85a9d0473bfccba4ac133
parent: cb8c758626e87eea0da4e0e89acc02ec4d2b1566 (diff)
parent: 092af72d3d950a8cd240c72711025af43f61bb94 (diff)
download: enchant-54655a7e50b52dd06745e11a7ff6f70807298e23.tar.gz
Merge pull request #252 from rrthomas/master
Fix Hunspell behaviour with apostrophes and some code clean-up
-rw-r--r-- configure.ac 2
m--------- gnulib 0
-rw-r--r-- m4/.gitignore 2
-rw-r--r-- providers/enchant_hunspell.cpp 31
-rw-r--r-- src/enchant.c 19
-rw-r--r-- src/lib.c 2
-rw-r--r-- src/pwl.c 2
-rw-r--r-- tests/asan-suppressions.txt 1
-rw-r--r-- tests/enchant_providers/providers.test.cpp 49
9 files changed, 50 insertions, 58 deletions
diff --git a/configure.ac b/configure.ac
index af388ca..18d988a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([enchant],[2.2.9])
+AC_INIT([enchant],[2.2.10])
AC_CONFIG_SRCDIR(src/enchant.h)
AC_CONFIG_AUX_DIR([build-aux])
AM_INIT_AUTOMAKE([subdir-objects])
diff --git a/gnulib b/gnulib
-Subproject 28098428d3d371a238837f338739283cf19dc65
+Subproject b58bf6ee39a6a114550a6bb68e7db5262c17f8b
diff --git a/m4/.gitignore b/m4/.gitignore
index fc96477..9df6a16 100644
--- a/m4/.gitignore
+++ b/m4/.gitignore
@@ -59,3 +59,5 @@
/manywarnings-c++.m4
/gnu-make.m4
/getopt.m4
+/pid_t.m4
+/zzgnulib.m4
diff --git a/providers/enchant_hunspell.cpp b/providers/enchant_hunspell.cpp
index 91fdce0..206e1e6 100644
--- a/providers/enchant_hunspell.cpp
+++ b/providers/enchant_hunspell.cpp
@@ -65,6 +65,7 @@ public:
bool checkWord (const char *word, size_t len);
char **suggestWord (const char* const word, size_t len, size_t *out_n_suggs);
const char *getWordchars ();
+ bool apostropheIsWordChar;
bool requestDictionary (const char * szLang);
@@ -83,7 +84,7 @@ g_iconv_is_valid(GIConv i)
}
HunspellChecker::HunspellChecker()
-: m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr)
+: apostropheIsWordChar(false), m_translate_in(nullptr), m_translate_out(nullptr), hunspell(nullptr)
{
}
@@ -114,7 +115,7 @@ HunspellChecker::checkWord(const char *utf8Word, size_t len)
if (static_cast<size_t>(-1) == result)
return false;
*out = '\0';
- if (hunspell->spell(word8))
+ if (hunspell->spell(std::string(word8)))
return true;
else
return false;
@@ -141,29 +142,23 @@ HunspellChecker::suggestWord(const char* const utf8Word, size_t len, size_t *nsu
return nullptr;
*out = '\0';
- char **sugMS;
- *nsug = hunspell->suggest(&sugMS, word8);
+ std::vector<std::string> sugMS = hunspell->suggest(word8);
+ *nsug = sugMS.size();
if (*nsug > 0) {
char **sug = g_new0 (char *, *nsug + 1);
for (size_t i=0; i<*nsug; i++) {
- in = sugMS[i];
+ in = const_cast<char *>(sugMS[i].c_str());
len_in = strlen(in);
len_out = MAXWORDLEN;
char *word = g_new0(char, len_out + 1);
out = word;
if (static_cast<size_t>(-1) == g_iconv(m_translate_out, &in, &len_in, &out, &len_out)) {
- for (size_t j = i; j < *nsug; j++)
- free(sugMS[j]);
- free(sugMS);
-
*nsug = i;
- return sug;
+ break;
}
- *(out) = 0;
+ *out = '\0';
sug[i] = word;
- free(sugMS[i]);
}
- free(sugMS);
return sug;
}
else
@@ -328,6 +323,10 @@ HunspellChecker::requestDictionary(const char *szLang)
m_translate_in = g_iconv_open(enc, "UTF-8");
m_translate_out = g_iconv_open("UTF-8", enc);
+ const char *word_chars = hunspell->get_wordchars();
+ apostropheIsWordChar = g_utf8_strchr(word_chars, -1, g_utf8_get_char("'")) ||
+ g_utf8_strchr(word_chars, -1, g_utf8_get_char("’"));
+
return true;
}
@@ -364,8 +363,12 @@ hunspell_dict_get_extra_word_characters (EnchantDict *me)
static int
hunspell_dict_is_word_character (EnchantDict *me, uint32_t uc, size_t n)
{
- (void)n;
HunspellChecker * checker = static_cast<HunspellChecker *>(me->user_data);
+ /* Accept quote marks anywhere except at the end of a word, as per
+ hunspell's textparser.cxx/TextParser::next_token */
+ if ((uc == g_utf8_get_char("'") || uc == g_utf8_get_char("’")) && checker->apostropheIsWordChar) {
+ return n < 2;
+ }
return g_unichar_isalpha(uc) || g_utf8_strchr(checker->getWordchars(), -1, uc);
}
diff --git a/src/enchant.c b/src/enchant.c
index bf00d8a..579b6a0 100644
--- a/src/enchant.c
+++ b/src/enchant.c
@@ -1,7 +1,7 @@
/* enchant
* Copyright (C) 2003 Dom Lachowicz
* 2007 Hannu Väisänen
- * 2016-2017 Reuben Thomas
+ * 2016-2020 Reuben Thomas
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -135,9 +135,6 @@ print_utf (const char * str)
static void
do_mode_a (EnchantDict * dict, GString * word, size_t start_pos, size_t lineCount, gboolean terse_mode)
{
- size_t n_suggs;
- char ** suggs;
-
if (word->len <= MIN_WORD_LENGTH || enchant_dict_check (dict, word->str, word->len) == 0) {
if (!terse_mode) {
if (lineCount)
@@ -145,27 +142,23 @@ do_mode_a (EnchantDict * dict, GString * word, size_t start_pos, size_t lineCoun
else
printf ("*\n");
}
- }
- else {
- suggs = enchant_dict_suggest (dict, word->str,
- word->len, &n_suggs);
+ } else {
+ size_t n_suggs;
+ char ** suggs = enchant_dict_suggest (dict, word->str, word->len, &n_suggs);
if (!n_suggs || !suggs) {
printf ("# ");
if (lineCount)
printf ("%u ", (unsigned int)lineCount);
print_utf (word->str);
printf (" %u\n", (unsigned int)start_pos);
- }
- else {
- size_t i = 0;
-
+ } else {
printf ("& ");
if (lineCount)
printf ("%u ", (unsigned int)lineCount);
print_utf (word->str);
printf (" %u %u:", (unsigned int)n_suggs, (unsigned int)start_pos);
- for (i = 0; i < n_suggs; i++) {
+ for (size_t i = 0; i < n_suggs; i++) {
putchar (' ');
print_utf (suggs[i]);
diff --git a/src/lib.c b/src/lib.c
index dae11d9..5a5cb5a 100644
--- a/src/lib.c
+++ b/src/lib.c
@@ -1358,7 +1358,7 @@ enchant_dict_is_word_character (EnchantDict * dict, uint32_t uc_in, size_t n)
case G_UNICODE_TITLECASE_LETTER:
case G_UNICODE_UPPERCASE_LETTER:
case G_UNICODE_OTHER_LETTER:
- case G_UNICODE_COMBINING_MARK: /* Older name for G_UNICODE_SPACING_MARK; deprecated since glib 2.30 */
+ case G_UNICODE_SPACING_MARK:
case G_UNICODE_ENCLOSING_MARK:
case G_UNICODE_NON_SPACING_MARK:
case G_UNICODE_DECIMAL_NUMBER:
diff --git a/src/pwl.c b/src/pwl.c
index ecd7eed..4f118a1 100644
--- a/src/pwl.c
+++ b/src/pwl.c
@@ -461,7 +461,7 @@ static int enchant_is_all_caps(const char*const word, size_t len)
case G_UNICODE_SURROGATE:
case G_UNICODE_MODIFIER_LETTER:
case G_UNICODE_OTHER_LETTER:
- case G_UNICODE_COMBINING_MARK:
+ case G_UNICODE_SPACING_MARK:
case G_UNICODE_ENCLOSING_MARK:
case G_UNICODE_NON_SPACING_MARK:
case G_UNICODE_DECIMAL_NUMBER:
diff --git a/tests/asan-suppressions.txt b/tests/asan-suppressions.txt
index 89dc7cd..f066ce2 100644
--- a/tests/asan-suppressions.txt
+++ b/tests/asan-suppressions.txt
@@ -3,3 +3,4 @@ leak:getdelim
# gnulib
leak:set_this_relocation_prefix
+leak:set_relocation_prefix
diff --git a/tests/enchant_providers/providers.test.cpp b/tests/enchant_providers/providers.test.cpp
index 16f7f49..7d72faf 100644
--- a/tests/enchant_providers/providers.test.cpp
+++ b/tests/enchant_providers/providers.test.cpp
@@ -95,36 +95,29 @@ int Test(char* path)
int TestProvidersInDirectory(char * dir_name)
{
- GDir *dir;
- G_CONST_RETURN char *dir_entry;
- size_t entry_len, g_module_suffix_len;
+ GDir *dir = g_dir_open (dir_name, 0, NULL);
+ if (!dir)
+ return 0;
- char * filename;
- int result = 0;
-
- dir = g_dir_open (dir_name, 0, NULL);
- if (!dir)
- return 0;
-
- g_module_suffix_len = strlen (G_MODULE_SUFFIX);
+ size_t g_module_suffix_len = strlen (G_MODULE_SUFFIX);
- while ((dir_entry = g_dir_read_name (dir)) != NULL)
- {
- entry_len = strlen (dir_entry);
- if ((entry_len > g_module_suffix_len) &&
- !strcmp(dir_entry+(entry_len-g_module_suffix_len), G_MODULE_SUFFIX))
- {
- filename = g_build_filename (dir_name, dir_entry, NULL);
- int resultT = Test(filename);
- if(resultT != 0)
- {
- result = resultT;
- }
- g_free (filename);
- }
- }
-
- g_dir_close (dir);
+ int result = 0;
+ const char *dir_entry;
+ while ((dir_entry = g_dir_read_name (dir)) != NULL)
+ {
+ size_t entry_len = strlen (dir_entry);
+ if ((entry_len > g_module_suffix_len) &&
+ !strcmp(dir_entry + (entry_len - g_module_suffix_len), G_MODULE_SUFFIX))
+ {
+ char *filename = g_build_filename (dir_name, dir_entry, NULL);
+ int resultT = Test(filename);
+ if(resultT != 0)
+ result = resultT;
+ g_free (filename);
+ }
+ }
+
+ g_dir_close (dir);
return result;
}