summary | refs | log | tree | commit | diff
path: root/src/pwl.c
diff options
context:
space:
mode:
author: Reuben Thomas <rrt@sc3d.org> 2021-11-09 13:03:08 +0000
committer: GitHub <noreply@github.com> 2021-11-09 13:03:08 +0000
commit: e0d44f3d0e2ec9b40c7662fda7a6e10465028e4f (patch)
tree: 7236a0654ad13c95f756ec8db5a7fb9072b99062 /src/pwl.c
parent: 9bd442f2e48cd4e605bbdf2f8a2e8c3ec02418ac (diff)
parent: 34eed0fbd502cc3980e7931dbdb4e9379b34252d (diff)
download: enchant-e0d44f3d0e2ec9b40c7662fda7a6e10465028e4f.tar.gz
Merge pull request #291 from rrthomas/master
Minor fixes and cleanup
Diffstat (limited to 'src/pwl.c')
-rw-r--r-- src/pwl.c 112
1 files changed, 47 insertions, 65 deletions
diff --git a/src/pwl.c b/src/pwl.c
index db645d5..f6d8df5 100644
--- a/src/pwl.c
+++ b/src/pwl.c
@@ -1,5 +1,6 @@
/* enchant
* Copyright (C) 2003, 2004 Dom Lachowicz
+ * Copyright (C) 2016-2021 Reuben Thomas <rrt@sc3d.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -75,7 +76,7 @@ static const gunichar BOM = 0xfeff;
/* A PWL dictionary is stored as a Trie-like data structure EnchantTrie.
* The EnchantTrie datatype is completely recursive - all child nodes
- * are simply EnchantTrie pointers. This means that all functions
+ * are simply EnchantTrie pointers. This means that all functions
* that potentially modify a trie need to return the modified trie,
* as additional memory may have been allocated.
*
@@ -202,7 +203,7 @@ EnchantPWL* enchant_pwl_init(void)
*
* Returns: a new PWL object used to store/check/suggest words
* or NULL if the file cannot be opened or created
- */
+ */
EnchantPWL* enchant_pwl_init_with_file(const char * file)
{
g_return_val_if_fail (file != NULL, NULL);
@@ -233,13 +234,13 @@ static void enchant_pwl_refresh_from_file(EnchantPWL* pwl)
pwl->words_in_trie = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
FILE *f = g_fopen(pwl->filename, "r");
- if (!f)
+ if (!f)
return;
pwl->file_changed = stats.st_mtime;
enchant_lock_file (f);
-
+
char buffer[BUFSIZ + 1];
size_t line_number = 1;
for (; NULL != (fgets (buffer, sizeof (buffer), f)); ++line_number)
@@ -248,12 +249,12 @@ static void enchant_pwl_refresh_from_file(EnchantPWL* pwl)
if(line_number == 1 && BOM == g_utf8_get_char(line))
line = g_utf8_next_char(line);
- if(line[strlen(line)-1] != '\n' && !feof(f)) /* ignore lines longer than BUFSIZ. */
+ if(line[strlen(line)-1] != '\n' && !feof(f)) /* ignore lines longer than BUFSIZ. */
{
g_warning ("Line too long (ignored) in %s at line:%zu\n", pwl->filename, line_number);
while (NULL != (fgets (buffer, sizeof (buffer), f)))
{
- if (line[strlen(buffer)-1]=='\n')
+ if (line[strlen(buffer)-1]=='\n')
break;
}
continue;
@@ -268,7 +269,7 @@ static void enchant_pwl_refresh_from_file(EnchantPWL* pwl)
g_warning ("Bad UTF-8 sequence in %s at line:%zu\n", pwl->filename, line_number);
}
}
-
+
enchant_unlock_file (f);
fclose (f);
}
@@ -289,7 +290,7 @@ static void enchant_pwl_add_to_trie(EnchantPWL *pwl,
g_free (normalized_word);
return;
}
-
+
g_hash_table_insert (pwl->words_in_trie, normalized_word, g_strndup(word,len));
pwl->trie = enchant_trie_insert(pwl->trie, normalized_word);
@@ -308,7 +309,7 @@ static void enchant_pwl_remove_from_trie(EnchantPWL *pwl,
pwl->trie = NULL; /* make trie empty if has no content */
}
}
-
+
g_free(normalized_word);
}
@@ -346,12 +347,10 @@ void enchant_pwl_add(EnchantPWL *pwl,
}
if (fwrite (word, sizeof(char), len, f) == (size_t)len)
- {
- putc ('\n', f);
- }
+ putc ('\n', f);
enchant_unlock_file (f);
fclose (f);
- }
+ }
}
}
@@ -401,7 +400,7 @@ void enchant_pwl_remove(EnchantPWL *pwl,
fwrite (searchstart, sizeof(char), length - (searchstart - contents), f);
break;
}
- else
+ else
{
char* foundend = needle+len;
if((needle == filestart || contents[needle-contents-1] == '\n' || contents[needle-contents-1] == '\r') &&
@@ -419,7 +418,7 @@ void enchant_pwl_remove(EnchantPWL *pwl,
}
}
g_free(key);
-
+
GStatBuf stats;
if(g_stat(pwl->filename, &stats)==0)
pwl->file_changed = stats.st_mtime;
@@ -427,7 +426,7 @@ void enchant_pwl_remove(EnchantPWL *pwl,
enchant_unlock_file (f);
fclose (f);
- }
+ }
g_free(contents);
}
}
@@ -504,7 +503,7 @@ static _GL_ATTRIBUTE_PURE int enchant_is_title_case(const char * const word, siz
if ((type != G_UNICODE_UPPERCASE_LETTER && type != G_UNICODE_TITLECASE_LETTER) ||
ch != g_unichar_totitle(ch))
return 0;
-
+
for (const char* it = g_utf8_next_char(word); it < word + len; it = g_utf8_next_char(it))
{
type = g_unichar_type(g_utf8_get_char(it));
@@ -539,7 +538,7 @@ int enchant_pwl_check(EnchantPWL *pwl, const char *const word, ssize_t len)
enchant_pwl_refresh_from_file(pwl);
int exists = enchant_pwl_contains(pwl, word, len);
-
+
if(exists)
return 0;
@@ -572,8 +571,8 @@ static void enchant_pwl_check_cb(char* match,EnchantTrieMatcher* matcher)
(*((int*)(matcher->cbdata)))++;
}
-static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl,
- const char *const word, size_t len,
+static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl,
+ const char *const word, size_t len,
EnchantSuggList* suggs_list)
{
gchar* (*utf8_case_convert_function)(const gchar*str, gssize len) = NULL;
@@ -581,7 +580,7 @@ static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl,
utf8_case_convert_function = enchant_utf8_strtitle;
else if (enchant_is_all_caps(word, len))
utf8_case_convert_function = g_utf8_strup;
-
+
for (size_t i = 0; i < suggs_list->n_suggs; ++i)
{
gchar* suggestion = g_hash_table_lookup (pwl->words_in_trie, suggs_list->suggs[i]);
@@ -592,7 +591,7 @@ static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl,
cased_suggestion = utf8_case_convert_function(suggestion, suggestion_len);
else
cased_suggestion = g_strndup(suggestion, suggestion_len);
-
+
g_free(suggs_list->suggs[i]);
suggs_list->suggs[i] = cased_suggestion;
}
@@ -615,7 +614,7 @@ static int best_distance(char** suggs, const char *const word, size_t len)
return best_dist;
}
-/* gives the best set of suggestions from pwl that are at least as good as the
+/* gives the best set of suggestions from pwl that are at least as good as the
* given suggs (if suggs == NULL just best from pwl) */
char** enchant_pwl_suggest(EnchantPWL *pwl, const char *const word,
ssize_t len, char** suggs, size_t* out_n_suggs)
@@ -645,7 +644,7 @@ char** enchant_pwl_suggest(EnchantPWL *pwl, const char *const word,
(*out_n_suggs) = sugg_list.n_suggs;
enchant_pwl_case_and_denormalize_suggestions(pwl, word, len, &sugg_list);
-
+
return sugg_list.suggs;
}
@@ -663,9 +662,8 @@ static void enchant_pwl_suggest_cb(char* match,EnchantTrieMatcher* matcher)
size_t loc;
for(loc=0; loc < sugg_list->n_suggs; loc++) {
/* Better than an existing suggestion, so stop */
- if(sugg_list->sugg_errs[loc] > matcher->num_errors) {
+ if(sugg_list->sugg_errs[loc] > matcher->num_errors)
break;
- }
/* Already in the list with better score, just return */
if(strcmp(match,sugg_list->suggs[loc])==0) {
g_free(match);
@@ -679,7 +677,7 @@ static void enchant_pwl_suggest_cb(char* match,EnchantTrieMatcher* matcher)
}
int changes = 1; /* num words added to list */
-
+
/* Remove all elements with worse score */
for(size_t i=loc; i < sugg_list->n_suggs; i++){
g_free(sugg_list->suggs[i]);
@@ -694,9 +692,8 @@ static void enchant_pwl_suggest_cb(char* match,EnchantTrieMatcher* matcher)
static void enchant_trie_free(EnchantTrie* trie)
{
/* Don't try to free NULL or the EOSTrie pointer */
- if(trie == NULL || trie == EOSTrie) {
+ if(trie == NULL || trie == EOSTrie)
return;
- }
/* Because we have not set a destroy function for the hashtable
* (to make code cleaner below), we need to explicitly free all
@@ -720,9 +717,8 @@ static void enchant_trie_free_cb(void* key _GL_UNUSED_PARAMETER,
static EnchantTrie* enchant_trie_insert(EnchantTrie* trie,const char *const word)
{
- if (trie == NULL) {
+ if (trie == NULL)
trie = g_new0(EnchantTrie, 1);
- }
if (trie->value == NULL) {
if (trie->subtries == NULL) {
@@ -744,7 +740,7 @@ static EnchantTrie* enchant_trie_insert(EnchantTrie* trie,const char *const word
}
} else {
/* Create new hash table for subtries, and reinsert */
- trie->subtries = g_hash_table_new_full(g_str_hash,
+ trie->subtries = g_hash_table_new_full(g_str_hash,
g_str_equal, g_free, NULL);
char *tmpWord = trie->value;
trie->value = NULL;
@@ -764,10 +760,10 @@ static void enchant_trie_remove(EnchantTrie* trie,const char *const word)
if (trie->value == NULL) {
if (trie->subtries != NULL) {
/* Store multiple words in subtries */
- if (word[0] == '\0') {
+ if (word[0] == '\0')
/* End-of-string is marked with special node */
g_hash_table_remove(trie->subtries, "");
- } else {
+ else {
ssize_t nxtCh = (ssize_t)(g_utf8_next_char(word) - word);
char *tmpWord = g_strndup(word, nxtCh);
EnchantTrie *subtrie = g_hash_table_lookup(trie->subtries, tmpWord);
@@ -799,16 +795,13 @@ static void enchant_trie_remove(EnchantTrie* trie,const char *const word)
g_list_free(keys);
}
}
- } else {
- if(strcmp(trie->value, word) == 0)
- {
- g_free(trie->value);
- trie->value = NULL;
- }
+ } else if(strcmp(trie->value, word) == 0) {
+ g_free(trie->value);
+ trie->value = NULL;
}
}
-static EnchantTrie* enchant_trie_get_subtrie(EnchantTrie* trie,
+static EnchantTrie* enchant_trie_get_subtrie(EnchantTrie* trie,
EnchantTrieMatcher* matcher,
char** nxtChS)
{
@@ -830,9 +823,8 @@ static void enchant_trie_find_matches(EnchantTrie* trie,EnchantTrieMatcher *matc
g_return_if_fail(matcher);
/* Can't match in the empty trie */
- if(trie == NULL) {
+ if(trie == NULL)
return;
- }
/* Bail out if over the error limits */
if(matcher->num_errors > matcher->max_errors){
@@ -843,12 +835,10 @@ static void enchant_trie_find_matches(EnchantTrie* trie,EnchantTrieMatcher *matc
if (trie == EOSTrie) {
size_t word_len = strlen(matcher->word);
int errs = matcher->num_errors;
- if((ssize_t)word_len > matcher->word_pos) {
+ if((ssize_t)word_len > matcher->word_pos)
matcher->num_errors = errs + word_len - matcher->word_pos;
- }
- if (matcher->num_errors <= matcher->max_errors) {
+ if (matcher->num_errors <= matcher->max_errors)
matcher->cbfunc(g_strdup(matcher->path),matcher);
- }
matcher->num_errors = errs;
return;
}
@@ -859,21 +849,16 @@ static void enchant_trie_find_matches(EnchantTrie* trie,EnchantTrieMatcher *matc
int errs = matcher->num_errors;
value = trie->value;
if(matcher->mode == case_insensitive)
- {
- value = g_utf8_strdown(value, -1);
- }
- matcher->num_errors = errs + edit_dist(value,
+ value = g_utf8_strdown(value, -1);
+ matcher->num_errors = errs + edit_dist(value,
&(matcher->word[matcher->word_pos]));
if(matcher->mode == case_insensitive)
- {
- g_free(value);
- }
+ g_free(value);
- if (matcher->num_errors <= matcher->max_errors) {
+ if (matcher->num_errors <= matcher->max_errors)
matcher->cbfunc(g_strconcat(matcher->path,
trie->value,NULL),
matcher);
- }
matcher->num_errors = errs;
return;
}
@@ -918,10 +903,9 @@ static void enchant_trie_find_matches_cb(void* keyV,void* subtrieV,void* matcher
ssize_t nxtChI = (ssize_t) (g_utf8_next_char(&matcher->word[matcher->word_pos]) - matcher->word);
- /* Dont handle actual matches, that's already done */
- if (strncmp(key,&matcher->word[matcher->word_pos],nxtChI-matcher->word_pos) == 0) {
+ /* Don't handle actual matches, that's already done */
+ if (strncmp(key,&matcher->word[matcher->word_pos],nxtChI-matcher->word_pos) == 0)
return;
- }
enchant_trie_matcher_pushpath(matcher,key);
@@ -952,7 +936,7 @@ static void enchant_trie_find_matches_cb(void* keyV,void* subtrieV,void* matcher
}
g_free(key2);
-
+
matcher->word_pos = oldPos;
}
@@ -1017,9 +1001,8 @@ static void enchant_trie_matcher_poppath(EnchantTrieMatcher* matcher,int num)
{
g_return_if_fail(matcher->path_pos >= 0);
matcher->path_pos = matcher->path_pos - num;
- if(matcher->path_pos < 0) {
+ if(matcher->path_pos < 0)
matcher->path_pos = 0;
- }
matcher->path[matcher->path_pos] = '\0';
}
@@ -1030,7 +1013,7 @@ static int edit_dist(const char* utf8word1, const char* utf8word2)
gunichar * word2 = g_utf8_to_ucs4_fast(utf8word2, -1, &len2);
int * table = g_new0(int, (len1+1)*(len2+1));
-
+
/* Initialise outer rows of table */
for (glong i = 0; i < len1 + 1; i++)
table[i*(len2+1)] = i;
@@ -1042,9 +1025,8 @@ static int edit_dist(const char* utf8word1, const char* utf8word2)
for (glong j = 1; j < len2+1; j++) {
int cost = word1[i-1] != word2[j-1];
int v1 = table[(i-1)*(len2+1)+j] + 1;
- if (i > 1 && j > 1 && word1[i-1] == word2[j-2] && word1[i-2] == word2[j-1]) {
+ if (i > 1 && j > 1 && word1[i-1] == word2[j-2] && word1[i-2] == word2[j-1])
v1 = MIN (v1, table[(i-2)*(len2+1)+(j-2)] + cost);
- }
int v2 = table[i*(len2+1)+(j-1)] + 1;
int v3 = table[(i-1)*(len2+1)+(j-1)] + cost;