diff options
author | Reuben Thomas <rrt@sc3d.org> | 2021-11-09 13:03:08 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-09 13:03:08 +0000 |
commit | e0d44f3d0e2ec9b40c7662fda7a6e10465028e4f (patch) | |
tree | 7236a0654ad13c95f756ec8db5a7fb9072b99062 /src/pwl.c | |
parent | 9bd442f2e48cd4e605bbdf2f8a2e8c3ec02418ac (diff) | |
parent | 34eed0fbd502cc3980e7931dbdb4e9379b34252d (diff) | |
download | enchant-e0d44f3d0e2ec9b40c7662fda7a6e10465028e4f.tar.gz |
Merge pull request #291 from rrthomas/master
Minor fixes and cleanup
Diffstat (limited to 'src/pwl.c')
-rw-r--r-- | src/pwl.c | 112 |
1 files changed, 47 insertions, 65 deletions
@@ -1,5 +1,6 @@ /* enchant * Copyright (C) 2003, 2004 Dom Lachowicz + * Copyright (C) 2016-2021 Reuben Thomas <rrt@sc3d.org> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -75,7 +76,7 @@ static const gunichar BOM = 0xfeff; /* A PWL dictionary is stored as a Trie-like data structure EnchantTrie. * The EnchantTrie datatype is completely recursive - all child nodes - * are simply EnchantTrie pointers. This means that all functions + * are simply EnchantTrie pointers. This means that all functions * that potentially modify a trie need to return the modified trie, * as additional memory may have been allocated. * @@ -202,7 +203,7 @@ EnchantPWL* enchant_pwl_init(void) * * Returns: a new PWL object used to store/check/suggest words * or NULL if the file cannot be opened or created - */ + */ EnchantPWL* enchant_pwl_init_with_file(const char * file) { g_return_val_if_fail (file != NULL, NULL); @@ -233,13 +234,13 @@ static void enchant_pwl_refresh_from_file(EnchantPWL* pwl) pwl->words_in_trie = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free); FILE *f = g_fopen(pwl->filename, "r"); - if (!f) + if (!f) return; pwl->file_changed = stats.st_mtime; enchant_lock_file (f); - + char buffer[BUFSIZ + 1]; size_t line_number = 1; for (; NULL != (fgets (buffer, sizeof (buffer), f)); ++line_number) @@ -248,12 +249,12 @@ static void enchant_pwl_refresh_from_file(EnchantPWL* pwl) if(line_number == 1 && BOM == g_utf8_get_char(line)) line = g_utf8_next_char(line); - if(line[strlen(line)-1] != '\n' && !feof(f)) /* ignore lines longer than BUFSIZ. */ + if(line[strlen(line)-1] != '\n' && !feof(f)) /* ignore lines longer than BUFSIZ. */ { g_warning ("Line too long (ignored) in %s at line:%zu\n", pwl->filename, line_number); while (NULL != (fgets (buffer, sizeof (buffer), f))) { - if (line[strlen(buffer)-1]=='\n') + if (line[strlen(buffer)-1]=='\n') break; } continue; @@ -268,7 +269,7 @@ static void enchant_pwl_refresh_from_file(EnchantPWL* pwl) g_warning ("Bad UTF-8 sequence in %s at line:%zu\n", pwl->filename, line_number); } } - + enchant_unlock_file (f); fclose (f); } @@ -289,7 +290,7 @@ static void enchant_pwl_add_to_trie(EnchantPWL *pwl, g_free (normalized_word); return; } - + g_hash_table_insert (pwl->words_in_trie, normalized_word, g_strndup(word,len)); pwl->trie = enchant_trie_insert(pwl->trie, normalized_word); @@ -308,7 +309,7 @@ static void enchant_pwl_remove_from_trie(EnchantPWL *pwl, pwl->trie = NULL; /* make trie empty if has no content */ } } - + g_free(normalized_word); } @@ -346,12 +347,10 @@ void enchant_pwl_add(EnchantPWL *pwl, } if (fwrite (word, sizeof(char), len, f) == (size_t)len) - { - putc ('\n', f); - } + putc ('\n', f); enchant_unlock_file (f); fclose (f); - } + } } } @@ -401,7 +400,7 @@ void enchant_pwl_remove(EnchantPWL *pwl, fwrite (searchstart, sizeof(char), length - (searchstart - contents), f); break; } - else + else { char* foundend = needle+len; if((needle == filestart || contents[needle-contents-1] == '\n' || contents[needle-contents-1] == '\r') && @@ -419,7 +418,7 @@ void enchant_pwl_remove(EnchantPWL *pwl, } } g_free(key); - + GStatBuf stats; if(g_stat(pwl->filename, &stats)==0) pwl->file_changed = stats.st_mtime; @@ -427,7 +426,7 @@ void enchant_pwl_remove(EnchantPWL *pwl, enchant_unlock_file (f); fclose (f); - } + } g_free(contents); } } @@ -504,7 +503,7 @@ static _GL_ATTRIBUTE_PURE int enchant_is_title_case(const char * const word, siz if ((type != G_UNICODE_UPPERCASE_LETTER && type != G_UNICODE_TITLECASE_LETTER) || ch != g_unichar_totitle(ch)) return 0; - + for (const char* it = g_utf8_next_char(word); it < word + len; it = g_utf8_next_char(it)) { type = g_unichar_type(g_utf8_get_char(it)); @@ -539,7 +538,7 @@ int enchant_pwl_check(EnchantPWL *pwl, const char *const word, ssize_t len) enchant_pwl_refresh_from_file(pwl); int exists = enchant_pwl_contains(pwl, word, len); - + if(exists) return 0; @@ -572,8 +571,8 @@ static void enchant_pwl_check_cb(char* match,EnchantTrieMatcher* matcher) (*((int*)(matcher->cbdata)))++; } -static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl, - const char *const word, size_t len, +static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl, + const char *const word, size_t len, EnchantSuggList* suggs_list) { gchar* (*utf8_case_convert_function)(const gchar*str, gssize len) = NULL; @@ -581,7 +580,7 @@ static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl, utf8_case_convert_function = enchant_utf8_strtitle; else if (enchant_is_all_caps(word, len)) utf8_case_convert_function = g_utf8_strup; - + for (size_t i = 0; i < suggs_list->n_suggs; ++i) { gchar* suggestion = g_hash_table_lookup (pwl->words_in_trie, suggs_list->suggs[i]); @@ -592,7 +591,7 @@ static void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl, cased_suggestion = utf8_case_convert_function(suggestion, suggestion_len); else cased_suggestion = g_strndup(suggestion, suggestion_len); - + g_free(suggs_list->suggs[i]); suggs_list->suggs[i] = cased_suggestion; } @@ -615,7 +614,7 @@ static int best_distance(char** suggs, const char *const word, size_t len) return best_dist; } -/* gives the best set of suggestions from pwl that are at least as good as the +/* gives the best set of suggestions from pwl that are at least as good as the * given suggs (if suggs == NULL just best from pwl) */ char** enchant_pwl_suggest(EnchantPWL *pwl, const char *const word, ssize_t len, char** suggs, size_t* out_n_suggs) @@ -645,7 +644,7 @@ char** enchant_pwl_suggest(EnchantPWL *pwl, const char *const word, (*out_n_suggs) = sugg_list.n_suggs; enchant_pwl_case_and_denormalize_suggestions(pwl, word, len, &sugg_list); - + return sugg_list.suggs; } @@ -663,9 +662,8 @@ static void enchant_pwl_suggest_cb(char* match,EnchantTrieMatcher* matcher) size_t loc; for(loc=0; loc < sugg_list->n_suggs; loc++) { /* Better than an existing suggestion, so stop */ - if(sugg_list->sugg_errs[loc] > matcher->num_errors) { + if(sugg_list->sugg_errs[loc] > matcher->num_errors) break; - } /* Already in the list with better score, just return */ if(strcmp(match,sugg_list->suggs[loc])==0) { g_free(match); @@ -679,7 +677,7 @@ static void enchant_pwl_suggest_cb(char* match,EnchantTrieMatcher* matcher) } int changes = 1; /* num words added to list */ - + /* Remove all elements with worse score */ for(size_t i=loc; i < sugg_list->n_suggs; i++){ g_free(sugg_list->suggs[i]); @@ -694,9 +692,8 @@ static void enchant_pwl_suggest_cb(char* match,EnchantTrieMatcher* matcher) static void enchant_trie_free(EnchantTrie* trie) { /* Don't try to free NULL or the EOSTrie pointer */ - if(trie == NULL || trie == EOSTrie) { + if(trie == NULL || trie == EOSTrie) return; - } /* Because we have not set a destroy function for the hashtable * (to make code cleaner below), we need to explicitly free all @@ -720,9 +717,8 @@ static void enchant_trie_free_cb(void* key _GL_UNUSED_PARAMETER, static EnchantTrie* enchant_trie_insert(EnchantTrie* trie,const char *const word) { - if (trie == NULL) { + if (trie == NULL) trie = g_new0(EnchantTrie, 1); - } if (trie->value == NULL) { if (trie->subtries == NULL) { @@ -744,7 +740,7 @@ static EnchantTrie* enchant_trie_insert(EnchantTrie* trie,const char *const word } } else { /* Create new hash table for subtries, and reinsert */ - trie->subtries = g_hash_table_new_full(g_str_hash, + trie->subtries = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); char *tmpWord = trie->value; trie->value = NULL; @@ -764,10 +760,10 @@ static void enchant_trie_remove(EnchantTrie* trie,const char *const word) if (trie->value == NULL) { if (trie->subtries != NULL) { /* Store multiple words in subtries */ - if (word[0] == '\0') { + if (word[0] == '\0') /* End-of-string is marked with special node */ g_hash_table_remove(trie->subtries, ""); - } else { + else { ssize_t nxtCh = (ssize_t)(g_utf8_next_char(word) - word); char *tmpWord = g_strndup(word, nxtCh); EnchantTrie *subtrie = g_hash_table_lookup(trie->subtries, tmpWord); @@ -799,16 +795,13 @@ static void enchant_trie_remove(EnchantTrie* trie,const char *const word) g_list_free(keys); } } - } else { - if(strcmp(trie->value, word) == 0) - { - g_free(trie->value); - trie->value = NULL; - } + } else if(strcmp(trie->value, word) == 0) { + g_free(trie->value); + trie->value = NULL; } } -static EnchantTrie* enchant_trie_get_subtrie(EnchantTrie* trie, +static EnchantTrie* enchant_trie_get_subtrie(EnchantTrie* trie, EnchantTrieMatcher* matcher, char** nxtChS) { @@ -830,9 +823,8 @@ static void enchant_trie_find_matches(EnchantTrie* trie,EnchantTrieMatcher *matc g_return_if_fail(matcher); /* Can't match in the empty trie */ - if(trie == NULL) { + if(trie == NULL) return; - } /* Bail out if over the error limits */ if(matcher->num_errors > matcher->max_errors){ @@ -843,12 +835,10 @@ static void enchant_trie_find_matches(EnchantTrie* trie,EnchantTrieMatcher *matc if (trie == EOSTrie) { size_t word_len = strlen(matcher->word); int errs = matcher->num_errors; - if((ssize_t)word_len > matcher->word_pos) { + if((ssize_t)word_len > matcher->word_pos) matcher->num_errors = errs + word_len - matcher->word_pos; - } - if (matcher->num_errors <= matcher->max_errors) { + if (matcher->num_errors <= matcher->max_errors) matcher->cbfunc(g_strdup(matcher->path),matcher); - } matcher->num_errors = errs; return; } @@ -859,21 +849,16 @@ static void enchant_trie_find_matches(EnchantTrie* trie,EnchantTrieMatcher *matc int errs = matcher->num_errors; value = trie->value; if(matcher->mode == case_insensitive) - { - value = g_utf8_strdown(value, -1); - } - matcher->num_errors = errs + edit_dist(value, + value = g_utf8_strdown(value, -1); + matcher->num_errors = errs + edit_dist(value, &(matcher->word[matcher->word_pos])); if(matcher->mode == case_insensitive) - { - g_free(value); - } + g_free(value); - if (matcher->num_errors <= matcher->max_errors) { + if (matcher->num_errors <= matcher->max_errors) matcher->cbfunc(g_strconcat(matcher->path, trie->value,NULL), matcher); - } matcher->num_errors = errs; return; } @@ -918,10 +903,9 @@ static void enchant_trie_find_matches_cb(void* keyV,void* subtrieV,void* matcher ssize_t nxtChI = (ssize_t) (g_utf8_next_char(&matcher->word[matcher->word_pos]) - matcher->word); - /* Dont handle actual matches, that's already done */ - if (strncmp(key,&matcher->word[matcher->word_pos],nxtChI-matcher->word_pos) == 0) { + /* Don't handle actual matches, that's already done */ + if (strncmp(key,&matcher->word[matcher->word_pos],nxtChI-matcher->word_pos) == 0) return; - } enchant_trie_matcher_pushpath(matcher,key); @@ -952,7 +936,7 @@ static void enchant_trie_find_matches_cb(void* keyV,void* subtrieV,void* matcher } g_free(key2); - + matcher->word_pos = oldPos; } @@ -1017,9 +1001,8 @@ static void enchant_trie_matcher_poppath(EnchantTrieMatcher* matcher,int num) { g_return_if_fail(matcher->path_pos >= 0); matcher->path_pos = matcher->path_pos - num; - if(matcher->path_pos < 0) { + if(matcher->path_pos < 0) matcher->path_pos = 0; - } matcher->path[matcher->path_pos] = '\0'; } @@ -1030,7 +1013,7 @@ static int edit_dist(const char* utf8word1, const char* utf8word2) gunichar * word2 = g_utf8_to_ucs4_fast(utf8word2, -1, &len2); int * table = g_new0(int, (len1+1)*(len2+1)); - + /* Initialise outer rows of table */ for (glong i = 0; i < len1 + 1; i++) table[i*(len2+1)] = i; @@ -1042,9 +1025,8 @@ static int edit_dist(const char* utf8word1, const char* utf8word2) for (glong j = 1; j < len2+1; j++) { int cost = word1[i-1] != word2[j-1]; int v1 = table[(i-1)*(len2+1)+j] + 1; - if (i > 1 && j > 1 && word1[i-1] == word2[j-2] && word1[i-2] == word2[j-1]) { + if (i > 1 && j > 1 && word1[i-1] == word2[j-2] && word1[i-2] == word2[j-1]) v1 = MIN (v1, table[(i-2)*(len2+1)+(j-2)] + cost); - } int v2 = table[i*(len2+1)+(j-1)] + 1; int v3 = table[(i-1)*(len2+1)+(j-1)] + cost; |