diff options
author | Eric Albright <eric_albright@sil.org> | 2007-11-28 08:59:06 +0000 |
---|---|---|
committer | Eric Albright <eric_albright@sil.org> | 2007-11-28 08:59:06 +0000 |
commit | f99bda15757fb4f86cf668fc16745a024feb6c72 (patch) | |
tree | 1c2fe608aafb69e4e365ac74c43835ca03e5bf20 | |
parent | d47afd3ae1fa9d114442e37d62e73f7fb7d66512 (diff) | |
download | enchant-f99bda15757fb4f86cf668fc16745a024feb6c72.tar.gz |
treat lines that begin with # as comments in pwl
git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@22345 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6
-rw-r--r-- | src/pwl.c | 202 | ||||
-rw-r--r-- | unittests/pwl/enchant_pwl_tests.cpp | 28 |
2 files changed, 130 insertions, 100 deletions
@@ -279,12 +279,14 @@ EnchantPWL* enchant_pwl_init_with_file(const char * file) l = strlen(line)-1; if (line[l]=='\n') line[l] = '\0'; - - if(g_utf8_validate(line, -1, NULL)) - enchant_pwl_add_to_trie(pwl, line, strlen(line)); - else - g_warning ("Bad UTF-8 sequence in %s at line:%u\n", pwl->filename, line_number); - + + if( line[0] != '#') + { + if(g_utf8_validate(line, -1, NULL)) + enchant_pwl_add_to_trie(pwl, line, strlen(line)); + else + g_warning ("Bad UTF-8 sequence in %s at line:%u\n", pwl->filename, line_number); + } ++line_number; } @@ -312,7 +314,7 @@ static void enchant_pwl_add_to_trie(EnchantPWL *pwl, { char * normalized_word; - normalized_word = g_utf8_normalize (word, len, G_NORMALIZE_NFD); + normalized_word = g_utf8_normalize (word, len, G_NORMALIZE_NFD); if(NULL != g_hash_table_lookup (pwl->words_in_trie, normalized_word)) { g_free (normalized_word); return; @@ -324,9 +326,9 @@ static void enchant_pwl_add_to_trie(EnchantPWL *pwl, } static void enchant_pwl_remove_from_trie(EnchantPWL *pwl, - const char *const word, size_t len) + const char *const word, size_t len) { - char * normalized_word = g_utf8_normalize (word, len, G_NORMALIZE_NFD); + char * normalized_word = g_utf8_normalize (word, len, G_NORMALIZE_NFD); if( g_hash_table_remove (pwl->words_in_trie, normalized_word) ) { @@ -335,7 +337,7 @@ static void enchant_pwl_remove_from_trie(EnchantPWL *pwl, pwl->trie = NULL; /* make trie empty if has no content */ } - g_free(normalized_word); + g_free(normalized_word); } void enchant_pwl_add(EnchantPWL *pwl, @@ -360,75 +362,75 @@ void enchant_pwl_add(EnchantPWL *pwl, } void enchant_pwl_remove(EnchantPWL *pwl, - const char *const word, size_t len) + const char *const word, size_t len) { - if(enchant_pwl_check(pwl, word, len) == 1) - return; + if(enchant_pwl_check(pwl, word, len) == 1) + return; enchant_pwl_remove_from_trie(pwl, word, len); if (pwl->filename) { - char * contents; - size_t length; + char * contents; + size_t length; FILE *f; - if(!g_file_get_contents(pwl->filename, &contents, &length, NULL)) - return; + if(!g_file_get_contents(pwl->filename, &contents, &length, NULL)) + return; f = g_fopen(pwl->filename, "wb"); /*binary because g_file_get_contents reads binary*/ if (f) { - const gunichar BOM = 0xfeff; - char * filestart, *searchstart, *needle; - char * key; - - enchant_lock_file (f); - key = g_strndup(word, len); - - if(BOM == g_utf8_get_char(contents)) - { - filestart = g_utf8_next_char(contents); - fwrite (contents, sizeof(char), filestart-contents, f); - } - else - filestart = contents; - - searchstart = filestart; - for(;;) - { - /*find word*/ - needle = strstr(searchstart, key); - if(needle == NULL) - { - fwrite (searchstart, sizeof(char), length - (searchstart - contents), f); - break; - } - else - { - char* foundend = needle+len; - if((needle == filestart || contents[needle-contents-1] == '\n' || contents[needle-contents-1] == '\r') && - (foundend == contents + length || *foundend == '\n' || *foundend == '\r')) - { - fwrite (searchstart, sizeof(char), needle - searchstart, f); - searchstart = foundend; - while (*searchstart == '\n' || *searchstart == '\r') - ++searchstart; - } - else { - fwrite (searchstart, sizeof(char), needle - searchstart+1, f); - searchstart = needle+1; - } - } - } - g_free(key); + const gunichar BOM = 0xfeff; + char * filestart, *searchstart, *needle; + char * key; + + enchant_lock_file (f); + key = g_strndup(word, len); + + if(BOM == g_utf8_get_char(contents)) + { + filestart = g_utf8_next_char(contents); + fwrite (contents, sizeof(char), filestart-contents, f); + } + else + filestart = contents; + + searchstart = filestart; + for(;;) + { + /*find word*/ + needle = strstr(searchstart, key); + if(needle == NULL) + { + fwrite (searchstart, sizeof(char), length - (searchstart - contents), f); + break; + } + else + { + char* foundend = needle+len; + if((needle == filestart || contents[needle-contents-1] == '\n' || contents[needle-contents-1] == '\r') && + (foundend == contents + length || *foundend == '\n' || *foundend == '\r')) + { + fwrite (searchstart, sizeof(char), needle - searchstart, f); + searchstart = foundend; + while (*searchstart == '\n' || *searchstart == '\r') + ++searchstart; + } + else { + fwrite (searchstart, sizeof(char), needle - searchstart+1, f); + searchstart = needle+1; + } + } + } + g_free(key); enchant_unlock_file (f); fclose (f); } - g_free(contents); + g_free(contents); } } @@ -562,8 +564,8 @@ static void enchant_pwl_check_cb(char* match,EnchantTrieMatcher* matcher) } void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl, - const char *const word, size_t len, - EnchantSuggList* suggs_list) + const char *const word, size_t len, + EnchantSuggList* suggs_list) { size_t i; gchar* (*utf8_case_convert_function)(const gchar*str, gssize len); @@ -581,7 +583,7 @@ void enchant_pwl_case_and_denormalize_suggestions(EnchantPWL *pwl, gchar* suggestion; size_t suggestion_len; - suggestion = g_hash_table_lookup (pwl->words_in_trie, suggs_list->suggs[i]); + suggestion = g_hash_table_lookup (pwl->words_in_trie, suggs_list->suggs[i]); suggestion_len = strlen(suggestion); if(utf8_case_convert_function && @@ -789,46 +791,46 @@ static void enchant_trie_remove(EnchantTrie* trie,const char *const word) /* Store multiple words in subtries */ if (word[0] == '\0') { /* Mark end-of-string with special node */ - g_hash_table_remove(trie->subtries, ""); + g_hash_table_remove(trie->subtries, ""); } else { nxtCh = (ssize_t)(g_utf8_next_char(word)-word); tmpWord = g_strndup(word,nxtCh); subtrie = g_hash_table_lookup(trie->subtries, tmpWord); - enchant_trie_remove(subtrie, + enchant_trie_remove(subtrie, (word+nxtCh)); - if(subtrie->subtries == NULL && subtrie->value == NULL) - g_hash_table_remove(trie->subtries, tmpWord); + if(subtrie->subtries == NULL && subtrie->value == NULL) + g_hash_table_remove(trie->subtries, tmpWord); - g_free(tmpWord); + g_free(tmpWord); } - if(g_hash_table_size(trie->subtries) == 1) - { - char* key; - GList* keys = g_hash_table_get_keys(trie->subtries); - key = (char*) keys->data; - subtrie = g_hash_table_lookup(trie->subtries, key); - - /* only remove trie nodes that have values by propogating these up */ - if(subtrie->value) - { - trie->value = g_strconcat(key, subtrie->value, NULL); - enchant_trie_free(subtrie); - g_hash_table_destroy(trie->subtries); - trie->subtries = NULL; - } - - g_list_free(keys); - } + if(g_hash_table_size(trie->subtries) == 1) + { + char* key; + GList* keys = g_hash_table_get_keys(trie->subtries); + key = (char*) keys->data; + subtrie = g_hash_table_lookup(trie->subtries, key); + + /* only remove trie nodes that have values by propogating these up */ + if(subtrie->value) + { + trie->value = g_strconcat(key, subtrie->value, NULL); + enchant_trie_free(subtrie); + g_hash_table_destroy(trie->subtries); + trie->subtries = NULL; + } + + g_list_free(keys); + } } } else { - if(strcmp(trie->value, word) == 0) - { - g_free(trie->value); - trie->value = NULL; - } + if(strcmp(trie->value, word) == 0) + { + g_free(trie->value); + trie->value = NULL; + } } } @@ -966,15 +968,15 @@ static EnchantTrieMatcher* enchant_trie_matcher_init(const char* const word, void* cbdata) { EnchantTrieMatcher* matcher; - char * normalized_word, * pattern; + char * normalized_word, * pattern; - normalized_word = g_utf8_normalize (word, len, G_NORMALIZE_NFD); + normalized_word = g_utf8_normalize (word, len, G_NORMALIZE_NFD); - if(mode == case_insensitive) - { - pattern = g_utf8_strdown (normalized_word, -1); - g_free(normalized_word); - } + if(mode == case_insensitive) + { + pattern = g_utf8_strdown (normalized_word, -1); + g_free(normalized_word); + } else pattern = normalized_word; @@ -996,7 +998,7 @@ static EnchantTrieMatcher* enchant_trie_matcher_init(const char* const word, static void enchant_trie_matcher_free(EnchantTrieMatcher* matcher) { - g_free(matcher->word); + g_free(matcher->word); g_free(matcher->path); g_free(matcher); } diff --git a/unittests/pwl/enchant_pwl_tests.cpp b/unittests/pwl/enchant_pwl_tests.cpp index 101466a..bf5f55a 100644 --- a/unittests/pwl/enchant_pwl_tests.cpp +++ b/unittests/pwl/enchant_pwl_tests.cpp @@ -92,6 +92,34 @@ TEST_FIXTURE(EnchantPwl_TestFixture, }
/////////////////////////////////////////////////////////////////////////////////////////////////
+// Commented Lines ignored
+TEST_FIXTURE(EnchantPwl_TestFixture,
+ IsWordInDictionary_DictionaryHasCommentedLines_DoesNotReadCommentedLines)
+{
+ std::vector<const std::string> sWords;
+ sWords.push_back("cat");
+ sWords.push_back("hat");
+ sWords.push_back("that");
+ sWords.push_back("bat");
+ sWords.push_back("tot");
+
+ std::vector<const std::string>::const_iterator comment = sWords.insert(sWords.begin()+2, "#sat"); //comment
+ ExternalAddWordsToDictionary(sWords);
+ ReloadTestDictionary();
+
+ for(std::vector<const std::string>::const_iterator itWord = sWords.begin(); itWord != comment; ++itWord){
+ CHECK( IsWordInDictionary(*itWord) );
+ }
+
+ CHECK(!IsWordInDictionary(*comment) );
+ CHECK(!IsWordInDictionary("sat") );
+
+ for(std::vector<const std::string>::const_iterator itWord = comment+1; itWord != sWords.end(); ++itWord){
+ CHECK(IsWordInDictionary(*itWord) );
+ }
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
// Unicode normalization
TEST_FIXTURE(EnchantPwl_TestFixture,
IsWordInDictionary_DictionaryHasComposed_SuccessfulCheckWithComposedAndDecomposed)
|