summaryrefslogtreecommitdiff
path: root/navit
diff options
context:
space:
mode:
authormdankov <mdankov@ffa7fe5e-494d-0410-b361-a75ebd5db220>2013-05-03 21:05:37 +0000
committermdankov <mdankov@ffa7fe5e-494d-0410-b361-a75ebd5db220>2013-05-03 21:05:37 +0000
commitcc2307942ad281717ed09f61e79a36271d6388e1 (patch)
tree49e6e2720be795d7949f77b4887523fe91de2aca /navit
parent2427013ccb10cccac6fdfef6e7b230d709cd694d (diff)
downloadnavit-cc2307942ad281717ed09f61e79a36271d6388e1.tar.gz
Add:core:New function for case- and accent- insensitive string comparison.
git-svn-id: http://svn.code.sf.net/p/navit/code/trunk/navit@5469 ffa7fe5e-494d-0410-b361-a75ebd5db220
Diffstat (limited to 'navit')
-rw-r--r--navit/linguistics.c58
-rw-r--r--navit/linguistics.h13
2 files changed, 60 insertions, 11 deletions
diff --git a/navit/linguistics.c b/navit/linguistics.c
index 997d6c150..e60497e29 100644
--- a/navit/linguistics.c
+++ b/navit/linguistics.c
@@ -4,6 +4,8 @@
#include "debug.h"
#include "linguistics.h"
+/* To have linguistics_casefold(linguistics_expand_special(s,i)) equal to linguistics_expand_special(linguistics_casefold(s),i),
+ * please always specify lower-case expansions here for special letters that have no case variants (like German ß).*/
static const char *special[][3]={
/* Capital Diacritics */
/* ¨ Diaresis */
@@ -284,10 +286,10 @@ static GHashTable *casefold_hash, *special_hash;
* @return String prepared for case insensitive search. Result shoud be g_free()d after use.
*/
char*
-linguistics_casefold(char *in)
+linguistics_casefold(const char *in)
{
int len=strlen(in);
- char *src=in;
+ const char *src=in;
char *ret=g_new(char,len+1);
char *dest=ret;
char buf[10];
@@ -320,7 +322,7 @@ linguistics_casefold(char *in)
}
static char**
-linguistics_get_special(char *str, char *end)
+linguistics_get_special(const char *str, const char *end)
{
char *buf;
int len;
@@ -332,6 +334,48 @@ linguistics_get_special(char *str, char *end)
return g_hash_table_lookup(special_hash,buf);
}
+/**
+ * @brief Compare two strings, trying to replace special characters (e.g. umlauts) in first string with plain letters.
+ *
+ * @param s1 First string to process, for example, an item name from the map. Will be linguistics_casefold()ed before comparison.
+ * @param s2 Second string to process, usually user supplied search string. Should be linguistics_casefold()ed before calling this function.
+ * @param mode Combination of linguistics_cmp_mode flags: linguistics_cmp_expand to also try linguistics_expand_special()ed variants of s1,
+ *        linguistics_cmp_partial to allow matches shorter than whole s1, linguistics_cmp_words to let matches start from any word boundary within s1.
+ * @returns 0 when a match is found, otherwise the non-zero result of the last strcmp()/strncmp() performed
+ */
+int linguistics_compare(const char *s1, const char *s2, enum linguistics_cmp_mode mode)
+{
+ int ret=0;
+ int i;
+ int s2len=strlen(s2); /* length of s2, used as the strncmp() limit for partial matches */
+ char *s1f;
+ /* Calling linguistics_casefold() before linguistics_expand_special() requires that result is independent of calling order. This seems
+ to be true at the time of writing this comment. */
+ s1f=linguistics_casefold(s1);
+ for(i=0; i<3; i++) { /* pass 0 compares the casefolded s1 itself; passes 1 and 2 compare linguistics_expand_special(s1f,i) */
+ char *s, *word;
+ if(i>0)
+ s=linguistics_expand_special(s1f,i);
+ else
+ s=s1f;
+ word=s;
+ while(word) { /* skipped entirely if s is NULL (presumably when linguistics_expand_special() has nothing to replace - confirm); ret then keeps its previous value */
+ if(mode & linguistics_cmp_partial)
+ ret=strncmp(word,s2,s2len); /* matches when s2 is a prefix of word */
+ else
+ ret=strcmp(word,s2);
+ if(!ret || !(mode & linguistics_cmp_words)) /* stop on a match, or when only the start of the string may match */
+ break;
+ word=linguistics_next_word(word); /* presumably NULL after the last word, which ends the loop - confirm */
+ }
+ if(i>0)
+ g_free(s); /* s is a separate copy only for i>0; s1f itself is freed after the loop */
+ if(!ret || !(mode & linguistics_cmp_expand)) /* stop on a match, or when expansion was not requested */
+ break;
+ }
+ g_free(s1f);
+ return ret;
+}
/**
* @brief Replace special characters in string (e.g. umlauts) with plain letters.
@@ -344,9 +388,9 @@ linguistics_get_special(char *str, char *end)
* @returns copy of string, with characters replaced
*/
char *
-linguistics_expand_special(char *str, int mode)
+linguistics_expand_special(const char *str, int mode)
{
- char *in=str;
+ const char *in=str;
char *out,*ret;
int found=0;
int ret_len=strlen(str);
@@ -356,7 +400,7 @@ linguistics_expand_special(char *str, int mode)
return ret;
while (*in) {
char *next=g_utf8_find_next_char(in, NULL);
- int i,len;
+ int len;
int match=0;
if(next)
@@ -412,7 +456,7 @@ linguistics_next_word(char *str)
}
int
-linguistics_search(char *str)
+linguistics_search(const char *str)
{
if (!g_strcasecmp(str,"str"))
return 0;
diff --git a/navit/linguistics.h b/navit/linguistics.h
index 8c15cdeac..91fcc393e 100644
--- a/navit/linguistics.h
+++ b/navit/linguistics.h
@@ -1,15 +1,20 @@
#ifdef __cplusplus
extern "C" {
#endif
-char *linguistics_expand_special(char *str, int mode);
+char *linguistics_expand_special(const char *str, int mode);
char *linguistics_next_word(char *str);
void linguistics_init(void);
void linguistics_free(void);
-char *linguistics_casefold(char *in);
+char *linguistics_casefold(const char *in);
+int linguistics_search(const char *str);
+enum linguistics_cmp_mode { /* bit flags for the mode argument of linguistics_compare(); may be OR-ed together */
+ linguistics_cmp_expand=1, /* also compare linguistics_expand_special() variants of the first string */
+ linguistics_cmp_partial=2, /* compare only the first strlen(s2) bytes, so a prefix match is enough */
+ linguistics_cmp_words=4 /* on mismatch, retry from each following word of the first string */
+};
+int linguistics_compare(const char *s1, const char *s2, enum linguistics_cmp_mode mode);
#ifdef __cplusplus
}
#endif
-/* Prototypes */
-int linguistics_search(char *str);