diff options
author | unknown <monty@mysql.com> | 2004-03-25 15:05:42 +0200 |
---|---|---|
committer | unknown <monty@mysql.com> | 2004-03-25 15:05:42 +0200 |
commit | fe596dee5869ac1f99a9d88061bc9dff402849f6 (patch) | |
tree | b090f762169aabf4fa3602b52d4463cd9c66106b /strings | |
parent | 23e480a80c64ca9b390a6fa82f68d9f8bbb1fa67 (diff) | |
parent | 887d6f144d85b9a869e4f8030c41816bbd32771b (diff) | |
download | mariadb-git-fe596dee5869ac1f99a9d88061bc9dff402849f6.tar.gz |
Merge bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/home/my/mysql-4.1
myisam/mi_check.c:
Auto merged
sql/field.cc:
Auto merged
strings/ctype-sjis.c:
Auto merged
strings/ctype-ucs2.c:
Auto merged
Diffstat (limited to 'strings')
-rw-r--r-- | strings/ctype-big5.c | 90 | ||||
-rw-r--r-- | strings/ctype-czech.c | 297 | ||||
-rw-r--r-- | strings/ctype-gbk.c | 88 | ||||
-rw-r--r-- | strings/ctype-latin1.c | 128 | ||||
-rw-r--r-- | strings/ctype-mb.c | 17 | ||||
-rw-r--r-- | strings/ctype-simple.c | 90 | ||||
-rw-r--r-- | strings/ctype-sjis.c | 116 | ||||
-rw-r--r-- | strings/ctype-tis620.c | 16 | ||||
-rw-r--r-- | strings/ctype-ucs2.c | 48 | ||||
-rw-r--r-- | strings/ctype-utf8.c | 8 | ||||
-rw-r--r-- | strings/ctype-win1250ch.c | 4 |
11 files changed, 557 insertions, 345 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index ee55cfda6c1..2bde29ecc47 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -218,40 +218,80 @@ static uint16 big5strokexfrm(uint16 i) return 0xA140; } -static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) + + +static int my_strnncoll_big5_internal(const uchar **a_res, + const uchar **b_res, uint length) { - uint len; + const char *a= *a_res, *b= *b_res; - len = min(len1,len2); - while (len--) + while (length--) { - if ((len > 0) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1))) + if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1))) { - if (*s1 != *s2 || *(s1+1) != *(s2+1)) - return ((int) big5code(*s1,*(s1+1)) - - (int) big5code(*s2,*(s2+1))); - s1 +=2; - s2 +=2; - len--; - } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++]) - return ((int) sort_order_big5[(uchar) s1[-1]] - - (int) sort_order_big5[(uchar) s2[-1]]); + if (*a != *b || *(a+1) != *(b+1)) + return ((int) big5code(*a,*(a+1)) - + (int) big5code(*b,*(b+1))); + a+= 2; + b+= 2; + length--; + } + else if (sort_order_big5[(uchar) *a++] != + sort_order_big5[(uchar) *b++]) + return ((int) sort_order_big5[(uchar) a[-1]] - + (int) sort_order_big5[(uchar) b[-1]]); } - return (int) (len1-len2); + *a_res= a; + *b_res= b; + return 0; } -static -int my_strnncollsp_big5(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + +/* Compare strings */ + +static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_big5(cs,s,slen,t,tlen); + uint length= min(a_length, b_length); + int res= my_strnncoll_big5_internal(&a, &b, length); + return res ? res : (int) (a_length - b_length); } + +/* compare strings, ignore end space */ + +static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + uint length= min(a_length, b_length); + int res= my_strnncoll_big5_internal(&a, &b, length); + if (!res && a_length != b_length) + { + const uchar *end; + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in a */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; +} + + static int my_strnxfrm_big5(CHARSET_INFO *cs __attribute__((unused)), uchar * dest, uint len, const uchar * src, uint srclen) @@ -377,7 +417,7 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)), *min_length= (uint) (min_str-min_org); *max_length= res_length; do { - *min_str++ = ' '; /* Because if key compression */ + *min_str++ = 0; *max_str++ = max_sort_char; } while (min_str != min_end); return 0; diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c index ed8c0b5b415..5094a7c45da 100644 --- a/strings/ctype-czech.c +++ b/strings/ctype-czech.c @@ -165,169 +165,144 @@ static struct wordvalue doubles[] = { Na konci připojíme znak 0 */ -#define ADD_TO_RESULT(dest, len, totlen, value) \ - if ((totlen) < (len)) { dest[totlen] = value; } (totlen++); - -#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \ - while (1) /* we will make a loop */ \ - { \ - if (IS_END(p, src, len)) \ - /* when we are at the end of string */ \ - { /* return either 0 for end of string */ \ - /* or 1 for end of pass */ \ - if (pass == 3) { value = 0; break; } \ - if (pass == 0) p = store; \ - else p = src; \ - value = 1; pass++; break; \ - } \ - /* not at end of string */ \ - value = CZ_SORT_TABLE[pass][*p]; \ - \ - if (value == 0) { p++; continue; } /* ignore value */ \ - if (value == 2) /* space */ \ - { \ - const uchar * tmp; \ - const uchar * runner = ++p; \ - while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \ - runner++; /* skip all spaces */ \ - if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \ - p = runner; \ - if ((pass <= 2) && !(IS_END(runner, src, len))) \ - p = runner; \ - if (IS_END(p, src, len)) \ - continue; \ - /* we switch passes */ \ - if (pass > 1) \ - break; \ - tmp = p; \ - if (pass == 0) pass = 1; \ - else pass = 0; \ - p = store; store = tmp; \ - break; \ - } \ - if (value == 255) \ - { \ - int i; \ - for (i = 0; i < (int) sizeof(doubles); i++) \ - { \ - const char * pattern = doubles[i].word; \ - const char * q = (const char *) p; \ - int j = 0; \ - while (pattern[j]) \ - { \ - if (IS_END(q, src, len) || (*q != pattern[j])) \ - { break ; } \ - j++; q++; \ - } \ - if (!(pattern[j])) \ - { \ - value = (int)(doubles[i].outvalue[pass]); \ - p = (const uchar *) q - 1; \ - break; \ - } \ - } \ - } \ - p++; \ - break; \ - } - -#define IS_END(p, src, len) (!(*p)) - -#if 0 -/* Function strcoll, with Czech sorting, for zero terminated strings */ -static int my_strcoll_czech(const uchar * s1, const uchar * s2) - { - int v1, v2; - const uchar * p1, * p2, * store1, * store2; - int pass1 = 0, pass2 = 0; - int diff; - - p1 = s1; p2 = s2; - store1 = s1; store2 = s2; - - do - { - NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, 0); - NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, 0); - diff = v1 - v2; - if (diff != 0) return diff; - } - while (v1); - return 0; - } -#endif +#define ADD_TO_RESULT(dest, len, totlen, value) \ +if ((totlen) < (len)) { dest[totlen] = value; } (totlen++); +#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len)) -#if 0 -/* Function strxfrm, with Czech sorting, for zero terminated strings */ -static int my_strxfrm_czech(uchar * dest, const uchar * src, int len) +#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \ +while (1) \ +{ \ + if (IS_END(p, src, len)) \ + { \ + /* when we are at the end of string */ \ + /* return either 0 for end of string */ \ + /* or 1 for end of pass */ \ + value= 0; \ + if (pass != 3) \ + { \ + p= (pass++ == 0) ? store : src; \ + value = 1; \ + } \ + break; \ + } \ + /* not at end of string */ \ + value = CZ_SORT_TABLE[pass][*p]; \ + if (value == 0) \ + { p++; continue; } /* ignore value */ \ + if (value == 2) /* space */ \ + { \ + const uchar * tmp; \ + const uchar * runner = ++p; \ + while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \ + runner++; /* skip all spaces */ \ + if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \ + p = runner; \ + if ((pass <= 2) && !(IS_END(runner, src, len))) \ + p = runner; \ + if (IS_END(p, src, len)) \ + continue; \ + /* we switch passes */ \ + if (pass > 1) \ + break; \ + tmp = p; \ + pass= 1-pass; \ + p = store; store = tmp; \ + break; \ + } \ + if (value == 255) \ + { \ + int i; \ + for (i = 0; i < (int) sizeof(doubles); i++) \ + { \ + const char * pattern = doubles[i].word; \ + const char * q = (const char *) p; \ + int j = 0; \ + while (pattern[j]) \ + { \ + if (IS_END(q, src, len) || (*q != pattern[j])) \ + break; \ + j++; q++; \ + } \ + if (!(pattern[j])) \ + { \ + value = (int)(doubles[i].outvalue[pass]); \ + p= (const uchar *) q - 1; \ + break; \ + } \ + } \ + } \ + p++; \ + break; \ +} + +/* + Function strnncoll, actually strcoll, with Czech sorting, which expect + the length of the strings being specified +*/ + +static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)), + const uchar * s1, uint len1, + const uchar * s2, uint len2) { - int value; - const uchar * p, * store; - int pass = 0; - int totlen = 0; - p = store = src; - - do - { - NEXT_CMP_VALUE(src, p, store, pass, value, 0); - ADD_TO_RESULT(dest, len, totlen, value); - } - while (value); - return totlen; - } -#endif + int v1, v2; + const uchar * p1, * p2, * store1, * store2; + int pass1 = 0, pass2 = 0; + p1 = s1; p2 = s2; + store1 = s1; store2 = s2; -#undef IS_END + do + { + int diff; + NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1); + NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2); + if ((diff = v1 - v2)) + return diff; + } + while (v1); + return 0; +} -#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len)) -/* Function strnncoll, actually strcoll, with Czech sorting, which expect - the length of the strings being specified */ -static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) - { - int v1, v2; - const uchar * p1, * p2, * store1, * store2; - int pass1 = 0, pass2 = 0; - int diff; - - p1 = s1; p2 = s2; - store1 = s1; store2 = s2; - - do - { - NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1); - NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2); - diff = v1 - v2; - - if (diff != 0) return diff; - } - while (v1); - return 0; - } - -/* Function strnxfrm, actually strxfrm, with Czech sorting, which expect - the length of the strings being specified */ + +/* + TODO: Fix this one to compare strings as they are done in ctype-simple1 +*/ + +static +int my_strnncollsp_czech(CHARSET_INFO * cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + for ( ; slen && s[slen-1] == ' ' ; slen--); + for ( ; tlen && t[tlen-1] == ' ' ; tlen--); + return my_strnncoll_czech(cs,s,slen,t,tlen); +} + + +/* + Function strnxfrm, actually strxfrm, with Czech sorting, which expect + the length of the strings being specified +*/ + static int my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)), - uchar * dest, uint len, - const uchar * src, uint srclen) - { - int value; - const uchar * p, * store; - int pass = 0; - int totlen = 0; - p = src; store = src; - - do - { - NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen); - ADD_TO_RESULT(dest, (int)len, totlen, value); - } - while (value); - return totlen; - } + uchar * dest, uint len, + const uchar * src, uint srclen) +{ + int value; + const uchar * p, * store; + int pass = 0; + int totlen = 0; + p = src; store = src; + + do + { + NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen); + ADD_TO_RESULT(dest, (int)len, totlen, value); + } + while (value); + return totlen; +} #undef IS_END @@ -595,16 +570,6 @@ static MY_UNI_IDX idx_uni_8859_2[]={ }; -static -int my_strnncollsp_czech(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) -{ - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_czech(cs,s,slen,t,tlen); -} - static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler = { my_strnncoll_czech, diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c index 98511406ba9..1990060e67b 100644 --- a/strings/ctype-gbk.c +++ b/strings/ctype-gbk.c @@ -2582,40 +2582,74 @@ static uint16 gbksortorder(uint16 i) } -int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)), - const uchar * s1, uint len1, - const uchar * s2, uint len2) +int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res, + uint length) { - uint len,c1,c2; + const char *a= *a_res, *b= *b_res; + uint a_char,b_char; - len = min(len1,len2); - while (len--) + while (length--) { - if ((len > 0) && isgbkcode(*s1,*(s1+1)) && isgbkcode(*s2, *(s2+1))) + if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1))) { - c1=gbkcode(*s1,*(s1+1)); - c2=gbkcode(*s2,*(s2+1)); - if (c1!=c2) - return ((int) gbksortorder((uint16) c1) - - (int) gbksortorder((uint16) c2)); - s1+=2; - s2+=2; - --len; - } else if (sort_order_gbk[(uchar) *s1++] != sort_order_gbk[(uchar) *s2++]) - return ((int) sort_order_gbk[(uchar) s1[-1]] - - (int) sort_order_gbk[(uchar) s2[-1]]); + a_char= gbkcode(*a,*(a+1)); + b_char= gbkcode(*b,*(b+1)); + if (a_char != b_char) + return ((int) gbksortorder((uint16) a_char) - + (int) gbksortorder((uint16) b_char)); + a+= 2; + b+= 2; + length--; + } + else if (sort_order_gbk[(uchar) *a++] != sort_order_gbk[(uchar) *b++]) + return ((int) sort_order_gbk[(uchar) a[-1]] - + (int) sort_order_gbk[(uchar) b[-1]]); } - return (int) (len1-len2); + *a_res= a; + *b_res= b; + return 0; } -static -int my_strnncollsp_gbk(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + + +int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_gbk(cs,s,slen,t,tlen); + uint length= min(a_length, b_length); + int res= my_strnncoll_gbk_internal(&a, &b, length); + return res ? res : (int) (a_length - b_length); +} + + +static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + uint length= min(a_length, b_length); + int res= my_strnncoll_gbk_internal(&a, &b, length); + if (!res && a_length != b_length) + { + const uchar *end; + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in a */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; } @@ -2696,7 +2730,7 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)), *min_length= (uint) (min_str - min_org); *max_length= res_length; do { - *min_str++ = '\0'; /* Because if key compression */ + *min_str++= 0; *max_str++ = max_sort_char; } while (min_str != min_end); return 0; diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c index 0682b15d135..7a010c3bef8 100644 --- a/strings/ctype-latin1.c +++ b/strings/ctype-latin1.c @@ -319,51 +319,105 @@ uchar combo2map[]={ static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s1, uint len1, - const uchar *s2, uint len2) + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - const uchar *e1 = s1 + len1; - const uchar *e2 = s2 + len2; - uchar c1, c12=0, c2, c22=0; + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + uchar a_char, a_extend= 0, b_char, b_extend= 0; - while ((s1 < e1 || c12) && (s2 < e2 || c22)) + while ((a < a_end || a_extend) && (b < b_end || b_extend)) { - if (c12) + if (a_extend) { - c1=c12; c12=0; + a_char=a_extend; a_extend=0; } else { - c12=combo2map[*s1]; - c1=combo1map[*s1++]; + a_extend=combo2map[*a]; + a_char=combo1map[*a++]; } - if (c22) + if (b_extend) { - c2=c22; c22=0; + b_char=b_extend; b_extend=0; } else { - c22=combo2map[*s2]; - c2=combo1map[*s2++]; + b_extend=combo2map[*b]; + b_char=combo1map[*b++]; } - if (c1 != c2) return (int)c1 - (int)c2; + if (a_char != b_char) + return (int) a_char - (int) b_char; } - /* A simple test of string lengths won't work -- we test to see which string ran out first */ - return (s1 < e1 || c12) ? 1 : (s2 < e2 || c22) ? -1 : 0; + return ((a < a_end || a_extend) ? 1 : + (b < b_end || b_extend) ? -1 : 0); } -static int my_strnncollsp_latin1_de(CHARSET_INFO *cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) +static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_latin1_de(cs,s,slen,t,tlen); + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + uchar a_char, a_extend= 0, b_char, b_extend= 0; + + while ((a < a_end || a_extend) && (b < b_end || b_extend)) + { + if (a_extend) + { + a_char=a_extend; + a_extend= 0; + } + else + { + a_extend= combo2map[*a]; + a_char= combo1map[*a++]; + } + if (b_extend) + { + b_char= b_extend; + b_extend= 0; + } + else + { + b_extend= combo2map[*b]; + b_char= combo1map[*b++]; + } + if (a_char != b_char) + return (int) a_char - (int) b_char; + } + /* Check if double character last */ + if (a_extend) + return 1; + if (b_extend) + return -1; + + if (a != a_end || b != b_end) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a == a_end) + { + /* put shorter key in a */ + a_end= b_end; + a= b; + swap= -1; /* swap sign of result */ + } + for ( ; a < a_end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } @@ -385,6 +439,32 @@ static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)), } +void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)), + const uchar *key, uint len, + ulong *nr1, ulong *nr2) +{ + const uchar *end= key+len; + /* + Remove end space. We have to do this to be able to compare + 'AE' and 'Ä' as identical + */ + while (end > key && end[-1] == ' ') + end--; + + for (; key < end ; key++) + { + uint X= (uint) combo1map[(uint) *key]; + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); + nr2[0]+=3; + if ((X= combo2map[*key])) + { + nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); + nr2[0]+=3; + } + } +} + + static MY_COLLATION_HANDLER my_collation_german2_ci_handler= { my_strnncoll_latin1_de, @@ -394,7 +474,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler= my_wildcmp_8bit, my_strcasecmp_8bit, my_instr_simple, - my_hash_sort_simple + my_hash_sort_latin1_de }; diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c index 2f7cf698664..ed772a68845 100644 --- a/strings/ctype-mb.c +++ b/strings/ctype-mb.c @@ -347,6 +347,7 @@ uint my_instr_mb(CHARSET_INFO *cs, return 0; } + /* BINARY collations handlers for MB charsets */ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), @@ -357,20 +358,6 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)), return cmp ? cmp : (int) (slen - tlen); } -static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)), - const uchar *s, uint slen, - const uchar *t, uint tlen) -{ - int len, cmp; - - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - - len = ( slen > tlen ) ? tlen : slen; - - cmp= memcmp(s,t,len); - return cmp ? cmp : (int) (slen - tlen); -} static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)), uchar * dest, uint len, @@ -526,7 +513,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs, MY_COLLATION_HANDLER my_collation_mb_bin_handler = { my_strnncoll_mb_bin, - my_strnncollsp_mb_bin, + my_strnncoll_mb_bin, my_strnxfrm_mb_bin, my_like_range_simple, my_wildcmp_mb_bin, diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index 0aae60a0b56..c8eb3c07a3f 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -60,25 +60,69 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen, } -int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *s, uint slen, - const uchar *t, uint tlen) +/* + Compare strings, discarding end space + + SYNOPSIS + my_strnncollsp_simple() + cs character set handler + a First string to compare + a_length Length of 'a' + b Second string to compare + b_length Length of 'b' + + IMPLEMENTATION + If one string is shorter as the other, then we space extend the other + so that the strings have equal length. + + This will ensure that the following things hold: + + "a" == "a " + "a\0" < "a" + "a\0" < "a " + + RETURN + < 0 a < b + = 0 a == b + > 0 a > b +*/ + +int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length, + const uchar *b, uint b_length) { - uchar *map= cs->sort_order; - int len; - - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - - len = ( slen > tlen ) ? tlen : slen; - - while (len--) + const uchar *map= cs->sort_order, *end; + uint length; + + end= a + (length= min(a_length, b_length)); + while (a < end) { - if (map[*s++] != map[*t++]) - return ((int) map[s[-1]] - (int) map[t[-1]]); + if (map[*a++] != map[*b++]) + return ((int) map[a[-1]] - (int) map[b[-1]]); } - return (int) (slen-tlen); + if (a_length != b_length) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a_length < b_length) + { + /* put shorter key in s */ + a_length= b_length; + a= b; + swap= -1; /* swap sign of result */ + } + for (end= a + a_length-length; a < end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return 0; } + void my_caseup_str_8bit(CHARSET_INFO * cs,char *str) { register uchar *map=cs->to_upper; @@ -169,8 +213,8 @@ int my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)), void my_hash_sort_simple(CHARSET_INFO *cs, - const uchar *key, uint len, - ulong *nr1, ulong *nr2) + const uchar *key, uint len, + ulong *nr1, ulong *nr2) { register uchar *sort_order=cs->sort_order; const uchar *pos = key; @@ -953,9 +997,10 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, { *min_length= (uint) (min_str - min_org); *max_length=res_length; - do { - *min_str++ = ' '; /* Because if key compression */ - *max_str++ = (char) cs->max_sort_char; + do + { + *min_str++= 0; + *max_str++= (char) cs->max_sort_char; } while (min_str != min_end); return 0; } @@ -963,13 +1008,6 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, } *min_length= *max_length = (uint) (min_str - min_org); - /* Temporary fix for handling w_one at end of string (key compression) */ - { - char *tmp; - for (tmp= min_str ; tmp > min_org && tmp[-1] == '\0';) - *--tmp=' '; - } - while (min_str != min_end) *min_str++ = *max_str++ = ' '; /* Because if key compression */ return 0; diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 8ba650d1486..68cd77f96fc 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -184,7 +184,7 @@ static uchar NEAR sort_order_sjis[]= static int ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)), - const char* p, const char *e) + const char* p, const char *e) { return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 2: 0); } @@ -197,59 +197,101 @@ static int mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c) #define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d)) -static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)), - const uchar *s1, uint len1, - const uchar *s2, uint len2) + +static int my_strnncoll_sjis_internal(CHARSET_INFO *cs, + const uchar **a_res, uint a_length, + const uchar **b_res, uint b_length) { - const uchar *e1 = s1 + len1; - const uchar *e2 = s2 + len2; - while (s1 < e1 && s2 < e2) { - if (ismbchar_sjis(cs,(char*) s1, (char*) e1) && - ismbchar_sjis(cs,(char*) s2, (char*) e2)) { - uint c1 = sjiscode(*s1, *(s1+1)); - uint c2 = sjiscode(*s2, *(s2+1)); - if (c1 != c2) - return c1 - c2; - s1 += 2; - s2 += 2; - } else { - if (sort_order_sjis[(uchar)*s1] != sort_order_sjis[(uchar)*s2]) - return sort_order_sjis[(uchar)*s1] - sort_order_sjis[(uchar)*s2]; - s1++; - s2++; + const uchar *a= *a_res, *b= *b_res; + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + while (a < a_end && b < b_end) + { + if (ismbchar_sjis(cs,(char*) a, (char*) a_end) && + ismbchar_sjis(cs,(char*) b, (char*) b_end)) + { + uint a_char= sjiscode(*a, *(a+1)); + uint b_char= sjiscode(*b, *(b+1)); + if (a_char != b_char) + return a_char - b_char; + a += 2; + b += 2; + } else + { + if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b]) + return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b]; + a++; + b++; } } - return len1 - len2; + *a_res= a; + *b_res= b; + return 0; +} + + +static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) +{ + int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); + return res ? res : (int) (a_length - b_length); } -static -int my_strnncollsp_sjis(CHARSET_INFO * cs, - const uchar *s, uint slen, - const uchar *t, uint tlen) + +static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)), + const uchar *a, uint a_length, + const uchar *b, uint b_length) { - for ( ; slen && s[slen-1] == ' ' ; slen--); - for ( ; tlen && t[tlen-1] == ' ' ; tlen--); - return my_strnncoll_sjis(cs,s,slen,t,tlen); + const uchar *a_end= a + a_length; + const uchar *b_end= b + b_length; + int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length); + if (!res && (a != a_end || b != b_end)) + { + int swap= 0; + /* + Check the next not space character of the longer key. If it's < ' ', + then it's smaller than the other key. + */ + if (a == a_end) + { + /* put shorter key in a */ + a_end= b_end; + a= b; + swap= -1; /* swap sign of result */ + } + for (; a < a_end ; a++) + { + if (*a != ' ') + return ((int) *a - (int) ' ') ^ swap; + } + } + return res; } + + static int my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)), uchar *dest, uint len, const uchar *src, uint srclen) { uchar *d_end = dest + len; uchar *s_end = (uchar*) src + srclen; - while (dest < d_end && src < s_end) { - if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) { + while (dest < d_end && src < s_end) + { + if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) + { *dest++ = *src++; if (dest < d_end && src < s_end) *dest++ = *src++; - } else { - *dest++ = sort_order_sjis[(uchar)*src++]; } + else + *dest++ = sort_order_sjis[(uchar)*src++]; } return srclen; } + /* ** Calculate min_str and max_str that ranges a LIKE string. ** Arguments: @@ -300,12 +342,14 @@ static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)), ptr++; continue; } - if (*ptr == w_many) { /* '%' in SQL */ + if (*ptr == w_many) + { /* '%' in SQL */ *min_length = (uint)(min_str - min_org); *max_length = res_length; - do { - *min_str++ = ' '; /* Because if key compression */ - *max_str++ = max_sort_char; + do + { + *min_str++= 0; + *max_str++= max_sort_char; } while (min_str < min_end); return 0; } diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c index 59be820863a..954a3768536 100644 --- a/strings/ctype-tis620.c +++ b/strings/ctype-tis620.c @@ -518,6 +518,10 @@ static uint thai2sortable(uchar *tstr, uint len) strncoll() replacement, compare 2 string, both are converted to sortable string + NOTE: + We can't cut strings at end \0 as this would break comparision with + LIKE characters, where the min range is stored as end \0 + Arg: 2 Strings and it compare length Ret: strcmp result */ @@ -530,9 +534,6 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)), uchar *tc1, *tc2; int i; - /* Cut strings at end \0 */ - len1= (int) strnlen((char*) s1,len1); - len2= (int) strnlen((char*) s2,len2); tc1= buf; if ((len1 + len2 +2) > (int) sizeof(buf)) tc1= (uchar*) malloc(len1+len2); @@ -550,6 +551,10 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)), } +/* + TODO: Has to be fixed like strnncollsp in ctype-simple.c +*/ + static int my_strnncollsp_tis620(CHARSET_INFO * cs, const uchar *s, uint slen, @@ -637,8 +642,9 @@ my_bool my_like_range_tis620(CHARSET_INFO *cs __attribute__((unused)), { *min_length= (uint) (min_str - min_org); *max_length=res_length; - do { - *min_str++ = ' '; /* Because of key compression */ + do + { + *min_str++ = 0; *max_str++ = max_sort_chr; } while (min_str != min_end); return 0; diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 26d7568e6a2..99d97a9614b 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -90,8 +90,8 @@ static uchar to_upper_ucs2[] = { }; -static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), - my_wc_t * pwc, const uchar *s, const uchar *e) +static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t * pwc, const uchar *s, const uchar *e) { if (s+2 > e) /* Need 2 characters */ return MY_CS_TOOFEW(0); @@ -100,8 +100,8 @@ static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)), return 2; } -static int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) , - my_wc_t wc, uchar *r, uchar *e) +static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) , + my_wc_t wc, uchar *r, uchar *e) { if ( r+2 > e ) return MY_CS_TOOSMALL; @@ -128,13 +128,15 @@ static void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen) } } -static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2) + +static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, + ulong *n1, ulong *n2) { my_wc_t wc; int res; const uchar *e=s+slen; - while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) + while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) { int plane = (wc>>8) & 0xFF; wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc; @@ -148,7 +150,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), - char * s __attribute__((unused))) + char * s __attribute__((unused))) { } @@ -173,13 +175,14 @@ static void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen) } static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)), - char * s __attribute__((unused))) + char * s __attribute__((unused))) { } static int my_strnncoll_ucs2(CHARSET_INFO *cs, - const uchar *s, uint slen, const uchar *t, uint tlen) + const uchar *s, uint slen, + const uchar *t, uint tlen) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -213,8 +216,9 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + static int my_strncasecmp_ucs2(CHARSET_INFO *cs, - const char *s, const char *t, uint len) + const char *s, const char *t, uint len) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -249,6 +253,7 @@ static int my_strncasecmp_ucs2(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) { uint s_len=strlen(s); @@ -257,6 +262,7 @@ static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t) return my_strncasecmp_ucs2(cs, s, t, len); } + static int my_strnxfrm_ucs2(CHARSET_INFO *cs, uchar *dst, uint dstlen, const uchar *src, uint srclen) { @@ -288,6 +294,7 @@ static int my_strnxfrm_ucs2(CHARSET_INFO *cs, return dst - dst_orig; } + static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), const char *b __attribute__((unused)), const char *e __attribute__((unused))) @@ -295,6 +302,7 @@ static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)), return 2; } + static int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) , uint c __attribute__((unused))) { @@ -380,8 +388,8 @@ static int my_vsnprintf_ucs2(char *dst, uint n, const char* fmt, va_list ap) return (uint) (dst - start); } -static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)) - ,char* to, uint n, const char* fmt, ...) +static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)), + char* to, uint n, const char* fmt, ...) { va_list args; va_start(args,fmt); @@ -389,9 +397,9 @@ static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)) } -long my_strntol_ucs2(CHARSET_INFO *cs, - const char *nptr, uint l, int base, - char **endptr, int *err) +long my_strntol_ucs2(CHARSET_INFO *cs, + const char *nptr, uint l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -504,9 +512,9 @@ bs: } -ulong my_strntoul_ucs2(CHARSET_INFO *cs, - const char *nptr, uint l, int base, - char **endptr, int *err) +ulong my_strntoul_ucs2(CHARSET_INFO *cs, + const char *nptr, uint l, int base, + char **endptr, int *err) { int negative=0; int overflow; @@ -1334,8 +1342,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs, *min_length= (uint) (min_str - min_org); *max_length=res_length; do { - *min_str++ = '\0'; - *min_str++ = ' '; /* Because if key compression */ + *min_str++ = 0; + *min_str++ = 0; *max_str++ = (char) cs->max_sort_char >>8; *max_str++ = (char) cs->max_sort_char & 255; } while (min_str + 1 < min_end); diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index dca73e5a79f..886ecfbd0c9 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1801,7 +1801,8 @@ static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s) static int my_strnncoll_utf8(CHARSET_INFO *cs, - const uchar *s, uint slen, const uchar *t, uint tlen) + const uchar *s, uint slen, + const uchar *t, uint tlen) { int s_res,t_res; my_wc_t s_wc,t_wc; @@ -1835,6 +1836,11 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs, return ( (se-s) - (te-t) ); } + +/* + TODO: Has to be fixed as strnncollsp in ctype-simple +*/ + static int my_strnncollsp_utf8(CHARSET_INFO * cs, const uchar *s, uint slen, diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c index bda349f1988..8fd4e612713 100644 --- a/strings/ctype-win1250ch.c +++ b/strings/ctype-win1250ch.c @@ -467,6 +467,10 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)), } +/* + TODO: Has to be fixed as strnncollsp in ctype-simple +*/ + static int my_strnncollsp_win1250ch(CHARSET_INFO * cs, const uchar *s, uint slen, |