summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
author unknown <monty@mysql.com> 2004-03-25 15:05:42 +0200
committer unknown <monty@mysql.com> 2004-03-25 15:05:42 +0200
commit fe596dee5869ac1f99a9d88061bc9dff402849f6 (patch)
tree b090f762169aabf4fa3602b52d4463cd9c66106b /strings
parent 23e480a80c64ca9b390a6fa82f68d9f8bbb1fa67 (diff)
parent 887d6f144d85b9a869e4f8030c41816bbd32771b (diff)
download mariadb-git-fe596dee5869ac1f99a9d88061bc9dff402849f6.tar.gz
Merge bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/home/my/mysql-4.1 myisam/mi_check.c: Auto merged sql/field.cc: Auto merged strings/ctype-sjis.c: Auto merged strings/ctype-ucs2.c: Auto merged
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-big5.c 90
-rw-r--r-- strings/ctype-czech.c 297
-rw-r--r-- strings/ctype-gbk.c 88
-rw-r--r-- strings/ctype-latin1.c 128
-rw-r--r-- strings/ctype-mb.c 17
-rw-r--r-- strings/ctype-simple.c 90
-rw-r--r-- strings/ctype-sjis.c 116
-rw-r--r-- strings/ctype-tis620.c 16
-rw-r--r-- strings/ctype-ucs2.c 48
-rw-r--r-- strings/ctype-utf8.c 8
-rw-r--r-- strings/ctype-win1250ch.c 4
11 files changed, 557 insertions, 345 deletions
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index ee55cfda6c1..2bde29ecc47 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -218,40 +218,80 @@ static uint16 big5strokexfrm(uint16 i)
return 0xA140;
}
-static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
- const uchar * s1, uint len1,
- const uchar * s2, uint len2)
+
+
+static int my_strnncoll_big5_internal(const uchar **a_res,
+ const uchar **b_res, uint length)
{
- uint len;
+ const char *a= *a_res, *b= *b_res;
- len = min(len1,len2);
- while (len--)
+ while (length--)
{
- if ((len > 0) && isbig5code(*s1,*(s1+1)) && isbig5code(*s2, *(s2+1)))
+ if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1)))
{
- if (*s1 != *s2 || *(s1+1) != *(s2+1))
- return ((int) big5code(*s1,*(s1+1)) -
- (int) big5code(*s2,*(s2+1)));
- s1 +=2;
- s2 +=2;
- len--;
- } else if (sort_order_big5[(uchar) *s1++] != sort_order_big5[(uchar) *s2++])
- return ((int) sort_order_big5[(uchar) s1[-1]] -
- (int) sort_order_big5[(uchar) s2[-1]]);
+ if (*a != *b || *(a+1) != *(b+1))
+ return ((int) big5code(*a,*(a+1)) -
+ (int) big5code(*b,*(b+1)));
+ a+= 2;
+ b+= 2;
+ length--;
+ }
+ else if (sort_order_big5[(uchar) *a++] !=
+ sort_order_big5[(uchar) *b++])
+ return ((int) sort_order_big5[(uchar) a[-1]] -
+ (int) sort_order_big5[(uchar) b[-1]]);
}
- return (int) (len1-len2);
+ *a_res= a;
+ *b_res= b;
+ return 0;
}
-static
-int my_strnncollsp_big5(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+
+/* Compare strings */
+
+static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_big5(cs,s,slen,t,tlen);
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_big5_internal(&a, &b, length);
+ return res ? res : (int) (a_length - b_length);
}
+
+/* compare strings, ignore end space */
+
+static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
+{
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_big5_internal(&a, &b, length);
+ if (!res && a_length != b_length)
+ {
+ const uchar *end;
+ int swap= 0;
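+ /*
+ Find the first non-space byte in the tail of the longer key; if it is < ' ', the longer key sorts before the shorter one.
+ NOTE(review): with swap == -1, 'x ^ swap' is ~x (-x - 1), not -x, so a tail byte of 0x1F (x == -1) yields 0 ("equal").
+ */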
+ if (a_length < b_length)
+ {
+ /* put longer key in a */
+ a_length= b_length;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (end= a + a_length-length; a < end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return res;
+}
+
+
static int my_strnxfrm_big5(CHARSET_INFO *cs __attribute__((unused)),
uchar * dest, uint len,
const uchar * src, uint srclen)
@@ -377,7 +417,7 @@ static my_bool my_like_range_big5(CHARSET_INFO *cs __attribute__((unused)),
*min_length= (uint) (min_str-min_org);
*max_length= res_length;
do {
- *min_str++ = ' '; /* Because if key compression */
+ *min_str++ = 0;
*max_str++ = max_sort_char;
} while (min_str != min_end);
return 0;
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index ed8c0b5b415..5094a7c45da 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -165,169 +165,144 @@ static struct wordvalue doubles[] = {
Na konci připojíme znak 0
*/
-#define ADD_TO_RESULT(dest, len, totlen, value) \
- if ((totlen) < (len)) { dest[totlen] = value; } (totlen++);
-
-#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \
- while (1) /* we will make a loop */ \
- { \
- if (IS_END(p, src, len)) \
- /* when we are at the end of string */ \
- { /* return either 0 for end of string */ \
- /* or 1 for end of pass */ \
- if (pass == 3) { value = 0; break; } \
- if (pass == 0) p = store; \
- else p = src; \
- value = 1; pass++; break; \
- } \
- /* not at end of string */ \
- value = CZ_SORT_TABLE[pass][*p]; \
- \
- if (value == 0) { p++; continue; } /* ignore value */ \
- if (value == 2) /* space */ \
- { \
- const uchar * tmp; \
- const uchar * runner = ++p; \
- while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \
- runner++; /* skip all spaces */ \
- if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \
- p = runner; \
- if ((pass <= 2) && !(IS_END(runner, src, len))) \
- p = runner; \
- if (IS_END(p, src, len)) \
- continue; \
- /* we switch passes */ \
- if (pass > 1) \
- break; \
- tmp = p; \
- if (pass == 0) pass = 1; \
- else pass = 0; \
- p = store; store = tmp; \
- break; \
- } \
- if (value == 255) \
- { \
- int i; \
- for (i = 0; i < (int) sizeof(doubles); i++) \
- { \
- const char * pattern = doubles[i].word; \
- const char * q = (const char *) p; \
- int j = 0; \
- while (pattern[j]) \
- { \
- if (IS_END(q, src, len) || (*q != pattern[j])) \
- { break ; } \
- j++; q++; \
- } \
- if (!(pattern[j])) \
- { \
- value = (int)(doubles[i].outvalue[pass]); \
- p = (const uchar *) q - 1; \
- break; \
- } \
- } \
- } \
- p++; \
- break; \
- }
-
-#define IS_END(p, src, len) (!(*p))
-
-#if 0
-/* Function strcoll, with Czech sorting, for zero terminated strings */
-static int my_strcoll_czech(const uchar * s1, const uchar * s2)
- {
- int v1, v2;
- const uchar * p1, * p2, * store1, * store2;
- int pass1 = 0, pass2 = 0;
- int diff;
-
- p1 = s1; p2 = s2;
- store1 = s1; store2 = s2;
-
- do
- {
- NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, 0);
- NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, 0);
- diff = v1 - v2;
- if (diff != 0) return diff;
- }
- while (v1);
- return 0;
- }
-#endif
+#define ADD_TO_RESULT(dest, len, totlen, value) \
+if ((totlen) < (len)) { dest[totlen] = value; } (totlen++);
+#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len))
-#if 0
-/* Function strxfrm, with Czech sorting, for zero terminated strings */
-static int my_strxfrm_czech(uchar * dest, const uchar * src, int len)
+#define NEXT_CMP_VALUE(src, p, store, pass, value, len) \
+while (1) \
+{ \
+ if (IS_END(p, src, len)) \
+ { \
+ /* when we are at the end of string */ \
+ /* return either 0 for end of string */ \
+ /* or 1 for end of pass */ \
+ value= 0; \
+ if (pass != 3) \
+ { \
+ p= (pass++ == 0) ? store : src; \
+ value = 1; \
+ } \
+ break; \
+ } \
+ /* not at end of string */ \
+ value = CZ_SORT_TABLE[pass][*p]; \
+ if (value == 0) \
+ { p++; continue; } /* ignore value */ \
+ if (value == 2) /* space */ \
+ { \
+ const uchar * tmp; \
+ const uchar * runner = ++p; \
+ while (!(IS_END(runner, src, len)) && (CZ_SORT_TABLE[pass][*runner] == 2)) \
+ runner++; /* skip all spaces */ \
+ if (IS_END(runner, src, len) && SKIP_TRAILING_SPACES) \
+ p = runner; \
+ if ((pass <= 2) && !(IS_END(runner, src, len))) \
+ p = runner; \
+ if (IS_END(p, src, len)) \
+ continue; \
+ /* we switch passes */ \
+ if (pass > 1) \
+ break; \
+ tmp = p; \
+ pass= 1-pass; \
+ p = store; store = tmp; \
+ break; \
+ } \
+ if (value == 255) \
+ { \
+ int i; \
+ for (i = 0; i < (int) sizeof(doubles); i++) \
+ { \
+ const char * pattern = doubles[i].word; \
+ const char * q = (const char *) p; \
+ int j = 0; \
+ while (pattern[j]) \
+ { \
+ if (IS_END(q, src, len) || (*q != pattern[j])) \
+ break; \
+ j++; q++; \
+ } \
+ if (!(pattern[j])) \
+ { \
+ value = (int)(doubles[i].outvalue[pass]); \
+ p= (const uchar *) q - 1; \
+ break; \
+ } \
+ } \
+ } \
+ p++; \
+ break; \
+}
+
+/*
+ strnncoll (in effect strcoll) with Czech sorting rules; the lengths of
+ both strings are passed explicitly.
+*/
+
+static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar * s1, uint len1,
+ const uchar * s2, uint len2)
{
- int value;
- const uchar * p, * store;
- int pass = 0;
- int totlen = 0;
- p = store = src;
-
- do
- {
- NEXT_CMP_VALUE(src, p, store, pass, value, 0);
- ADD_TO_RESULT(dest, len, totlen, value);
- }
- while (value);
- return totlen;
- }
-#endif
+ int v1, v2;
+ const uchar * p1, * p2, * store1, * store2;
+ int pass1 = 0, pass2 = 0;
+ p1 = s1; p2 = s2;
+ store1 = s1; store2 = s2;
-#undef IS_END
+ do
+ {
+ int diff;
+ NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1);
+ NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2);
+ if ((diff = v1 - v2))
+ return diff;
+ }
+ while (v1);
+ return 0;
+}
-#define IS_END(p, src, len) (((char *)p - (char *)src) >= (len))
-/* Function strnncoll, actually strcoll, with Czech sorting, which expect
- the length of the strings being specified */
-static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
- const uchar * s1, uint len1,
- const uchar * s2, uint len2)
- {
- int v1, v2;
- const uchar * p1, * p2, * store1, * store2;
- int pass1 = 0, pass2 = 0;
- int diff;
-
- p1 = s1; p2 = s2;
- store1 = s1; store2 = s2;
-
- do
- {
- NEXT_CMP_VALUE(s1, p1, store1, pass1, v1, (int)len1);
- NEXT_CMP_VALUE(s2, p2, store2, pass2, v2, (int)len2);
- diff = v1 - v2;
-
- if (diff != 0) return diff;
- }
- while (v1);
- return 0;
- }
-
-/* Function strnxfrm, actually strxfrm, with Czech sorting, which expect
- the length of the strings being specified */
+
+/*
+ TODO: Fix this one to compare strings the way my_strnncollsp_simple() in ctype-simple.c does
+*/
+
+static
+int my_strnncollsp_czech(CHARSET_INFO * cs,
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
+{
+ for ( ; slen && s[slen-1] == ' ' ; slen--);
+ for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
+ return my_strnncoll_czech(cs,s,slen,t,tlen);
+}
+
+
+/*
+ strnxfrm (in effect strxfrm) with Czech sorting rules; the lengths of
+ the destination buffer and of the source string are passed explicitly.
+*/
+
static int my_strnxfrm_czech(CHARSET_INFO *cs __attribute__((unused)),
- uchar * dest, uint len,
- const uchar * src, uint srclen)
- {
- int value;
- const uchar * p, * store;
- int pass = 0;
- int totlen = 0;
- p = src; store = src;
-
- do
- {
- NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen);
- ADD_TO_RESULT(dest, (int)len, totlen, value);
- }
- while (value);
- return totlen;
- }
+ uchar * dest, uint len,
+ const uchar * src, uint srclen)
+{
+ int value;
+ const uchar * p, * store;
+ int pass = 0;
+ int totlen = 0;
+ p = src; store = src;
+
+ do
+ {
+ NEXT_CMP_VALUE(src, p, store, pass, value, (int)srclen);
+ ADD_TO_RESULT(dest, (int)len, totlen, value);
+ }
+ while (value);
+ return totlen;
+}
#undef IS_END
@@ -595,16 +570,6 @@ static MY_UNI_IDX idx_uni_8859_2[]={
};
-static
-int my_strnncollsp_czech(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
-{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_czech(cs,s,slen,t,tlen);
-}
-
static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
{
my_strnncoll_czech,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 98511406ba9..1990060e67b 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -2582,40 +2582,74 @@ static uint16 gbksortorder(uint16 i)
}
-int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
- const uchar * s1, uint len1,
- const uchar * s2, uint len2)
+int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
+ uint length)
{
- uint len,c1,c2;
+ const char *a= *a_res, *b= *b_res;
+ uint a_char,b_char;
- len = min(len1,len2);
- while (len--)
+ while (length--)
{
- if ((len > 0) && isgbkcode(*s1,*(s1+1)) && isgbkcode(*s2, *(s2+1)))
+ if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
{
- c1=gbkcode(*s1,*(s1+1));
- c2=gbkcode(*s2,*(s2+1));
- if (c1!=c2)
- return ((int) gbksortorder((uint16) c1) -
- (int) gbksortorder((uint16) c2));
- s1+=2;
- s2+=2;
- --len;
- } else if (sort_order_gbk[(uchar) *s1++] != sort_order_gbk[(uchar) *s2++])
- return ((int) sort_order_gbk[(uchar) s1[-1]] -
- (int) sort_order_gbk[(uchar) s2[-1]]);
+ a_char= gbkcode(*a,*(a+1));
+ b_char= gbkcode(*b,*(b+1));
+ if (a_char != b_char)
+ return ((int) gbksortorder((uint16) a_char) -
+ (int) gbksortorder((uint16) b_char));
+ a+= 2;
+ b+= 2;
+ length--;
+ }
+ else if (sort_order_gbk[(uchar) *a++] != sort_order_gbk[(uchar) *b++])
+ return ((int) sort_order_gbk[(uchar) a[-1]] -
+ (int) sort_order_gbk[(uchar) b[-1]]);
}
- return (int) (len1-len2);
+ *a_res= a;
+ *b_res= b;
+ return 0;
}
-static
-int my_strnncollsp_gbk(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+
+
+int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_gbk(cs,s,slen,t,tlen);
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_gbk_internal(&a, &b, length);
+ return res ? res : (int) (a_length - b_length);
+}
+
+
+static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
+{
+ uint length= min(a_length, b_length);
+ int res= my_strnncoll_gbk_internal(&a, &b, length);
+ if (!res && a_length != b_length)
+ {
+ const uchar *end;
+ int swap= 0;
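+ /*
+ Find the first non-space byte in the tail of the longer key; if it is < ' ', the longer key sorts before the shorter one.
+ NOTE(review): with swap == -1, 'x ^ swap' is ~x (-x - 1), not -x, so a tail byte of 0x1F (x == -1) yields 0 ("equal").
+ */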
+ if (a_length < b_length)
+ {
+ /* put longer key in a */
+ a_length= b_length;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (end= a + a_length-length; a < end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return res;
}
@@ -2696,7 +2730,7 @@ static my_bool my_like_range_gbk(CHARSET_INFO *cs __attribute__((unused)),
*min_length= (uint) (min_str - min_org);
*max_length= res_length;
do {
- *min_str++ = '\0'; /* Because if key compression */
+ *min_str++= 0;
*max_str++ = max_sort_char;
} while (min_str != min_end);
return 0;
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 0682b15d135..7a010c3bef8 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -319,51 +319,105 @@ uchar combo2map[]={
static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *s1, uint len1,
- const uchar *s2, uint len2)
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- const uchar *e1 = s1 + len1;
- const uchar *e2 = s2 + len2;
- uchar c1, c12=0, c2, c22=0;
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ uchar a_char, a_extend= 0, b_char, b_extend= 0;
- while ((s1 < e1 || c12) && (s2 < e2 || c22))
+ while ((a < a_end || a_extend) && (b < b_end || b_extend))
{
- if (c12)
+ if (a_extend)
{
- c1=c12; c12=0;
+ a_char=a_extend; a_extend=0;
}
else
{
- c12=combo2map[*s1];
- c1=combo1map[*s1++];
+ a_extend=combo2map[*a];
+ a_char=combo1map[*a++];
}
- if (c22)
+ if (b_extend)
{
- c2=c22; c22=0;
+ b_char=b_extend; b_extend=0;
}
else
{
- c22=combo2map[*s2];
- c2=combo1map[*s2++];
+ b_extend=combo2map[*b];
+ b_char=combo1map[*b++];
}
- if (c1 != c2) return (int)c1 - (int)c2;
+ if (a_char != b_char)
+ return (int) a_char - (int) b_char;
}
-
/*
A simple test of string lengths won't work -- we test to see
which string ran out first
*/
- return (s1 < e1 || c12) ? 1 : (s2 < e2 || c22) ? -1 : 0;
+ return ((a < a_end || a_extend) ? 1 :
+ (b < b_end || b_extend) ? -1 : 0);
}
-static int my_strnncollsp_latin1_de(CHARSET_INFO *cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_latin1_de(cs,s,slen,t,tlen);
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ uchar a_char, a_extend= 0, b_char, b_extend= 0;
+
+ while ((a < a_end || a_extend) && (b < b_end || b_extend))
+ {
+ if (a_extend)
+ {
+ a_char=a_extend;
+ a_extend= 0;
+ }
+ else
+ {
+ a_extend= combo2map[*a];
+ a_char= combo1map[*a++];
+ }
+ if (b_extend)
+ {
+ b_char= b_extend;
+ b_extend= 0;
+ }
+ else
+ {
+ b_extend= combo2map[*b];
+ b_char= combo1map[*b++];
+ }
+ if (a_char != b_char)
+ return (int) a_char - (int) b_char;
+ }
+ /* Check if double character last */
+ if (a_extend)
+ return 1;
+ if (b_extend)
+ return -1;
+
+ if (a != a_end || b != b_end)
+ {
+ int swap= 0;
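+ /*
+ Find the first non-space byte in the unconsumed tail of the longer key; if it is < ' ', that key sorts before the other one.
+ NOTE(review): with swap == -1, 'x ^ swap' is ~x (-x - 1), not -x, so a tail byte of 0x1F (x == -1) yields 0 ("equal").
+ */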
+ if (a == a_end)
+ {
+ /* a is exhausted: scan the rest of b instead */
+ a_end= b_end;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for ( ; a < a_end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return 0;
}
@@ -385,6 +439,32 @@ static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
}
+void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *key, uint len,
+ ulong *nr1, ulong *nr2)
+{
+ const uchar *end= key+len;
+ /*
+ Remove end space. We have to do this to be able to compare
+ 'AE' and 'Ä' as identical
+ */
+ while (end > key && end[-1] == ' ')
+ end--;
+
+ for (; key < end ; key++)
+ {
+ uint X= (uint) combo1map[(uint) *key];
+ nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
+ nr2[0]+=3;
+ if ((X= combo2map[*key]))
+ {
+ nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
+ nr2[0]+=3;
+ }
+ }
+}
+
+
static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
{
my_strnncoll_latin1_de,
@@ -394,7 +474,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
my_wildcmp_8bit,
my_strcasecmp_8bit,
my_instr_simple,
- my_hash_sort_simple
+ my_hash_sort_latin1_de
};
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 2f7cf698664..ed772a68845 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -347,6 +347,7 @@ uint my_instr_mb(CHARSET_INFO *cs,
return 0;
}
+
/* BINARY collations handlers for MB charsets */
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
@@ -357,20 +358,6 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
return cmp ? cmp : (int) (slen - tlen);
}
-static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
-{
- int len, cmp;
-
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-
- len = ( slen > tlen ) ? tlen : slen;
-
- cmp= memcmp(s,t,len);
- return cmp ? cmp : (int) (slen - tlen);
-}
static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
uchar * dest, uint len,
@@ -526,7 +513,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
{
my_strnncoll_mb_bin,
- my_strnncollsp_mb_bin,
+ my_strnncoll_mb_bin,
my_strnxfrm_mb_bin,
my_like_range_simple,
my_wildcmp_mb_bin,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 0aae60a0b56..c8eb3c07a3f 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -60,25 +60,69 @@ int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen,
}
-int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *s, uint slen,
- const uchar *t, uint tlen)
+/*
+ Compare strings, discarding end space
+
+ SYNOPSIS
+ my_strnncollsp_simple()
+ cs character set handler
+ a First string to compare
+ a_length Length of 'a'
+ b Second string to compare
+ b_length Length of 'b'
+
+ IMPLEMENTATION
+ If one string is shorter than the other, we space-extend the shorter one
+ so that the strings have equal length.
+
+ This will ensure that the following things hold:
+
+ "a" == "a "
+ "a\0" < "a"
+ "a\0" < "a "
+
+ RETURN
+ < 0 a < b
+ = 0 a == b
+ > 0 a > b
+*/
+
+int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- uchar *map= cs->sort_order;
- int len;
-
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
-
- len = ( slen > tlen ) ? tlen : slen;
-
- while (len--)
+ const uchar *map= cs->sort_order, *end;
+ uint length;
+
+ end= a + (length= min(a_length, b_length));
+ while (a < end)
{
- if (map[*s++] != map[*t++])
- return ((int) map[s[-1]] - (int) map[t[-1]]);
+ if (map[*a++] != map[*b++])
+ return ((int) map[a[-1]] - (int) map[b[-1]]);
}
- return (int) (slen-tlen);
+ if (a_length != b_length)
+ {
+ int swap= 0;
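+ /*
+ Find the first non-space byte in the tail of the longer key; if it is < ' ', the longer key sorts before the shorter one.
+ NOTE(review): with swap == -1, 'x ^ swap' is ~x (-x - 1), not -x, so a tail byte of 0x1F (x == -1) yields 0 ("equal").
+ */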
+ if (a_length < b_length)
+ {
+ /* put longer key in a */
+ a_length= b_length;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (end= a + a_length-length; a < end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return 0;
}
+
void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{
register uchar *map=cs->to_upper;
@@ -169,8 +213,8 @@ int my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)),
void my_hash_sort_simple(CHARSET_INFO *cs,
- const uchar *key, uint len,
- ulong *nr1, ulong *nr2)
+ const uchar *key, uint len,
+ ulong *nr1, ulong *nr2)
{
register uchar *sort_order=cs->sort_order;
const uchar *pos = key;
@@ -953,9 +997,10 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
{
*min_length= (uint) (min_str - min_org);
*max_length=res_length;
- do {
- *min_str++ = ' '; /* Because if key compression */
- *max_str++ = (char) cs->max_sort_char;
+ do
+ {
+ *min_str++= 0;
+ *max_str++= (char) cs->max_sort_char;
} while (min_str != min_end);
return 0;
}
@@ -963,13 +1008,6 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
}
*min_length= *max_length = (uint) (min_str - min_org);
- /* Temporary fix for handling w_one at end of string (key compression) */
- {
- char *tmp;
- for (tmp= min_str ; tmp > min_org && tmp[-1] == '\0';)
- *--tmp=' ';
- }
-
while (min_str != min_end)
*min_str++ = *max_str++ = ' '; /* Because if key compression */
return 0;
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 8ba650d1486..68cd77f96fc 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -184,7 +184,7 @@ static uchar NEAR sort_order_sjis[]=
static int ismbchar_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const char* p, const char *e)
+ const char* p, const char *e)
{
return (issjishead((uchar) *p) && (e-p)>1 && issjistail((uchar)p[1]) ? 2: 0);
}
@@ -197,59 +197,101 @@ static int mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
-static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
- const uchar *s1, uint len1,
- const uchar *s2, uint len2)
+
+static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
+ const uchar **a_res, uint a_length,
+ const uchar **b_res, uint b_length)
{
- const uchar *e1 = s1 + len1;
- const uchar *e2 = s2 + len2;
- while (s1 < e1 && s2 < e2) {
- if (ismbchar_sjis(cs,(char*) s1, (char*) e1) &&
- ismbchar_sjis(cs,(char*) s2, (char*) e2)) {
- uint c1 = sjiscode(*s1, *(s1+1));
- uint c2 = sjiscode(*s2, *(s2+1));
- if (c1 != c2)
- return c1 - c2;
- s1 += 2;
- s2 += 2;
- } else {
- if (sort_order_sjis[(uchar)*s1] != sort_order_sjis[(uchar)*s2])
- return sort_order_sjis[(uchar)*s1] - sort_order_sjis[(uchar)*s2];
- s1++;
- s2++;
+ const uchar *a= *a_res, *b= *b_res;
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ while (a < a_end && b < b_end)
+ {
+ if (ismbchar_sjis(cs,(char*) a, (char*) a_end) &&
+ ismbchar_sjis(cs,(char*) b, (char*) b_end))
+ {
+ uint a_char= sjiscode(*a, *(a+1));
+ uint b_char= sjiscode(*b, *(b+1));
+ if (a_char != b_char)
+ return a_char - b_char;
+ a += 2;
+ b += 2;
+ } else
+ {
+ if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b])
+ return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b];
+ a++;
+ b++;
}
}
- return len1 - len2;
+ *a_res= a;
+ *b_res= b;
+ return 0;
+}
+
+
+static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
+{
+ int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
+ return res ? res : (int) (a_length - b_length);
}
-static
-int my_strnncollsp_sjis(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+
+static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)),
+ const uchar *a, uint a_length,
+ const uchar *b, uint b_length)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_sjis(cs,s,slen,t,tlen);
+ const uchar *a_end= a + a_length;
+ const uchar *b_end= b + b_length;
+ int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
+ if (!res && (a != a_end || b != b_end))
+ {
+ int swap= 0;
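+ /*
+ Find the first non-space byte in the unconsumed tail of the longer key; if it is < ' ', that key sorts before the other one.
+ NOTE(review): with swap == -1, 'x ^ swap' is ~x (-x - 1), not -x, so a tail byte of 0x1F (x == -1) yields 0 ("equal").
+ */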
+ if (a == a_end)
+ {
+ /* a is exhausted: scan the rest of b instead */
+ a_end= b_end;
+ a= b;
+ swap= -1; /* swap sign of result */
+ }
+ for (; a < a_end ; a++)
+ {
+ if (*a != ' ')
+ return ((int) *a - (int) ' ') ^ swap;
+ }
+ }
+ return res;
}
+
+
static int my_strnxfrm_sjis(CHARSET_INFO *cs __attribute__((unused)),
uchar *dest, uint len,
const uchar *src, uint srclen)
{
uchar *d_end = dest + len;
uchar *s_end = (uchar*) src + srclen;
- while (dest < d_end && src < s_end) {
- if (ismbchar_sjis(cs,(char*) src, (char*) s_end)) {
+ while (dest < d_end && src < s_end)
+ {
+ if (ismbchar_sjis(cs,(char*) src, (char*) s_end))
+ {
*dest++ = *src++;
if (dest < d_end && src < s_end)
*dest++ = *src++;
- } else {
- *dest++ = sort_order_sjis[(uchar)*src++];
}
+ else
+ *dest++ = sort_order_sjis[(uchar)*src++];
}
return srclen;
}
+
/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
@@ -300,12 +342,14 @@ static my_bool my_like_range_sjis(CHARSET_INFO *cs __attribute__((unused)),
ptr++;
continue;
}
- if (*ptr == w_many) { /* '%' in SQL */
+ if (*ptr == w_many)
+ { /* '%' in SQL */
*min_length = (uint)(min_str - min_org);
*max_length = res_length;
- do {
- *min_str++ = ' '; /* Because if key compression */
- *max_str++ = max_sort_char;
+ do
+ {
+ *min_str++= 0;
+ *max_str++= max_sort_char;
} while (min_str < min_end);
return 0;
}
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 59be820863a..954a3768536 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -518,6 +518,10 @@ static uint thai2sortable(uchar *tstr, uint len)
strncoll() replacement, compare 2 string, both are converted to sortable
string
+ NOTE:
+ We can't cut the strings at the first \0, as this would break comparison
+ against LIKE ranges, where the minimum key is padded with trailing \0 bytes
+
Arg: 2 Strings and it compare length
Ret: strcmp result
*/
@@ -530,9 +534,6 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
uchar *tc1, *tc2;
int i;
- /* Cut strings at end \0 */
- len1= (int) strnlen((char*) s1,len1);
- len2= (int) strnlen((char*) s2,len2);
tc1= buf;
if ((len1 + len2 +2) > (int) sizeof(buf))
tc1= (uchar*) malloc(len1+len2);
@@ -550,6 +551,10 @@ int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ TODO: Has to be fixed like strnncollsp in ctype-simple.c
+*/
+
static
int my_strnncollsp_tis620(CHARSET_INFO * cs,
const uchar *s, uint slen,
@@ -637,8 +642,9 @@ my_bool my_like_range_tis620(CHARSET_INFO *cs __attribute__((unused)),
{
*min_length= (uint) (min_str - min_org);
*max_length=res_length;
- do {
- *min_str++ = ' '; /* Because of key compression */
+ do
+ {
+ *min_str++ = 0;
*max_str++ = max_sort_chr;
} while (min_str != min_end);
return 0;
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 26d7568e6a2..99d97a9614b 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -90,8 +90,8 @@ static uchar to_upper_ucs2[] = {
};
-static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
- my_wc_t * pwc, const uchar *s, const uchar *e)
+static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
+ my_wc_t * pwc, const uchar *s, const uchar *e)
{
if (s+2 > e) /* Need 2 characters */
return MY_CS_TOOFEW(0);
@@ -100,8 +100,8 @@ static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
return 2;
}
-static int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) ,
- my_wc_t wc, uchar *r, uchar *e)
+static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+ my_wc_t wc, uchar *r, uchar *e)
{
if ( r+2 > e )
return MY_CS_TOOSMALL;
@@ -128,13 +128,15 @@ static void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen)
}
}
-static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2)
+
+static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
+ ulong *n1, ulong *n2)
{
my_wc_t wc;
int res;
const uchar *e=s+slen;
- while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
+ while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
@@ -148,7 +150,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong
static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
- char * s __attribute__((unused)))
+ char * s __attribute__((unused)))
{
}
@@ -173,13 +175,14 @@ static void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen)
}
static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
- char * s __attribute__((unused)))
+ char * s __attribute__((unused)))
{
}
static int my_strnncoll_ucs2(CHARSET_INFO *cs,
- const uchar *s, uint slen, const uchar *t, uint tlen)
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@@ -213,8 +216,9 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
return ( (se-s) - (te-t) );
}
+
static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
- const char *s, const char *t, uint len)
+ const char *s, const char *t, uint len)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@@ -249,6 +253,7 @@ static int my_strncasecmp_ucs2(CHARSET_INFO *cs,
return ( (se-s) - (te-t) );
}
+
static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
{
uint s_len=strlen(s);
@@ -257,6 +262,7 @@ static int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
return my_strncasecmp_ucs2(cs, s, t, len);
}
+
static int my_strnxfrm_ucs2(CHARSET_INFO *cs,
uchar *dst, uint dstlen, const uchar *src, uint srclen)
{
@@ -288,6 +294,7 @@ static int my_strnxfrm_ucs2(CHARSET_INFO *cs,
return dst - dst_orig;
}
+
static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *b __attribute__((unused)),
const char *e __attribute__((unused)))
@@ -295,6 +302,7 @@ static int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
return 2;
}
+
static int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
uint c __attribute__((unused)))
{
@@ -380,8 +388,8 @@ static int my_vsnprintf_ucs2(char *dst, uint n, const char* fmt, va_list ap)
return (uint) (dst - start);
}
-static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused))
- ,char* to, uint n, const char* fmt, ...)
+static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+ char* to, uint n, const char* fmt, ...)
{
va_list args;
va_start(args,fmt);
@@ -389,9 +397,9 @@ static int my_snprintf_ucs2(CHARSET_INFO *cs __attribute__((unused))
}
-long my_strntol_ucs2(CHARSET_INFO *cs,
- const char *nptr, uint l, int base,
- char **endptr, int *err)
+long my_strntol_ucs2(CHARSET_INFO *cs,
+ const char *nptr, uint l, int base,
+ char **endptr, int *err)
{
int negative=0;
int overflow;
@@ -504,9 +512,9 @@ bs:
}
-ulong my_strntoul_ucs2(CHARSET_INFO *cs,
- const char *nptr, uint l, int base,
- char **endptr, int *err)
+ulong my_strntoul_ucs2(CHARSET_INFO *cs,
+ const char *nptr, uint l, int base,
+ char **endptr, int *err)
{
int negative=0;
int overflow;
@@ -1334,8 +1342,8 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
*min_length= (uint) (min_str - min_org);
*max_length=res_length;
do {
- *min_str++ = '\0';
- *min_str++ = ' '; /* Because if key compression */
+ *min_str++ = 0;
+ *min_str++ = 0;
*max_str++ = (char) cs->max_sort_char >>8;
*max_str++ = (char) cs->max_sort_char & 255;
} while (min_str + 1 < min_end);
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index dca73e5a79f..886ecfbd0c9 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1801,7 +1801,8 @@ static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
static int my_strnncoll_utf8(CHARSET_INFO *cs,
- const uchar *s, uint slen, const uchar *t, uint tlen)
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@@ -1835,6 +1836,11 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
return ( (se-s) - (te-t) );
}
+
+/*
+ TODO: Has to be fixed like strnncollsp in ctype-simple.c
+*/
+
static
int my_strnncollsp_utf8(CHARSET_INFO * cs,
const uchar *s, uint slen,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index bda349f1988..8fd4e612713 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -467,6 +467,10 @@ static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
}
+/*
+ TODO: Has to be fixed like strnncollsp in ctype-simple.c
+*/
+
static
int my_strnncollsp_win1250ch(CHARSET_INFO * cs,
const uchar *s, uint slen,