summary | refs | log | tree | commit | diff
path: root/strings
diff options
context:
space:
mode:
author: unknown <bar@gw.udmsearch.izhnet.ru> 2002-06-17 22:43:40 +0500
committer: unknown <bar@gw.udmsearch.izhnet.ru> 2002-06-17 22:43:40 +0500
commit: 6f8875caa0185b7fe072bb27c5e0cdb63831d188 (patch)
tree: 0d23f2ed3374c600690f394ae85990763c2869b7 /strings
parent: ac4fd586d97e8a76d565ee3f888ce1563bcfc9ce (diff)
download: mariadb-git-6f8875caa0185b7fe072bb27c5e0cdb63831d188.tar.gz
UCS2 charset has been added
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-utf8.c 309
-rw-r--r-- strings/ctype.c 33
2 files changed, 342 insertions, 0 deletions
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index a4485e1ef50..1cce8819619 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -25,6 +25,10 @@
#define HAVE_UNIDATA
#endif
+/* Request the HAVE_UNIDATA-guarded tables (plane00[] etc.) whenever the
+   ucs2 charset is compiled in. The ucs2 routines in this file index
+   uni_plane[] - presumably built from those tables; TODO confirm. */
+#ifdef HAVE_CHARSET_ucs2
+#define HAVE_UNIDATA
+#endif
+
#ifdef HAVE_UNIDATA
static MY_UNICASE_INFO plane00[]={
@@ -1999,4 +2003,309 @@ int main()
#endif
+#endif /* HAVE_CHARSET_UTF8 */
+
+
+
+#ifdef HAVE_CHARSET_ucs2
+
+/*
+  Character-class table for the ucs2 charset: one leading 0 followed by
+  256 entries (257 values in total). The entries for codes 128..255 are
+  all 0.
+  NOTE(review): the values look like the usual ctype bit flags (132 for
+  '0'..'9', 129 or 1 for 'A'..'Z', 130 or 2 for 'a'..'z') - confirm the
+  bit meanings against the project's ctype header.
+*/
+uchar ctype_ucs2[] = {
+ 0,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+ 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
+ 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+  256-entry byte-to-lowercase map. It is the identity everywhere except
+  65..90 ('A'..'Z'), which map to 97..122 ('a'..'z'); bytes 128..255 are
+  left unchanged.
+*/
+uchar to_lower_ucs2[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
+ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+/*
+  256-entry byte-to-uppercase map. It is the identity everywhere except
+  97..122 ('a'..'z'), which map to 65..90 ('A'..'Z'); bytes 128..255 are
+  left unchanged.
+*/
+uchar to_upper_ucs2[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+
+/*
+  Decode one UCS-2 character (two bytes, most significant byte first)
+  from the range [s, e).
+
+  cs    - unused
+  pwc   - receives the decoded value, s[0]*256 + s[1]
+  s, e  - current read position and end of the input
+
+  Returns 2 (the number of bytes consumed) on success, or MY_CS_ILSEQ
+  when fewer than two bytes are available; *pwc is not written then.
+*/
+int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) ,
+                 my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+  /*
+    Test the remaining length rather than forming s+2: with a single
+    trailing byte s+2 would point beyond one-past-the-end of the buffer,
+    which is undefined behaviour.
+  */
+  if (e - s < 2)
+    return MY_CS_ILSEQ;
+
+  *pwc= ((my_wc_t) s[0] << 8) | s[1];
+  return 2;
+}
+
+/*
+  Encode one character as UCS-2 (two bytes, most significant byte first)
+  into the range [r, e).
+
+  cs    - unused
+  wc    - value to store; only bits 0..15 are written, anything above
+          is discarded (assuming uchar is an 8-bit type)
+  r, e  - current write position and end of the output buffer
+
+  Returns 2 (the number of bytes written) on success, or MY_CS_TOOSMALL
+  when fewer than two bytes of room are left; nothing is written then.
+*/
+int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) ,
+                 my_wc_t wc, uchar *r, uchar *e)
+{
+  /*
+    Test the remaining room rather than forming r+2, which may point
+    beyond one-past-the-end of the buffer (undefined behaviour).
+  */
+  if (e - r < 2)
+    return MY_CS_TOOSMALL;
+
+  r[0]= (uchar) ((wc >> 8) & 0xFF);
+  r[1]= (uchar) (wc & 0xFF);
+  return 2;
+}
+
+
+/*
+  Upper-case a UCS-2 buffer in place.
+
+  cs    - passed through to my_ucs2_uni() / my_uni_ucs2()
+  s     - buffer to convert
+  slen  - length of the buffer in bytes
+
+  Each character is decoded, replaced by the .toupper member of its
+  uni_plane[] entry (left as is when the plane has no table) and written
+  back over the bytes it was read from. Processing stops at the first
+  character that cannot be decoded or whose re-encoded length differs
+  from the decoded length.
+*/
+void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen)
+{
+  char *end= s + slen;
+  my_wc_t code;
+
+  for (;;)
+  {
+    int consumed;
+    int page;
+
+    if (s >= end)
+      break;
+
+    consumed= my_ucs2_uni(cs, &code, (uchar *) s, (uchar *) end);
+    if (consumed <= 0)
+      break;
+
+    page= (code >> 8) & 0xFF;
+    if (uni_plane[page])
+      code= uni_plane[page][code & 0xFF].toupper;
+
+    if (my_uni_ucs2(cs, code, (uchar *) s, (uchar *) end) != consumed)
+      break;
+
+    s+= consumed;
+  }
+}
+
+/*
+  Hash a UCS-2 byte string after mapping every character through the
+  .toupper member of its uni_plane[] entry.
+
+  cs    - passed through to my_ucs2_uni()
+  s     - string to hash
+  slen  - length of the string in bytes
+
+  The low byte of each mapped character is mixed into the hash first,
+  then the remaining high part. Hashing stops at the first character
+  that cannot be decoded. Returns the accumulated hash value.
+*/
+uint my_hash_caseup_ucs2(CHARSET_INFO *cs, const byte *s, uint slen)
+{
+  uint hash= 1;
+  uint salt= 4;
+  const char *end= s + slen;
+  my_wc_t code;
+  int consumed;
+
+  for ( ; s < end; s+= consumed)
+  {
+    int page;
+
+    consumed= my_ucs2_uni(cs, &code, (uchar *) s, (uchar *) end);
+    if (consumed <= 0)
+      break;
+
+    page= (code >> 8) & 0xFF;
+    if (uni_plane[page])
+      code= uni_plane[page][code & 0xFF].toupper;
+
+    hash^= (((hash & 63) + salt) * (code & 0xFF)) + (hash << 8);
+    salt+= 3;
+    hash^= (((hash & 63) + salt) * (code >> 8)) + (hash << 8);
+    salt+= 3;
+  }
+
+  return hash;
+}
+
+
+/*
+  Fold a UCS-2 byte string into a running hash, using the .sort member
+  of each character's uni_plane[] entry.
+
+  cs      - passed through to my_ucs2_uni()
+  s       - string to hash
+  slen    - length of the string in bytes
+  n1, n2  - in/out hash state: *n1 accumulates the hash, *n2 is a
+            counter advanced by 3 after every byte mixed in
+
+  The low byte of each mapped character is mixed in first, then the
+  remaining high part. Hashing stops at the first character that cannot
+  be decoded.
+*/
+void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2)
+{
+  const uchar *end= s + slen;
+  my_wc_t code;
+  int consumed;
+
+  for ( ; s < end; s+= consumed)
+  {
+    int page;
+
+    consumed= my_ucs2_uni(cs, &code, (uchar *) s, (uchar *) end);
+    if (consumed <= 0)
+      break;
+
+    page= (code >> 8) & 0xFF;
+    if (uni_plane[page])
+      code= uni_plane[page][code & 0xFF].sort;
+
+    *n1^= (((*n1 & 63) + *n2) * (code & 0xFF)) + (*n1 << 8);
+    *n2+= 3;
+    *n1^= (((*n1 & 63) + *n2) * (code >> 8)) + (*n1 << 8);
+    *n2+= 3;
+  }
+}
+
+
+/*
+  Upper-casing of a string passed without a length: intentionally empty
+  for ucs2; both arguments are unused and the string is left untouched.
+  NOTE(review): presumably a no-op because UCS-2 data may contain zero
+  bytes, so a terminator-delimited string cannot be walked - confirm.
+*/
+void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
+ char * s __attribute__((unused)))
+{
+}
+
+
+
+/*
+  Lower-case a UCS-2 buffer in place.
+
+  cs    - passed through to my_ucs2_uni() / my_uni_ucs2()
+  s     - buffer to convert
+  slen  - length of the buffer in bytes
+
+  Each character is decoded, replaced by the .tolower member of its
+  uni_plane[] entry (left as is when the plane has no table) and written
+  back over the bytes it was read from. Processing stops at the first
+  character that cannot be decoded or whose re-encoded length differs
+  from the decoded length.
+*/
+void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen)
+{
+  char *end= s + slen;
+  my_wc_t code;
+  int consumed;
+
+  for ( ; s < end; s+= consumed)
+  {
+    int page;
+
+    consumed= my_ucs2_uni(cs, &code, (uchar*) s, (uchar*) end);
+    if (consumed <= 0)
+      return;
+
+    page= (code >> 8) & 0xFF;
+    if (uni_plane[page])
+      code= uni_plane[page][code & 0xFF].tolower;
+
+    if (my_uni_ucs2(cs, code, (uchar*) s, (uchar*) end) != consumed)
+      return;
+  }
+}
+
+/*
+  Lower-casing of a string passed without a length: intentionally empty
+  for ucs2; both arguments are unused and the string is left untouched.
+  NOTE(review): presumably a no-op because UCS-2 data may contain zero
+  bytes, so a terminator-delimited string cannot be walked - confirm.
+*/
+void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+ char * s __attribute__((unused)))
+{
+}
+
+
+/*
+  Compare two UCS-2 byte strings using the .sort member of each
+  character's uni_plane[] entry.
+
+  cs       - passed through to my_ucs2_uni()
+  s, slen  - first string and its length in bytes
+  t, tlen  - second string and its length in bytes
+
+  Characters are compared pairwise. The first pair whose mapped values
+  differ decides the result (difference of the mapped values). If either
+  side cannot be decoded, the difference of the current bytes is
+  returned instead. When one string runs out first, the result is the
+  difference of the remaining byte counts.
+
+  Returns a negative value, zero or a positive value.
+*/
+int my_strnncoll_ucs2(CHARSET_INFO *cs,
+                      const uchar *s, uint slen, const uchar *t, uint tlen)
+{
+  const uchar *s_end= s + slen;
+  const uchar *t_end= t + tlen;
+  my_wc_t s_code, t_code;
+  int s_step, t_step;
+
+  for ( ; s < s_end && t < t_end; s+= s_step, t+= t_step)
+  {
+    int page;
+
+    s_step= my_ucs2_uni(cs, &s_code, s, s_end);
+    t_step= my_ucs2_uni(cs, &t_code, t, t_end);
+
+    /* Incorrect string, compare by char value */
+    if (!(s_step > 0 && t_step > 0))
+      return ((int) s[0] - (int) t[0]);
+
+    page= (s_code >> 8) & 0xFF;
+    if (uni_plane[page])
+      s_code= uni_plane[page][s_code & 0xFF].sort;
+
+    page= (t_code >> 8) & 0xFF;
+    if (uni_plane[page])
+      t_code= uni_plane[page][t_code & 0xFF].sort;
+
+    if (s_code != t_code)
+      return ((int) s_code) - ((int) t_code);
+  }
+  return ( (s_end - s) - (t_end - t) );
+}
+
+/*
+  Case-insensitive comparison of the first len bytes of two UCS-2
+  byte strings.
+
+  cs    - passed through to my_ucs2_uni()
+  s, t  - strings to compare; both must provide at least len bytes
+  len   - number of bytes to compare in each string
+
+  Characters are compared pairwise after mapping through the .tolower
+  member of their uni_plane[] entry (left as is when the plane has no
+  table). The first differing pair decides the result. If a character
+  cannot be decoded, the current bytes are compared as unsigned values.
+
+  Returns a negative value, zero or a positive value.
+*/
+int my_strncasecmp_ucs2(CHARSET_INFO *cs,
+                        const char *s, const char *t, uint len)
+{
+  int s_res,t_res;
+  my_wc_t s_wc,t_wc;
+  const char *se=s+len;
+  const char *te=t+len;
+
+  while ( s < se && t < te )
+  {
+    int plane;
+
+    s_res=my_ucs2_uni(cs,&s_wc, (const uchar*)s, (const uchar*)se);
+    t_res=my_ucs2_uni(cs,&t_wc, (const uchar*)t, (const uchar*)te);
+
+    if ( s_res <= 0 || t_res <= 0 )
+    {
+      /*
+        Incorrect string, compare by char value. Go through uchar so the
+        ordering does not depend on whether plain char is signed, and
+        agrees with my_strnncoll_ucs2().
+      */
+      return ((int)(uchar)s[0]-(int)(uchar)t[0]);
+    }
+
+    plane=(s_wc>>8) & 0xFF;
+    s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc;
+
+    plane=(t_wc>>8) & 0xFF;
+    t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc;
+
+    if ( s_wc != t_wc )
+      return ((int) s_wc) - ((int) t_wc);
+
+    s+=s_res;
+    t+=t_res;
+  }
+  return ( (se-s) - (te-t) );
+}
+
+/*
+  Case-insensitive comparison of two zero-terminated strings.
+
+  cs    - passed through to my_strncasecmp_ucs2()
+  s, t  - strings to compare; their lengths are taken with strlen()
+
+  Only the prefix that both strings actually contain is handed to
+  my_strncasecmp_ucs2(), so neither string is read beyond its
+  terminator. If the common prefix compares equal, the shorter string
+  sorts first.
+
+  NOTE(review): strlen() stops at the first zero byte, and UCS-2 data
+  has a zero byte in every character below U+0100 - callers presumably
+  only pass data for which that is acceptable; confirm.
+
+  Returns a negative value, zero or a positive value.
+*/
+int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
+{
+  uint s_len=strlen(s);
+  uint t_len=strlen(t);
+  uint common = (s_len < t_len) ? s_len : t_len;
+  int res= my_strncasecmp_ucs2(cs, s, t, common);
+
+  if (res)
+    return res;
+
+  /* Equal over the common prefix: decide by length. */
+  return (s_len > t_len) - (s_len < t_len);
+}
+
+/*
+  Transform a UCS-2 byte string into its sort key.
+
+  cs           - passed through to my_ucs2_uni() / my_uni_ucs2()
+  dst, dstlen  - output buffer and its size in bytes
+  src, srclen  - input string and its length in bytes
+
+  Every input character is decoded, replaced by the .sort member of its
+  uni_plane[] entry (left as is when the plane has no table) and
+  re-encoded into dst. The loop ends when the input is exhausted, the
+  output is full, or a character cannot be decoded or stored.
+
+  Returns the number of bytes written to dst.
+*/
+int my_strnxfrm_ucs2(CHARSET_INFO *cs,
+                     uchar *dst, uint dstlen, const uchar *src, uint srclen)
+{
+  my_wc_t wc;
+  int res;
+  int plane;
+  uchar *de = dst + dstlen;
+  const uchar *se = src + srclen;
+  const uchar *dst_orig = dst;
+
+  while( src < se && dst < de )
+  {
+    /*
+      Treat every non-positive result as failure, as the other loops in
+      this file do. A zero result would otherwise leave src where it is
+      and keep emitting a stale wc until dst is full.
+    */
+    if ((res=my_ucs2_uni(cs,&wc, src, se)) <= 0)
+    {
+      break;
+    }
+    src+=res;
+
+    plane=(wc>>8) & 0xFF;
+    wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
+
+    if ((res=my_uni_ucs2(cs,wc,dst,de)) <= 0)
+    {
+      break;
+    }
+    dst+=res;
+  }
+  return dst - dst_orig;
+}
+
+/*
+  Multi-byte character test for ucs2. All arguments are unused: the
+  function always returns 2 and does not inspect the range [b, e).
+*/
+int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+ const char *b __attribute__((unused)),
+ const char *e __attribute__((unused)))
+{
+ return 2;
+}
+
+/*
+  Multi-byte head test for ucs2. Both arguments are unused: the function
+  always returns 1, whatever the value of ch.
+*/
+my_bool my_ismbhead_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+ uint ch __attribute__((unused)))
+{
+ return 1;
+}
+
+/*
+  Character length for ucs2. Both arguments are unused: the function
+  always returns 2, whatever the value of c.
+*/
+int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+ uint c __attribute__((unused)))
+{
+ return 2;
+}
+
#endif
diff --git a/strings/ctype.c b/strings/ctype.c
index 0c20db35da2..7c37eeff986 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -3635,6 +3635,39 @@ CHARSET_INFO compiled_charsets[] = {
},
#endif
+#ifdef HAVE_CHARSET_ucs2
+ {
+ 35, /* number */
+ "ucs2", /* name */
+ ctype_ucs2, /* ctype */
+ to_lower_ucs2, /* to_lower */
+ to_upper_ucs2, /* to_upper */
+ to_upper_ucs2, /* sort_order */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ 1, /* strxfrm_multiply */
+ my_strnncoll_ucs2, /* strnncoll */
+ my_strnxfrm_ucs2, /* strnxfrm */
+ NULL, /* like_range */
+ 2, /* mbmaxlen */
+ my_ismbchar_ucs2, /* ismbchar */
+ my_ismbhead_ucs2, /* ismbhead */
+ my_mbcharlen_ucs2, /* mbcharlen */
+ my_ucs2_uni, /* mb_wc */
+ my_uni_ucs2, /* wc_mb */
+ my_caseup_str_ucs2,
+ my_casedn_str_ucs2,
+ my_caseup_ucs2,
+ my_casedn_ucs2,
+ my_strcasecmp_ucs2,
+ my_strncasecmp_ucs2,
+ my_hash_caseup_ucs2,/* hash_caseup */
+ my_hash_sort_ucs2, /* hash_sort */
+ 0
+ },
+#endif
+
+
#ifdef HAVE_CHARSET_ujis
{
12, /* number */