summary | refs | log | tree | commit | diff
path: root/strings
diff options
context:
space:
mode:
author: Alexander Barkov <bar@mnogosearch.org> 2013-10-02 15:04:07 +0400
committer: Alexander Barkov <bar@mnogosearch.org> 2013-10-02 15:04:07 +0400
commit: 0b6c4bb34f99b8f4023fd0bef25a1b714f96b699 (patch)
tree: 87e5f83097f30c9fb7e30928800bcc92690f6bbd /strings
parent: 9538bbfce9055f99529adb461d101b7b236eb5a3 (diff)
download: mariadb-git-0b6c4bb34f99b8f4023fd0bef25a1b714f96b699.tar.gz
MDEV-4928 Merge collation customization improvements
Merging the following MySQL-5.6 changes:
- WL#5624: Collation customization improvements
  http://dev.mysql.com/worklog/task/?id=5624
- WL#4013: Unicode german2 collation
  http://dev.mysql.com/worklog/task/?id=4013
- Bug#62429 XML: ExtractValue, UpdateXML max arg length 127 chars
  http://bugs.mysql.com/bug.php?id=62429
  (required by WL#5624)
Diffstat (limited to 'strings')
-rw-r--r--  strings/CMakeLists.txt        3
-rw-r--r--  strings/conf_to_src.c        41
-rw-r--r--  strings/ctype-big5.c         25
-rw-r--r--  strings/ctype-bin.c           7
-rw-r--r--  strings/ctype-cp932.c        34
-rw-r--r--  strings/ctype-czech.c         5
-rw-r--r--  strings/ctype-euc_kr.c       31
-rw-r--r--  strings/ctype-eucjpms.c      45
-rw-r--r--  strings/ctype-extra.c       310
-rw-r--r--  strings/ctype-gb2312.c       29
-rw-r--r--  strings/ctype-gbk.c          31
-rw-r--r--  strings/ctype-latin1.c       15
-rw-r--r--  strings/ctype-mb.c           34
-rw-r--r--  strings/ctype-simple.c       25
-rw-r--r--  strings/ctype-sjis.c         29
-rw-r--r--  strings/ctype-tis620.c       10
-rw-r--r--  strings/ctype-uca.c        3245
-rw-r--r--  strings/ctype-ucs2.c        169
-rw-r--r--  strings/ctype-ujis.c         47
-rw-r--r--  strings/ctype-utf8.c        257
-rw-r--r--  strings/ctype-win1250ch.c     5
-rw-r--r--  strings/ctype.c             594
-rw-r--r--  strings/str_alloc.c           6
-rw-r--r--  strings/xml.c                98
24 files changed, 3468 insertions, 1627 deletions
diff --git a/strings/CMakeLists.txt b/strings/CMakeLists.txt
index 2747374058d..1b26b3d962a 100644
--- a/strings/CMakeLists.txt
+++ b/strings/CMakeLists.txt
@@ -32,3 +32,6 @@ ENDIF()
# Avoid dependencies on perschema data defined in mysys
ADD_DEFINITIONS(-DDISABLE_MYSQL_THREAD_H)
ADD_CONVENIENCE_LIBRARY(strings ${STRINGS_SOURCES})
+
+ADD_EXECUTABLE(conf_to_src EXCLUDE_FROM_ALL conf_to_src.c)
+TARGET_LINK_LIBRARIES(conf_to_src strings)
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index 7dfc76e5417..8d25ac8e7ed 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -145,12 +145,35 @@ static int add_collation(struct charset_info_st *cs)
}
+static void
+default_reporter(enum loglevel level __attribute__ ((unused)),
+ const char *format __attribute__ ((unused)),
+ ...)
+{
+}
+
+
+static void
+my_charset_loader_init(MY_CHARSET_LOADER *loader)
+{
+ loader->error[0]= '\0';
+ loader->once_alloc= malloc;
+ loader->malloc= malloc;
+ loader->realloc= realloc;
+ loader->free= free;
+ loader->reporter= default_reporter;
+ loader->add_collation= add_collation;
+}
+
+
static int my_read_charset_file(const char *filename)
{
char buf[MAX_BUF];
int fd;
uint len;
+ MY_CHARSET_LOADER loader;
+ my_charset_loader_init(&loader);
if ((fd=open(filename,O_RDONLY)) < 0)
{
fprintf(stderr,"Can't open '%s'\n",filename);
@@ -161,14 +184,10 @@ static int my_read_charset_file(const char *filename)
DBUG_ASSERT(len < MAX_BUF);
close(fd);
- if (my_parse_charset_xml(buf,len,add_collation))
+ if (my_parse_charset_xml(&loader, buf, len))
{
-#if 0
- printf("ERROR at line %d pos %d '%s'\n",
- my_xml_error_lineno(&p)+1,
- my_xml_error_pos(&p),
- my_xml_error_string(&p));
-#endif
+ fprintf(stderr, "Error while parsing '%s': %s\n", filename, loader.error);
+ exit(1);
}
return FALSE;
@@ -207,8 +226,7 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
fprintf(f," sort_order_%s, /* sort_order */\n",cs->name);
else
fprintf(f," NULL, /* sort_order */\n");
- fprintf(f," NULL, /* contractions */\n");
- fprintf(f," NULL, /* sort_order_big*/\n");
+ fprintf(f," NULL, /* uca */\n");
fprintf(f," to_uni_%s, /* to_uni */\n",cs->name);
}
else
@@ -221,13 +239,12 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
fprintf(f," NULL, /* lower */\n");
fprintf(f," NULL, /* upper */\n");
fprintf(f," NULL, /* sort order */\n");
- fprintf(f," NULL, /* contractions */\n");
- fprintf(f," NULL, /* sort_order_big*/\n");
+ fprintf(f," NULL, /* uca */\n");
fprintf(f," NULL, /* to_uni */\n");
}
fprintf(f," NULL, /* from_uni */\n");
- fprintf(f," my_unicase_default, /* caseinfo */\n");
+ fprintf(f," &my_unicase_default, /* caseinfo */\n");
fprintf(f," NULL, /* state map */\n");
fprintf(f," NULL, /* ident map */\n");
fprintf(f," 1, /* strxfrm_multiply*/\n");
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index f77e705525c..cf9fc339280 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -177,7 +177,7 @@ static const uchar sort_order_big5[]=
};
-static MY_UNICASE_INFO cA2[256]=
+static MY_UNICASE_CHARACTER cA2[256]=
{
/* A200-A20F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -370,7 +370,7 @@ static MY_UNICASE_INFO cA2[256]=
};
-static MY_UNICASE_INFO cA3[256]=
+static MY_UNICASE_CHARACTER cA3[256]=
{
/* A300-A30F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -563,7 +563,7 @@ static MY_UNICASE_INFO cA3[256]=
};
-static MY_UNICASE_INFO cC7[256]=
+static MY_UNICASE_CHARACTER cC7[256]=
{
/* C700-C70F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -756,7 +756,7 @@ static MY_UNICASE_INFO cC7[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_big5[256]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_big5[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -793,6 +793,13 @@ static MY_UNICASE_INFO *my_caseinfo_big5[256]=
};
+static MY_UNICASE_INFO my_caseinfo_big5=
+{
+ 0xFFFF,
+ my_caseinfo_pages_big5
+};
+
+
static uint16 big5strokexfrm(uint16 i)
{
if ((i == 0xA440) || (i == 0xA441)) return 0xA440;
@@ -6926,11 +6933,10 @@ struct charset_info_st my_charset_big5_chinese_ci=
to_lower_big5,
to_upper_big5,
sort_order_big5,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_big5, /* caseinfo */
+ &my_caseinfo_big5, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -6959,11 +6965,10 @@ struct charset_info_st my_charset_big5_bin=
to_lower_big5,
to_upper_big5,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_big5, /* caseinfo */
+ &my_caseinfo_big5, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 2363a235550..76e8da25fc2 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -69,7 +69,7 @@ static const uchar bin_char_array[] =
static my_bool
my_coll_init_8bit_bin(struct charset_info_st *cs,
- void *(*alloc)(size_t) __attribute__((unused)))
+ MY_CHARSET_LOADER *loader __attribute__((unused)))
{
cs->max_sort_char=255;
return FALSE;
@@ -571,11 +571,10 @@ struct charset_info_st my_charset_bin =
bin_char_array, /* to_lower */
bin_char_array, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index b7847e4509e..946cf4253d8 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -197,7 +197,7 @@ static uint mbcharlen_cp932(CHARSET_INFO *cs __attribute__((unused)),uint c)
#define cp932code(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
-static MY_UNICASE_INFO c81[256]=
+static MY_UNICASE_CHARACTER c81[256]=
{
/* 8100-810F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -407,7 +407,7 @@ static MY_UNICASE_INFO c81[256]=
};
-static MY_UNICASE_INFO c82[256]=
+static MY_UNICASE_CHARACTER c82[256]=
{
/* 8200-820F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -615,7 +615,7 @@ static MY_UNICASE_INFO c82[256]=
};
-static MY_UNICASE_INFO c83[256]=
+static MY_UNICASE_CHARACTER c83[256]=
{
/* 8300-830F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -825,7 +825,7 @@ static MY_UNICASE_INFO c83[256]=
};
-static MY_UNICASE_INFO c84[256]=
+static MY_UNICASE_CHARACTER c84[256]=
{
/* 8400-840F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1035,7 +1035,7 @@ static MY_UNICASE_INFO c84[256]=
};
-static MY_UNICASE_INFO c87[256]=
+static MY_UNICASE_CHARACTER c87[256]=
{
/* 8700-870F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1245,7 +1245,7 @@ static MY_UNICASE_INFO c87[256]=
};
-static MY_UNICASE_INFO cEE[256]=
+static MY_UNICASE_CHARACTER cEE[256]=
{
/* EE00-EE0F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1456,7 +1456,7 @@ static MY_UNICASE_INFO cEE[256]=
};
-static MY_UNICASE_INFO cFA[256]=
+static MY_UNICASE_CHARACTER cFA[256]=
{
/* FA00-FA0F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1666,7 +1666,7 @@ static MY_UNICASE_INFO cFA[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_cp932[256]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_cp932[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1703,7 +1703,13 @@ static MY_UNICASE_INFO *my_caseinfo_cp932[256]=
};
-static int my_strnncoll_cp932_internal(CHARSET_INFO *cs,
+MY_UNICASE_INFO my_caseinfo_cp932=
+{
+ 0xFFFF,
+ my_caseinfo_pages_cp932
+};
+
+static int my_strnncoll_cp932_internal(const CHARSET_INFO *cs,
const uchar **a_res, size_t a_length,
const uchar **b_res, size_t b_length)
{
@@ -34834,11 +34840,10 @@ struct charset_info_st my_charset_cp932_japanese_ci=
to_lower_cp932,
to_upper_cp932,
sort_order_cp932,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_cp932, /* caseinfo */
+ &my_caseinfo_cp932, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -34866,11 +34871,10 @@ struct charset_info_st my_charset_cp932_bin=
to_lower_cp932,
to_upper_cp932,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_cp932, /* caseinfo */
+ &my_caseinfo_cp932, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 36fc6b36f0c..1418edfecb3 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -613,11 +613,10 @@ struct charset_info_st my_charset_latin2_czech_ci =
to_lower_czech,
to_upper_czech,
sort_order_czech,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
tab_8859_2_uni, /* tab_to_uni */
idx_uni_8859_2, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
4, /* strxfrm_multiply */
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index 3caa1977c0b..66b8b090241 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -216,7 +216,7 @@ static uint mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
}
-static MY_UNICASE_INFO cA3[256]=
+static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -421,7 +421,7 @@ static MY_UNICASE_INFO cA3[256]=
};
-static MY_UNICASE_INFO cA5[256]=
+static MY_UNICASE_CHARACTER cA5[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -626,7 +626,7 @@ static MY_UNICASE_INFO cA5[256]=
};
-static MY_UNICASE_INFO cA7[256]=
+static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -831,7 +831,7 @@ static MY_UNICASE_INFO cA7[256]=
};
-static MY_UNICASE_INFO cA8[256]=
+static MY_UNICASE_CHARACTER cA8[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1036,7 +1036,7 @@ static MY_UNICASE_INFO cA8[256]=
};
-static MY_UNICASE_INFO cA9[256]=
+static MY_UNICASE_CHARACTER cA9[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1241,7 +1241,7 @@ static MY_UNICASE_INFO cA9[256]=
};
-static MY_UNICASE_INFO cAC[256]=
+static MY_UNICASE_CHARACTER cAC[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1446,7 +1446,7 @@ static MY_UNICASE_INFO cAC[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_euckr[256]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_euckr[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1483,6 +1483,13 @@ static MY_UNICASE_INFO *my_caseinfo_euckr[256]=
};
+static MY_UNICASE_INFO my_caseinfo_euckr=
+{
+ 0xFFFF,
+ my_caseinfo_pages_euckr
+};
+
+
/* page 0 0x8141-0xC8FE */
static const uint16 tab_ksc5601_uni0[]={
0xAC02,0xAC03,0xAC05,0xAC06,0xAC0B,0xAC0C,0xAC0D,0xAC0E,
@@ -10016,11 +10023,10 @@ struct charset_info_st my_charset_euckr_korean_ci=
to_lower_euc_kr,
to_upper_euc_kr,
sort_order_euc_kr,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_euckr, /* caseinfo */
+ &my_caseinfo_euckr, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -10049,11 +10055,10 @@ struct charset_info_st my_charset_euckr_bin=
to_lower_euc_kr,
to_upper_euc_kr,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_euckr, /* caseinfo */
+ &my_caseinfo_euckr, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index fe6bb744706..59a9a43c0f5 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -203,7 +203,7 @@ static uint mbcharlen_eucjpms(CHARSET_INFO *cs __attribute__((unused)),uint c)
/* Case info pages for JIS-X-0208 range */
-static MY_UNICASE_INFO cA2[256]=
+static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -324,7 +324,7 @@ static MY_UNICASE_INFO cA2[256]=
};
-static MY_UNICASE_INFO cA3[256]=
+static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -445,7 +445,7 @@ static MY_UNICASE_INFO cA3[256]=
};
-static MY_UNICASE_INFO cA6[256]=
+static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -566,7 +566,7 @@ static MY_UNICASE_INFO cA6[256]=
};
-static MY_UNICASE_INFO cA7[256]=
+static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -687,7 +687,7 @@ static MY_UNICASE_INFO cA7[256]=
};
-static MY_UNICASE_INFO cAD[256]=
+static MY_UNICASE_CHARACTER cAD[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -810,7 +810,7 @@ static MY_UNICASE_INFO cAD[256]=
/* Case info pages for JIS-X-0212 range */
-static MY_UNICASE_INFO c8FA6[256]=
+static MY_UNICASE_CHARACTER c8FA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -931,7 +931,7 @@ static MY_UNICASE_INFO c8FA6[256]=
};
-static MY_UNICASE_INFO c8FA7[256]=
+static MY_UNICASE_CHARACTER c8FA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1052,7 +1052,7 @@ static MY_UNICASE_INFO c8FA7[256]=
};
-static MY_UNICASE_INFO c8FA9[256]=
+static MY_UNICASE_CHARACTER c8FA9[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1173,7 +1173,7 @@ static MY_UNICASE_INFO c8FA9[256]=
};
-static MY_UNICASE_INFO c8FAA[256]=
+static MY_UNICASE_CHARACTER c8FAA[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1294,7 +1294,7 @@ static MY_UNICASE_INFO c8FAA[256]=
};
-static MY_UNICASE_INFO c8FAB[256]=
+static MY_UNICASE_CHARACTER c8FAB[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1415,7 +1415,7 @@ static MY_UNICASE_INFO c8FAB[256]=
};
-static MY_UNICASE_INFO c8FF3[256]=
+static MY_UNICASE_CHARACTER c8FF3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1536,7 +1536,7 @@ static MY_UNICASE_INFO c8FF3[256]=
};
-static MY_UNICASE_INFO c8FF4[256]=
+static MY_UNICASE_CHARACTER c8FF4[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1657,7 +1657,7 @@ static MY_UNICASE_INFO c8FF4[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_eucjpms[512]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_eucjpms[512]=
{
/* JIS-X-0208 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
@@ -1729,7 +1729,14 @@ static MY_UNICASE_INFO *my_caseinfo_eucjpms[512]=
};
-static const uint16 jisx0208_eucjpms_to_unicode[65536]=
+static MY_UNICASE_INFO my_caseinfo_eucjpms=
+{
+ 0x0FFFF,
+ my_caseinfo_pages_eucjpms
+};
+
+
+static uint16 jisx0208_eucjpms_to_unicode[65536]=
{
0x0000, 0x0001, 0x0002, 0x0003, /* 0000 */
0x0004, 0x0005, 0x0006, 0x0007,
@@ -67559,11 +67566,10 @@ struct charset_info_st my_charset_eucjpms_japanese_ci=
to_lower_eucjpms,
to_upper_eucjpms,
sort_order_eucjpms,
- NULL, /* sort_order_big*/
- NULL, /* contractions */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_eucjpms,/* caseinfo */
+ &my_caseinfo_eucjpms,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -67592,11 +67598,10 @@ struct charset_info_st my_charset_eucjpms_bin=
to_lower_eucjpms,
to_upper_eucjpms,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_eucjpms,/* caseinfo */
+ &my_caseinfo_eucjpms,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index e0499c6f2e3..addeeba8ba0 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -6616,11 +6616,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_dec8_swedish_ci, /* lower */
to_upper_dec8_swedish_ci, /* upper */
sort_order_dec8_swedish_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_dec8_swedish_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6649,11 +6648,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp850_general_ci, /* lower */
to_upper_cp850_general_ci, /* upper */
sort_order_cp850_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp850_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6682,11 +6680,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_german1_ci, /* lower */
to_upper_latin1_german1_ci, /* upper */
sort_order_latin1_german1_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin1_german1_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6715,11 +6712,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hp8_english_ci, /* lower */
to_upper_hp8_english_ci, /* upper */
sort_order_hp8_english_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_hp8_english_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6748,11 +6744,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8r_general_ci, /* lower */
to_upper_koi8r_general_ci, /* upper */
sort_order_koi8r_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_koi8r_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6781,11 +6776,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_general_ci, /* lower */
to_upper_latin2_general_ci, /* upper */
sort_order_latin2_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin2_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6814,11 +6808,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_swe7_swedish_ci, /* lower */
to_upper_swe7_swedish_ci, /* upper */
sort_order_swe7_swedish_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_swe7_swedish_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6847,11 +6840,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_ascii_general_ci, /* lower */
to_upper_ascii_general_ci, /* upper */
sort_order_ascii_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_ascii_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6880,11 +6872,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_bulgarian_ci, /* lower */
to_upper_cp1251_bulgarian_ci, /* upper */
sort_order_cp1251_bulgarian_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1251_bulgarian_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6913,11 +6904,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_danish_ci, /* lower */
to_upper_latin1_danish_ci, /* upper */
sort_order_latin1_danish_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin1_danish_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6946,11 +6936,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hebrew_general_ci, /* lower */
to_upper_hebrew_general_ci, /* upper */
sort_order_hebrew_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_hebrew_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -6979,11 +6968,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_estonian_cs, /* lower */
to_upper_latin7_estonian_cs, /* upper */
sort_order_latin7_estonian_cs, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin7_estonian_cs, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7012,11 +7000,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_hungarian_ci, /* lower */
to_upper_latin2_hungarian_ci, /* upper */
sort_order_latin2_hungarian_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin2_hungarian_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7045,11 +7032,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8u_general_ci, /* lower */
to_upper_koi8u_general_ci, /* upper */
sort_order_koi8u_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_koi8u_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7078,11 +7064,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_ukrainian_ci, /* lower */
to_upper_cp1251_ukrainian_ci, /* upper */
sort_order_cp1251_ukrainian_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1251_ukrainian_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7111,11 +7096,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_greek_general_ci, /* lower */
to_upper_greek_general_ci, /* upper */
sort_order_greek_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_greek_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7144,11 +7128,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_general_ci, /* lower */
to_upper_cp1250_general_ci, /* upper */
sort_order_cp1250_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1250_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7177,11 +7160,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_croatian_ci, /* lower */
to_upper_latin2_croatian_ci, /* upper */
sort_order_latin2_croatian_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin2_croatian_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7210,11 +7192,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1257_lithuanian_ci, /* lower */
to_upper_cp1257_lithuanian_ci, /* upper */
sort_order_cp1257_lithuanian_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1257_lithuanian_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7243,11 +7224,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin5_turkish_ci, /* lower */
to_upper_latin5_turkish_ci, /* upper */
sort_order_latin5_turkish_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin5_turkish_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7276,11 +7256,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_armscii8_general_ci, /* lower */
to_upper_armscii8_general_ci, /* upper */
sort_order_armscii8_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_armscii8_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7309,11 +7288,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp866_general_ci, /* lower */
to_upper_cp866_general_ci, /* upper */
sort_order_cp866_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp866_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7342,11 +7320,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_keybcs2_general_ci, /* lower */
to_upper_keybcs2_general_ci, /* upper */
sort_order_keybcs2_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_keybcs2_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7375,11 +7352,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macce_general_ci, /* lower */
to_upper_macce_general_ci, /* upper */
sort_order_macce_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_macce_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7408,11 +7384,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macroman_general_ci, /* lower */
to_upper_macroman_general_ci, /* upper */
sort_order_macroman_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_macroman_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7441,11 +7416,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp852_general_ci, /* lower */
to_upper_cp852_general_ci, /* upper */
sort_order_cp852_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp852_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7474,11 +7448,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_general_ci, /* lower */
to_upper_latin7_general_ci, /* upper */
sort_order_latin7_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin7_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7507,11 +7480,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_general_cs, /* lower */
to_upper_latin7_general_cs, /* upper */
sort_order_latin7_general_cs, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin7_general_cs, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7540,11 +7512,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macce_bin, /* lower */
to_upper_macce_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_macce_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7573,11 +7544,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_croatian_ci, /* lower */
to_upper_cp1250_croatian_ci, /* upper */
sort_order_cp1250_croatian_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1250_croatian_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7606,11 +7576,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_general_ci, /* lower */
to_upper_latin1_general_ci, /* upper */
sort_order_latin1_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin1_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7639,11 +7608,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_general_cs, /* lower */
to_upper_latin1_general_cs, /* upper */
sort_order_latin1_general_cs, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin1_general_cs, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7672,11 +7640,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_bin, /* lower */
to_upper_cp1251_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1251_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7705,11 +7672,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_general_ci, /* lower */
to_upper_cp1251_general_ci, /* upper */
sort_order_cp1251_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1251_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7738,11 +7704,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1251_general_cs, /* lower */
to_upper_cp1251_general_cs, /* upper */
sort_order_cp1251_general_cs, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1251_general_cs, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7771,11 +7736,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_macroman_bin, /* lower */
to_upper_macroman_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_macroman_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7804,11 +7768,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1256_general_ci, /* lower */
to_upper_cp1256_general_ci, /* upper */
sort_order_cp1256_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1256_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7837,11 +7800,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1257_bin, /* lower */
to_upper_cp1257_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1257_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7870,11 +7832,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1257_general_ci, /* lower */
to_upper_cp1257_general_ci, /* upper */
sort_order_cp1257_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1257_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7903,11 +7864,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_armscii8_bin, /* lower */
to_upper_armscii8_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_armscii8_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7936,11 +7896,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_ascii_bin, /* lower */
to_upper_ascii_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_ascii_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -7969,11 +7928,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_bin, /* lower */
to_upper_cp1250_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1250_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8002,11 +7960,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1256_bin, /* lower */
to_upper_cp1256_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1256_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8035,11 +7992,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp866_bin, /* lower */
to_upper_cp866_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp866_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8068,11 +8024,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_dec8_bin, /* lower */
to_upper_dec8_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_dec8_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8101,11 +8056,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_greek_bin, /* lower */
to_upper_greek_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_greek_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8134,11 +8088,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hebrew_bin, /* lower */
to_upper_hebrew_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_hebrew_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8167,11 +8120,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_hp8_bin, /* lower */
to_upper_hp8_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_hp8_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8200,11 +8152,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_keybcs2_bin, /* lower */
to_upper_keybcs2_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_keybcs2_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8233,11 +8184,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8r_bin, /* lower */
to_upper_koi8r_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_koi8r_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8266,11 +8216,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_koi8u_bin, /* lower */
to_upper_koi8u_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_koi8u_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8299,11 +8248,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin2_bin, /* lower */
to_upper_latin2_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin2_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8332,11 +8280,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin5_bin, /* lower */
to_upper_latin5_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin5_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8365,11 +8312,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin7_bin, /* lower */
to_upper_latin7_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin7_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8398,11 +8344,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp850_bin, /* lower */
to_upper_cp850_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp850_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8431,11 +8376,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp852_bin, /* lower */
to_upper_cp852_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp852_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8464,11 +8408,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_swe7_bin, /* lower */
to_upper_swe7_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_swe7_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8497,11 +8440,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_geostd8_general_ci, /* lower */
to_upper_geostd8_general_ci, /* upper */
sort_order_geostd8_general_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_geostd8_general_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8530,11 +8472,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_geostd8_bin, /* lower */
to_upper_geostd8_bin, /* upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_geostd8_bin, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8563,11 +8504,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_latin1_spanish_ci, /* lower */
to_upper_latin1_spanish_ci, /* upper */
sort_order_latin1_spanish_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_latin1_spanish_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8596,11 +8536,10 @@ struct charset_info_st compiled_charsets[] = {
to_lower_cp1250_polish_ci, /* lower */
to_upper_cp1250_polish_ci, /* upper */
sort_order_cp1250_polish_ci, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
to_uni_cp1250_polish_ci, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
@@ -8628,11 +8567,10 @@ struct charset_info_st compiled_charsets[] = {
NULL, /* lower */
NULL, /* upper */
NULL, /* sort order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* to_uni */
NULL, /* from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state map */
NULL, /* ident map */
1, /* strxfrm_multiply*/
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index b27ea67059d..74be52a5c6d 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -177,7 +177,7 @@ static uint mbcharlen_gb2312(CHARSET_INFO *cs __attribute__((unused)),uint c)
}
-static MY_UNICASE_INFO cA2[256]=
+static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -298,7 +298,7 @@ static MY_UNICASE_INFO cA2[256]=
};
-static MY_UNICASE_INFO cA3[256]=
+static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -419,7 +419,7 @@ static MY_UNICASE_INFO cA3[256]=
};
-static MY_UNICASE_INFO cA6[256]=
+static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -540,7 +540,7 @@ static MY_UNICASE_INFO cA6[256]=
};
-static MY_UNICASE_INFO cA7[256]=
+static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -661,7 +661,7 @@ static MY_UNICASE_INFO cA7[256]=
};
-static MY_UNICASE_INFO cA8[256]=
+static MY_UNICASE_CHARACTER cA8[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -782,7 +782,7 @@ static MY_UNICASE_INFO cA8[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_gb2312[256]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_gb2312[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -819,6 +819,13 @@ static MY_UNICASE_INFO *my_caseinfo_gb2312[256]=
};
+static MY_UNICASE_INFO my_caseinfo_gb2312=
+{
+ 0xFFFF, /* NOTE(review): presumably the highest code covered - confirm */
+ my_caseinfo_pages_gb2312 /* page: 256-entry page table (NULL = no page) */
+};
+
+
/* page 0 0x2121-0x2658 */
static const uint16 tab_gb2312_uni0[]={
0x3000,0x3001,0x3002,0x30FB,0x02C9,0x02C7,0x00A8,0x3003,
@@ -6419,11 +6426,10 @@ struct charset_info_st my_charset_gb2312_chinese_ci=
to_lower_gb2312,
to_upper_gb2312,
sort_order_gb2312,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_gb2312, /* caseinfo */
+ &my_caseinfo_gb2312,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -6451,11 +6457,10 @@ struct charset_info_st my_charset_gb2312_bin=
to_lower_gb2312,
to_upper_gb2312,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_gb2312, /* caseinfo */
+ &my_caseinfo_gb2312,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index e21c406d2a9..dd617fd8548 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -136,7 +136,8 @@ static const uchar to_upper_gbk[]=
(uchar) '\370',(uchar) '\371',(uchar) '\372',(uchar) '\373',(uchar) '\374',(uchar) '\375',(uchar) '\376',(uchar) '\377',
};
-static MY_UNICASE_INFO cA2[256]=
+
+static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -340,7 +341,7 @@ static MY_UNICASE_INFO cA2[256]=
{0xA2FF,0xA2FF,0xA2FF}
};
-static MY_UNICASE_INFO cA3[256]=
+static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -545,7 +546,7 @@ static MY_UNICASE_INFO cA3[256]=
};
-static MY_UNICASE_INFO cA6[256]=
+static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -750,7 +751,7 @@ static MY_UNICASE_INFO cA6[256]=
};
-static MY_UNICASE_INFO cA7[256]=
+static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -955,7 +956,7 @@ static MY_UNICASE_INFO cA7[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_gbk[256]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_gbk[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -991,7 +992,15 @@ static MY_UNICASE_INFO *my_caseinfo_gbk[256]=
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
-static const uchar sort_order_gbk[]=
+
+static MY_UNICASE_INFO my_caseinfo_gbk=
+{
+ 0xFFFF, /* NOTE(review): presumably the highest code covered - confirm */
+ my_caseinfo_pages_gbk /* page: 256-entry page table (NULL = no page) */
+};
+
+
+static uchar sort_order_gbk[]=
{
'\000','\001','\002','\003','\004','\005','\006','\007',
'\010','\011','\012','\013','\014','\015','\016','\017',
@@ -10809,11 +10818,10 @@ struct charset_info_st my_charset_gbk_chinese_ci=
to_lower_gbk,
to_upper_gbk,
sort_order_gbk,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_gbk, /* caseinfo */
+ &my_caseinfo_gbk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -10841,11 +10849,10 @@ struct charset_info_st my_charset_gbk_bin=
to_lower_gbk,
to_upper_gbk,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_gbk, /* caseinfo */
+ &my_caseinfo_gbk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index f8455344498..759997dae2d 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -437,11 +437,10 @@ struct charset_info_st my_charset_latin1=
to_lower_latin1,
to_upper_latin1,
sort_order_latin1,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -736,11 +735,10 @@ struct charset_info_st my_charset_latin1_german2_ci=
to_lower_latin1,
to_upper_latin1,
sort_order_latin1_de,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
2, /* strxfrm_multiply */
@@ -769,11 +767,10 @@ struct charset_info_st my_charset_latin1_bin=
to_lower_latin1,
to_upper_latin1,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
cs_to_uni, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 8c7de1d16c7..c5c8fd92842 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -62,11 +62,11 @@ size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str)
}
-static inline MY_UNICASE_INFO*
-get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs)
+static inline MY_UNICASE_CHARACTER*
+get_case_info_for_ch(const CHARSET_INFO *cs, uint page, uint offs)
{
- MY_UNICASE_INFO *p;
- return cs->caseinfo ? ((p= cs->caseinfo[page]) ? &p[offs] : NULL) : NULL;
+ MY_UNICASE_CHARACTER *p;
+ return cs->caseinfo && (p= cs->caseinfo->page[page]) ? &p[offs] : NULL;
}
@@ -89,7 +89,7 @@ size_t my_caseup_mb(CHARSET_INFO * cs, char *src, size_t srclen,
{
if ((l=my_ismbchar(cs, src, srcend)))
{
- MY_UNICASE_INFO *ch;
+ MY_UNICASE_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{
*src++= ch->toupper >> 8;
@@ -124,7 +124,7 @@ size_t my_casedn_mb(CHARSET_INFO * cs, char *src, size_t srclen,
{
if ((l= my_ismbchar(cs, src, srcend)))
{
- MY_UNICASE_INFO *ch;
+ MY_UNICASE_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{
*src++= ch->tolower >> 8;
@@ -168,7 +168,7 @@ my_casefold_mb_varlen(CHARSET_INFO *cs,
size_t mblen= my_ismbchar(cs, src, srcend);
if (mblen)
{
- MY_UNICASE_INFO *ch;
+ MY_UNICASE_CHARACTER *ch;
if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
{
int code= is_upper ? ch->toupper : ch->tolower;
@@ -696,7 +696,7 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
size_t maxcharlen= res_length / cs->mbmaxlen;
- my_bool have_contractions= my_cs_have_contractions(cs);
+ const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
{
@@ -764,8 +764,8 @@ fill_max_and_min:
'ab\min\min\min\min' and 'ab\max\max\max\max'.
*/
- if (have_contractions && ptr + 1 < end &&
- my_cs_can_be_contraction_head(cs, (uchar) *ptr))
+ if (contractions && ptr + 1 < end &&
+ my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
{
/* Ptr[0] is a contraction head. */
@@ -787,8 +787,8 @@ fill_max_and_min:
is not a contraction, then we put only ptr[0],
and continue with ptr[1] on the next loop.
*/
- if (my_cs_can_be_contraction_tail(cs, (uchar) ptr[1]) &&
- my_cs_contraction2_weight(cs, (uchar) ptr[0], (uchar) ptr[1]))
+ if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
+ my_uca_contraction2_weight(contractions, (uchar) ptr[0], (uchar) ptr[1]))
{
/* Contraction found */
if (maxcharlen == 1 || min_str + 1 >= min_end)
@@ -853,7 +853,7 @@ my_like_range_generic(CHARSET_INFO *cs,
char *max_end= max_str + res_length;
size_t charlen= res_length / cs->mbmaxlen;
size_t res_length_diff;
- my_bool have_contractions= my_cs_have_contractions(cs);
+ const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
for ( ; charlen > 0; charlen--)
{
@@ -921,8 +921,8 @@ my_like_range_generic(CHARSET_INFO *cs,
goto pad_min_max;
}
- if (have_contractions &&
- my_cs_can_be_contraction_head(cs, wc) &&
+ if (contractions &&
+ my_uca_can_be_contraction_head(contractions, wc) &&
(res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
{
const uint16 *weight;
@@ -933,8 +933,8 @@ my_like_range_generic(CHARSET_INFO *cs,
goto pad_min_max;
}
- if (my_cs_can_be_contraction_tail(cs, wc2) &&
- (weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
+ if (my_uca_can_be_contraction_tail(contractions, wc2) &&
+ (weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0])
{
/* Contraction found */
if (charlen == 1)
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index f084ff9949a..91a9df9d50b 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1163,12 +1163,12 @@ static int pcmp(const void * f, const void * s)
return res;
}
-static my_bool create_fromuni(struct charset_info_st *cs,
- void *(*alloc)(size_t))
+static my_bool
+create_fromuni(struct charset_info_st *cs,
+ MY_CHARSET_LOADER *loader)
{
uni_idx idx[PLANE_NUM];
int i,n;
- struct my_uni_idx_st *tab_from_uni;
/*
Check that Unicode map is loaded.
@@ -1217,7 +1217,8 @@ static my_bool create_fromuni(struct charset_info_st *cs,
numchars=idx[i].uidx.to-idx[i].uidx.from+1;
if (!(idx[i].uidx.tab= tab= (uchar*)
- alloc(numchars * sizeof(*idx[i].uidx.tab))))
+ (loader->once_alloc) (numchars *
+ sizeof(*idx[i].uidx.tab))))
return TRUE;
bzero(tab,numchars*sizeof(*tab));
@@ -1235,25 +1236,25 @@ static my_bool create_fromuni(struct charset_info_st *cs,
/* Allocate and fill reverse table for each plane */
n=i;
- if (!(cs->tab_from_uni= tab_from_uni= (struct my_uni_idx_st*)
- alloc(sizeof(MY_UNI_IDX)*(n+1))))
+ if (!(cs->tab_from_uni= (MY_UNI_IDX *)
+ (loader->once_alloc)(sizeof(MY_UNI_IDX) * (n + 1))))
return TRUE;
for (i=0; i< n; i++)
- tab_from_uni[i]= idx[i].uidx;
+ ((struct my_uni_idx_st*)cs->tab_from_uni)[i]= idx[i].uidx;
/* Set end-of-list marker */
- bzero(&tab_from_uni[i],sizeof(MY_UNI_IDX));
+ bzero((char*) &cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
return FALSE;
}
-static my_bool my_cset_init_8bit(struct charset_info_st *cs,
- void *(*alloc)(size_t))
+static my_bool
+my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
cs->caseup_multiply= 1;
cs->casedn_multiply= 1;
cs->pad_char= ' ';
- return create_fromuni(cs, alloc);
+ return create_fromuni(cs, loader);
}
static void set_max_sort_char(struct charset_info_st *cs)
@@ -1276,7 +1277,7 @@ static void set_max_sort_char(struct charset_info_st *cs)
}
static my_bool my_coll_init_simple(struct charset_info_st *cs,
- void *(*alloc)(size_t) __attribute__((unused)))
+ MY_CHARSET_LOADER *loader __attribute__((unused)))
{
set_max_sort_char(cs);
return FALSE;
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 50756799a56..2c3d2b34dab 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -197,7 +197,7 @@ static uint mbcharlen_sjis(CHARSET_INFO *cs __attribute__((unused)),uint c)
#define sjiscode(c,d) ((((uint) (uchar)(c)) << 8) | (uint) (uchar) (d))
-static MY_UNICASE_INFO c81[256]=
+static MY_UNICASE_CHARACTER c81[256]=
{
/* 8100-810F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -407,7 +407,7 @@ static MY_UNICASE_INFO c81[256]=
};
-static MY_UNICASE_INFO c82[256]=
+static MY_UNICASE_CHARACTER c82[256]=
{
/* 8200-820F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -615,7 +615,7 @@ static MY_UNICASE_INFO c82[256]=
};
-static MY_UNICASE_INFO c83[256]=
+static MY_UNICASE_CHARACTER c83[256]=
{
/* 8300-830F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -825,7 +825,7 @@ static MY_UNICASE_INFO c83[256]=
};
-static MY_UNICASE_INFO c84[256]=
+static MY_UNICASE_CHARACTER c84[256]=
{
/* 8400-840F */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -1035,7 +1035,7 @@ static MY_UNICASE_INFO c84[256]=
};
-static MY_UNICASE_INFO *my_caseinfo_sjis[256]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_sjis[256]=
{
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1072,7 +1072,14 @@ static MY_UNICASE_INFO *my_caseinfo_sjis[256]=
};
-static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
+static MY_UNICASE_INFO my_caseinfo_sjis=
+{
+ 0xFFFF, /* NOTE(review): presumably the highest code covered - confirm */
+ my_caseinfo_pages_sjis /* page: 256-entry page table (NULL = no page) */
+};
+
+
+static int my_strnncoll_sjis_internal(const CHARSET_INFO *cs,
const uchar **a_res, size_t a_length,
const uchar **b_res, size_t b_length)
{
@@ -34204,11 +34211,10 @@ struct charset_info_st my_charset_sjis_japanese_ci=
to_lower_sjis,
to_upper_sjis,
sort_order_sjis,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_sjis, /* caseinfo */
+ &my_caseinfo_sjis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -34236,11 +34242,10 @@ struct charset_info_st my_charset_sjis_bin=
to_lower_sjis,
to_upper_sjis,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_sjis, /* caseinfo */
+ &my_caseinfo_sjis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index d84d43a67bd..d0b4f9b8862 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -894,11 +894,10 @@ struct charset_info_st my_charset_tis620_thai_ci=
to_lower_tis620,
to_upper_tis620,
sort_order_tis620,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
4, /* strxfrm_multiply */
@@ -926,11 +925,10 @@ struct charset_info_st my_charset_tis620_bin=
to_lower_tis620,
to_upper_tis620,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 109a233d62e..5d52cb7e517 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -46,7 +46,6 @@
#define MY_UCA_NCHARS 256
#define MY_UCA_CMASK 255
#define MY_UCA_PSHIFT 8
-#define MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT 0xFFFF
static const uint16 page000data[]= { /* 0000 (4 weights per char) */
0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,
@@ -6526,6 +6525,59 @@ NULL ,page0F9data,page0FAdata,page0FBdata,
page0FCdata,page0FDdata,page0FEdata,page0FFdata
};
+
+MY_UCA_INFO my_uca_v400=
+{
+ {
+ {
+ 0xFFFF, /* maxchar */
+ (uchar *) uca_length,
+ (uint16 **) uca_weight,
+ { /* Contractions: */
+ 0, /* nitems */
+ NULL, /* item */
+ NULL /* flags */
+ }
+ },
+ },
+
+ /* Logical positions */
+ 0x0009, /* first_non_ignorable p != ignore */
+ 0xA48C, /* last_non_ignorable Not a CJK and not UNASSIGNED */
+
+ 0x0332, /* first_primary_ignorable p == 0 */
+ 0x20EA, /* last_primary_ignorable */
+
+ 0x0000, /* first_secondary_ignorable p,s == 0 */
+ 0xFE73, /* last_secondary_ignorable p,s == 0 */
+
+ 0x0000, /* first_tertiary_ignorable p,s,t == 0 */
+ 0xFE73, /* last_tertiary_ignorable p,s,t == 0 */
+
+ 0x0000, /* first_trailing */
+ 0x0000, /* last_trailing */
+
+ 0x0009, /* first_variable */
+ 0x2183, /* last_variable */
+};
+
+/******************************************************/
+
+#define MY_UCA_CMASK 255
+#define MY_UCA_PSHIFT 8
+
+
+/******************************************************/
+
+/*
+ German Phonebook
+*/
+static const char german2[]=
+ "&AE << \\u00E6 <<< \\u00C6 << \\u00E4 <<< \\u00C4 "
+ "&OE << \\u0153 <<< \\u0152 << \\u00F6 <<< \\u00D6 "
+ "&UE << \\u00FC <<< \\u00DC ";
+
+
/*
Some sources treat LETTER A WITH DIARESIS (00E4,00C4)
secondary greater than LETTER AE (00E6,00C6).
@@ -6686,7 +6738,13 @@ static const char persian[]=
"& \\u0642 < \\u06A9 < \\u0643"
"& \\u0648 < \\u0647 < \\u0629 < \\u06C0 < \\u06CC < \\u0649 < \\u064A"
"& \\uFE80 < \\uFE81 < \\uFE82 < \\uFE8D < \\uFE8E < \\uFB50 < \\uFB51"
- " < \\uFE80 < \\uFE83 < \\uFE84 < \\uFE87 < \\uFE88 < \\uFE85"
+ " < \\uFE80 "
+ /*
+ FE80 appears both in reset and shift.
+ We need to break the rule here and reset to *new* FE80 again,
+ so weight for FE83 is calculated as P[FE80]+1, not as P[FE80]+8.
+ */
+ " & \\uFE80 < \\uFE83 < \\uFE84 < \\uFE87 < \\uFE88 < \\uFE85"
" < \\uFE86 < \\u0689 < \\u068A"
"& \\uFEAE < \\uFDFC"
"& \\uFED8 < \\uFB8E < \\uFB8F < \\uFB90 < \\uFB91 < \\uFED9 < \\uFEDA"
@@ -6747,7 +6805,6 @@ static const char sinhala[]=
static const char croatian[]=
-
"&C < \\u010D <<< \\u010C < \\u0107 <<< \\u0106 "
"&D < d\\u017E <<< \\u01C6 <<< D\\u017E <<< \\u01C5 <<< D\\u017D <<< \\u01C4 "
" < \\u0111 <<< \\u0110 "
@@ -6755,7 +6812,6 @@ static const char croatian[]=
"&N < nj <<< \\u01CC <<< Nj <<< \\u01CB <<< NJ <<< \\u01CA "
"&S < \\u0161 <<< \\u0160 "
"&Z < \\u017E <<< \\u017D";
-
/*
Unicode Collation Algorithm:
Collation element (weight) scanner,
@@ -6767,9 +6823,7 @@ typedef struct my_uca_scanner_st
const uint16 *wbeg; /* Beginning of the current weight string */
const uchar *sbeg; /* Beginning of the input string */
const uchar *send; /* End of the input string */
- const uchar *uca_length;
- const uint16 * const *uca_weight;
- const MY_CONTRACTIONS *contractions;
+ const MY_UCA_WEIGHT_LEVEL *level;
uint16 implicit[2];
int page;
int code;
@@ -6782,51 +6836,81 @@ typedef struct my_uca_scanner_st
*/
typedef struct my_uca_scanner_handler_st
{
- void (*init)(my_uca_scanner *scanner, CHARSET_INFO *cs,
+ void (*init)(my_uca_scanner *scanner, CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
const uchar *str, size_t length);
int (*next)(my_uca_scanner *scanner);
} my_uca_scanner_handler;
static const uint16 nochar[]= {0,0};
+
+#define MY_UCA_CNT_FLAG_SIZE 4096
+#define MY_UCA_CNT_FLAG_MASK 4095
+
+#define MY_UCA_CNT_HEAD 1
+#define MY_UCA_CNT_TAIL 2
+#define MY_UCA_CNT_MID1 4
+#define MY_UCA_CNT_MID2 8
+#define MY_UCA_CNT_MID3 16
+#define MY_UCA_CNT_MID4 32
+
+#define MY_UCA_PREVIOUS_CONTEXT_HEAD 64
+#define MY_UCA_PREVIOUS_CONTEXT_TAIL 128
+
/********** Helper functions to handle contraction ************/
/**
Mark a character as a contraction part
- @cs Pointer to CHARSET_INFO data
- @wc Unicode code point
- @flag flag: "is contraction head", "is contraction tail"
+ @param list Pointer to UCA contraction data
+ @param wc Unicode code point
+ @param flag flag: "is contraction head", "is contraction tail"
*/
-static void
-my_uca_add_contraction_flag(CHARSET_INFO *cs, my_wc_t wc, int flag)
+static inline void
+my_uca_add_contraction_flag(MY_CONTRACTIONS *list, my_wc_t wc, int flag)
{
- cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag;
+ list->flags[wc & MY_UCA_CNT_FLAG_MASK]|= flag;
}
/**
Add a new contraction into contraction list
- @cs Pointer to CHARSET_INFO data
- @wc Unicode code points of the characters
- @len Number of characters
+ @param list Pointer to UCA contraction list
+ @param wc Unicode code points of the characters
+ @param len Number of characters
@return New contraction
@retval Pointer to a newly added contraction
*/
static MY_CONTRACTION *
-my_uca_add_contraction(struct charset_info_st *cs,
- my_wc_t *wc, int len __attribute__((unused)))
+my_uca_add_contraction(MY_CONTRACTIONS *list, my_wc_t *wc, size_t len,
+ my_bool with_context)
{
- MY_CONTRACTIONS *list= (MY_CONTRACTIONS*) cs->contractions;
MY_CONTRACTION *next= &list->item[list->nitems];
- DBUG_ASSERT(len == 2); /* We currently support only contraction2 */
- next->ch[0]= wc[0];
- next->ch[1]= wc[1];
+ size_t i;
+ /*
+ Contraction is always at least 2 characters.
+ Contraction is never longer than MY_UCA_MAX_CONTRACTION,
+ which is guaranteed by using my_coll_rule_expand() with proper limit.
+ */
+ DBUG_ASSERT(len > 1 && len <= MY_UCA_MAX_CONTRACTION);
+ for (i= 0; i < len; i++)
+ {
+ /*
+ We don't support contractions with U+0000.
+ my_coll_rule_expand() guarantees there're no U+0000 in a contraction.
+ */
+ DBUG_ASSERT(wc[i] != 0);
+ next->ch[i]= wc[i];
+ }
+ if (i < MY_UCA_MAX_CONTRACTION)
+ next->ch[i]= 0; /* Add end-of-line marker */
+ next->with_context= with_context;
list->nitems++;
return next;
}
@@ -6835,9 +6919,9 @@ my_uca_add_contraction(struct charset_info_st *cs,
/**
Allocate and initialize memory for contraction list and flags
- @cs Pointer to CHARSET_INFO data
- @alloc Memory allocation function (typically points to my_alloc_once)
- @n Number of contractions
+ @param contractions Pointer to UCA contraction data
+ @param loader Charset loader; its once_alloc() member is used to allocate memory
+ @param n Number of contractions
@return Error code
@retval 0 - memory allocated successfully
@@ -6845,171 +6929,318 @@ my_uca_add_contraction(struct charset_info_st *cs,
*/
static my_bool
-my_uca_alloc_contractions(struct charset_info_st *cs,
- void *(*alloc)(size_t), size_t n)
+my_uca_alloc_contractions(MY_CONTRACTIONS *contractions,
+ MY_CHARSET_LOADER *loader, size_t n)
{
uint size= n * sizeof(MY_CONTRACTION);
- MY_CONTRACTIONS *contractions;
-
- if (!(cs->contractions= contractions= (*alloc)(sizeof(MY_CONTRACTIONS))))
- return 1;
- bzero(contractions, sizeof(MY_CONTRACTIONS));
- if (!(contractions->item= (*alloc)(size)) ||
- !(contractions->flags= (char*) (*alloc)(MY_UCA_CNT_FLAG_SIZE)))
+ if (!(contractions->item= (loader->once_alloc)(size)) ||
+ !(contractions->flags= (char *) (loader->once_alloc)(MY_UCA_CNT_FLAG_SIZE)))
return 1;
- bzero(contractions->item, size);
- bzero(contractions->flags, MY_UCA_CNT_FLAG_SIZE);
+ memset(contractions->item, 0, size);
+ memset(contractions->flags, 0, MY_UCA_CNT_FLAG_SIZE);
return 0;
}
-#ifdef HAVE_CHARSET_ucs2
-/*
- Initialize collation weight scanner
+/**
+ Return UCA contraction data for a CHARSET_INFO structure.
- SYNOPSIS:
- my_uca_scanner_init()
- scanner Pointer to an initialized scanner structure
- cs Character set + collation information
- str Beginning of the string
- length Length of the string.
-
- NOTES:
- Optimized for UCS2
+ @param cs Pointer to CHARSET_INFO structure
+ @retval Pointer to contraction data
+ @retval NULL, if this collation does not have UCA contraction
+*/
- RETURN
- N/A
+const MY_CONTRACTIONS *
+my_charset_get_contractions(const CHARSET_INFO *cs, int level)
+{
+ return (cs->uca != NULL) && (cs->uca->level[level].contractions.nitems > 0) ?
+ &cs->uca->level[level].contractions : NULL;
+}
+
+
+/**
+ Check if UCA level data has contractions (static version)
+ Static quick version of my_uca_have_contractions(),
+ optimized for performance purposes, also marked as "inline".
+
+ @param level Pointer to UCA level data
+
+ @return Flags indicating if UCA with contractions
+ @retval 0 - no contractions
+ @retval 1 - there are some contractions
*/
-static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner,
- CHARSET_INFO *cs,
- const uchar *str, size_t length)
+static inline my_bool
+my_uca_have_contractions_quick(const MY_UCA_WEIGHT_LEVEL *level)
{
- scanner->wbeg= nochar;
- if (length)
+ return (level->contractions.nitems > 0);
+}
+
+
+
+/**
+ Check if a character can be contraction head
+
+ @param c Pointer to UCA contraction data
+ @param wc Code point
+
+ @retval 0 - cannot be contraction head
+ @retval 1 - can be contraction head
+*/
+
+my_bool
+my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc)
+{
+ return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD;
+}
+
+
+/**
+ Check if a character can be contraction tail
+
+ @param c Pointer to UCA contraction data
+ @param wc Code point
+
+ @retval 0 - cannot be contraction tail
+ @retval 1 - can be contraction tail
+*/
+
+my_bool
+my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc)
+{
+ return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL;
+}
+
+
+/**
+ Check if a character can be contraction part
+
+ @param c Pointer to UCA contraction data
+ @param wc Code point
+
+ @retval 0 - cannot be contraction part
+ @retval 1 - can be contraction part
+*/
+
+static inline my_bool
+my_uca_can_be_contraction_part(const MY_CONTRACTIONS *c, my_wc_t wc, int flag)
+{
+ return c->flags[wc & MY_UCA_CNT_FLAG_MASK] & flag;
+}
+
+
+/**
+ Find a contraction consisting of two characters and return its weight array
+
+ @param list Pointer to UCA contraction data
+ @param wc1 First character
+ @param wc2 Second character
+
+ @return Weight array
+ @retval NULL - no contraction found
+ @retval ptr - contraction weight array
+*/
+
+uint16 *
+my_uca_contraction2_weight(const MY_CONTRACTIONS *list, my_wc_t wc1, my_wc_t wc2)
+{
+ MY_CONTRACTION *c, *last;
+ for (c= list->item, last= c + list->nitems; c < last; c++)
{
- scanner->sbeg= str;
- scanner->send= str + length - 2;
- scanner->uca_length= cs->sort_order;
- scanner->uca_weight= cs->sort_order_big;
- scanner->contractions= cs->contractions;
- scanner->cs= cs;
- return;
+ if (c->ch[0] == wc1 && c->ch[1] == wc2 && c->ch[2] == 0)
+ {
+ return c->weight;
+ }
}
+ return NULL;
+}
- /*
- Sometimes this function is called with
- str=NULL and length=0, which should be
- considered as an empty string.
-
- The above initialization is unsafe for such cases,
- because scanner->send is initialized to (NULL-2), which is 0xFFFFFFFE.
- Then we fall into an endless loop in my_uca_scanner_next_ucs2().
-
- Do special initialization for the case when length=0.
- Initialize scanner->sbeg to an address greater than scanner->send.
- Next call of my_uca_scanner_next_ucs2() will correctly return with -1.
- */
- scanner->sbeg= (uchar*) &nochar[1];
- scanner->send= (uchar*) &nochar[0];
+
+/**
+ Check if a character can be previous context head
+
+ @param list Pointer to UCA contraction data
+ @param wc Code point
+
+ @return
+ @retval FALSE - cannot be previous context head
+ @retval TRUE - can be previous context head
+*/
+
+static inline my_bool
+my_uca_can_be_previous_context_head(const MY_CONTRACTIONS *list, my_wc_t wc)
+{
+ return list->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_HEAD;
}
-/*
- Read next collation element (weight), i.e. converts
- a stream of characters into a stream of their weights.
-
- SYNOPSIS:
- my_uca_scanner_next()
- scanner Address of a previously initialized scanner strucuture
-
- NOTES:
- Optimized for UCS2
-
- Checks if the current character's weight string has been fully scanned,
- if no, then returns the next weight for this character,
- else scans the next character and returns its first weight.
+/**
+ Check if a character can be previous context tail
- Each character can have number weights from 0 to 8.
-
- Some characters do not have weights at all, 0 weights.
- It means they are ignored during comparison.
-
- Examples:
- 1. 0x0001 START OF HEADING, has no weights, ignored, does
- not produce any weights.
- 2. 0x0061 LATIN SMALL LETTER A, has one weight.
- 0x0E33 will be returned
- 3. 0x00DF LATIN SMALL LETTER SHARP S, aka SZ ligature,
- has two weights. It will return 0x0FEA twice for two
- consequent calls.
- 4. 0x247D PATENTHESIZED NUMBER TEN, has four weights,
- this function will return these numbers in four
- consequent calls: 0x0288, 0x0E2A, 0x0E29, 0x0289
- 5. A string consisting of the above characters:
- 0x0001 0x0061 0x00DF 0x247D
- will return the following weights, one weight per call:
- 0x0E33 0x0FEA 0x0FEA 0x0288, 0x0E2A, 0x0E29, 0x0289
-
- RETURN
- Next weight, a number between 0x0000 and 0xFFFF
- Or -1 on error (END-OF-STRING or ILLEGAL MULTIBYTE SEQUENCE)
+ @param list Pointer to UCA contraction data
+ @param wc Code point
+
+ @return
+ @retval FALSE - cannot be previous context tail
+ @retval TRUE - can be previous context tail
*/
-static int my_uca_scanner_next_ucs2(my_uca_scanner *scanner)
+static inline my_bool
+my_uca_can_be_previous_context_tail(const MY_CONTRACTIONS *list, my_wc_t wc)
{
-
- /*
- Check if the weights for the previous character have been
- already fully scanned. If yes, then get the next character and
- initialize wbeg and wlength to its weight string.
- */
-
- if (scanner->wbeg[0])
- return *scanner->wbeg++;
-
- do
+ return list->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_PREVIOUS_CONTEXT_TAIL;
+}
+
+
+/**
+ Compare two wide character arrays of equal length, wide analog to memcmp().
+
+ @param a Pointer to the first string
+ @param b Pointer to the second string
+ @param len Length of the strings
+
+ @return
+ @retval 0 - strings are equal
+ @retval non-zero - strings are different
+*/
+
+static int
+my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
+{
+ return memcmp(a, b, len * sizeof(my_wc_t));
+}
+
+
+/**
+ Check if a string is a contraction,
+ and return its weight array on success.
+
+ @param list Pointer to UCA contraction data
+ @param wc Pointer to wide string
+ @param len String length
+
+ @return Weight array
+ @retval NULL - Input string is not a known contraction
+ @retval ptr - contraction weight array
+*/
+
+static inline uint16 *
+my_uca_contraction_weight(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
+{
+ MY_CONTRACTION *c, *last;
+ for (c= list->item, last= c + list->nitems; c < last; c++)
{
- const uint16 *const *ucaw= scanner->uca_weight;
- const uchar *ucal= scanner->uca_length;
-
- if (scanner->sbeg > scanner->send)
- return -1;
-
- scanner->page= (uchar)scanner->sbeg[0];
- scanner->code= (uchar)scanner->sbeg[1];
- scanner->sbeg+= 2;
-
- if (scanner->contractions && (scanner->sbeg <= scanner->send))
+ if ((len == MY_UCA_MAX_CONTRACTION || c->ch[len] == 0) &&
+ !c->with_context &&
+ !my_wmemcmp(c->ch, wc, len))
+ return c->weight;
+ }
+ return NULL;
+}
+
+
+/**
+ Find a contraction in the input stream and return its weight array
+
+ Scan input characters while their flags tell that they can be
+ a contraction part. Then try to find real contraction among the
+ candidates, starting from the longest.
+
+ @param scanner Pointer to UCA scanner
+ @param[OUT] *wc Where to store the scanned string
+
+ @return Weight array
+ @retval NULL - no contraction found
+ @retval ptr - contraction weight array
+*/
+
+static uint16 *
+my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
+{
+ size_t clen= 1;
+ int flag;
+ const uchar *s, *beg[MY_UCA_MAX_CONTRACTION];
+ memset(beg, 0, sizeof(beg));
+
+ /* Scan all contraction candidates */
+ for (s= scanner->sbeg, flag= MY_UCA_CNT_MID1;
+ clen < MY_UCA_MAX_CONTRACTION;
+ flag<<= 1)
+ {
+ int mblen;
+ if ((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc[clen],
+ s, scanner->send)) <= 0)
+ break;
+ beg[clen]= s= s + mblen;
+ if (!my_uca_can_be_contraction_part(&scanner->level->contractions,
+ wc[clen++], flag))
+ break;
+ }
+
+ /* Find among candidates the longest real contraction */
+ for ( ; clen > 1; clen--)
+ {
+ uint16 *cweight;
+ if (my_uca_can_be_contraction_tail(&scanner->level->contractions,
+ wc[clen - 1]) &&
+ (cweight= my_uca_contraction_weight(&scanner->level->contractions,
+ wc, clen)))
{
- my_wc_t wc1= ((scanner->page << 8) | scanner->code);
-
- if (my_cs_can_be_contraction_head(scanner->cs, wc1))
- {
- const uint16 *cweight;
- my_wc_t wc2= (((my_wc_t) scanner->sbeg[0]) << 8) | scanner->sbeg[1];
- if (my_cs_can_be_contraction_tail(scanner->cs, wc2) &&
- (cweight= my_cs_contraction2_weight(scanner->cs,
- scanner->code,
- scanner->sbeg[1])))
- {
- scanner->implicit[0]= 0;
- scanner->wbeg= scanner->implicit;
- scanner->sbeg+=2;
- return *cweight;
- }
- }
+ scanner->wbeg= cweight + 1;
+ scanner->sbeg= beg[clen - 1];
+ return cweight;
}
-
- if (!ucaw[scanner->page])
- goto implicit;
- scanner->wbeg= ucaw[scanner->page] + scanner->code * ucal[scanner->page];
- } while (!scanner->wbeg[0]);
-
- return *scanner->wbeg++;
+ }
+
+ return NULL; /* No contractions were found */
+}
+
+
+/**
+ Find weight for contraction with previous context
+ and return its weight array.
+
+ @param scanner Pointer to UCA scanner
+ @param wc0 Previous character
+ @param wc1 Current character
+
+ @return Weight array
+ @retval NULL - no contraction with context found
+ @retval ptr - contraction weight array
+*/
+
+static uint16 *
+my_uca_previous_context_find(my_uca_scanner *scanner,
+ my_wc_t wc0, my_wc_t wc1)
+{
+ const MY_CONTRACTIONS *list= &scanner->level->contractions;
+ MY_CONTRACTION *c, *last;
+ for (c= list->item, last= c + list->nitems; c < last; c++)
+ {
+ if (c->with_context && wc0 == c->ch[0] && wc1 == c->ch[1])
+ {
+ scanner->wbeg= c->weight + 1;
+ return c->weight;
+ }
+ }
+ return NULL;
+}
+
+/****************************************************************/
+
+
+/**
+ Return implicit UCA weight
+ Used for characters that do not have assigned UCA weights.
-implicit:
+ @param scanner UCA weight scanner
+ @return The leading implicit weight.
+*/
+
+static inline int
+my_uca_scanner_next_implicit(my_uca_scanner *scanner)
+{
scanner->code= (scanner->page << 8) + scanner->code;
scanner->implicit[0]= (scanner->code & 0x7FFF) | 0x8000;
scanner->implicit[1]= 0;
@@ -7027,112 +7258,101 @@ implicit:
return scanner->page;
}
-static my_uca_scanner_handler my_ucs2_uca_scanner_handler=
-{
- my_uca_scanner_init_ucs2,
- my_uca_scanner_next_ucs2
-};
-
-#endif /* HAVE_CHARSET_ucs2 */
-
/*
The same two functions for any character set
*/
-static void my_uca_scanner_init_any(my_uca_scanner *scanner,
- CHARSET_INFO *cs __attribute__((unused)),
- const uchar *str, size_t length)
+static void
+my_uca_scanner_init_any(my_uca_scanner *scanner,
+ CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ const uchar *str, size_t length)
{
/* Note, no needs to initialize scanner->wbeg */
scanner->sbeg= str;
scanner->send= str + length;
scanner->wbeg= nochar;
- scanner->uca_length= cs->sort_order;
- scanner->uca_weight= cs->sort_order_big;
- scanner->contractions= cs->contractions;
+ scanner->level= level;
scanner->cs= cs;
}
static int my_uca_scanner_next_any(my_uca_scanner *scanner)
{
-
/*
Check if the weights for the previous character have been
already fully scanned. If yes, then get the next character and
initialize wbeg and wlength to its weight string.
*/
-
- if (scanner->wbeg[0])
- return *scanner->wbeg++;
-
- do
+
+ if (scanner->wbeg[0]) /* More weights left from the previous step: */
+ return *scanner->wbeg++; /* return the next weight from expansion */
+
+ do
{
- const uint16 *const *ucaw= scanner->uca_weight;
- const uchar *ucal= scanner->uca_length;
- my_wc_t wc;
- int mb_len;
-
- if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc,
+ const uint16 *wpage;
+ my_wc_t wc[MY_UCA_MAX_CONTRACTION];
+ int mblen;
+
+ /* Get next character */
+ if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, wc,
scanner->sbeg,
scanner->send)) <= 0))
return -1;
-
- scanner->sbeg+= mb_len;
- if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
+
+ scanner->sbeg+= mblen;
+ if (wc[0] > scanner->level->maxchar)
{
/* Return 0xFFFD as weight for all characters outside BMP */
scanner->wbeg= nochar;
return 0xFFFD;
}
- else
- {
- scanner->page= wc >> 8;
- scanner->code= wc & 0xFF;
- }
-
- if (my_cs_have_contractions(scanner->cs) &&
- my_cs_can_be_contraction_head(scanner->cs, wc))
+
+ if (my_uca_have_contractions_quick(scanner->level))
{
- my_wc_t wc2;
- const uint16 *cweight;
-
- if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc2,
- scanner->sbeg,
- scanner->send)) >=0) &&
- my_cs_can_be_contraction_tail(scanner->cs, wc2) &&
- (cweight= my_cs_contraction2_weight(scanner->cs, wc, wc2)))
+ uint16 *cweight;
+ /*
+ If we have scanned a character which can have previous context,
+ and there were some more characters already before,
+ then reconstruct codepoint of the previous character
+ from "page" and "code" into wc[1], and verify that {wc[1], wc[0]}
+ together form a real previous context pair.
+ Note, we support only 2-character long sequences with previous
+ context at the moment. CLDR does not have longer sequences.
+ */
+ if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
+ wc[0]) &&
+ scanner->wbeg != nochar && /* if not the very first character */
+ my_uca_can_be_previous_context_head(&scanner->level->contractions,
+ (wc[1]= ((scanner->page << 8) +
+ scanner->code))) &&
+ (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
{
- scanner->implicit[0]= 0;
- scanner->wbeg= scanner->implicit;
- scanner->sbeg+= mb_len;
+ scanner->page= scanner->code= 0; /* Clear for the next character */
return *cweight;
}
+ else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
+ wc[0]))
+ {
+ /* Check if wc[0] starts a contraction */
+ if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
+ return *cweight;
+ }
}
-
- if (!ucaw[scanner->page])
- goto implicit;
- scanner->wbeg= ucaw[scanner->page] + scanner->code * ucal[scanner->page];
- } while (!scanner->wbeg[0]);
-
+
+ /* Process single character */
+ scanner->page= wc[0] >> 8;
+ scanner->code= wc[0] & 0xFF;
+
+ /* If weight page for wc[0] does not exist, then calculate algorithmically */
+ if (!(wpage= scanner->level->weights[scanner->page]))
+ return my_uca_scanner_next_implicit(scanner);
+
+ /* Calculate pointer to wc[0]'s weight, using page and offset */
+ scanner->wbeg= wpage +
+ scanner->code * scanner->level->lengths[scanner->page];
+ } while (!scanner->wbeg[0]); /* Skip ignorable characters */
+
return *scanner->wbeg++;
-
-implicit:
-
- scanner->code= (scanner->page << 8) + scanner->code;
- scanner->implicit[0]= (scanner->code & 0x7FFF) | 0x8000;
- scanner->implicit[1]= 0;
- scanner->wbeg= scanner->implicit;
-
- scanner->page= scanner->page >> 7;
-
- if (scanner->code >= 0x3400 && scanner->code <= 0x4DB5)
- scanner->page+= 0xFB80;
- else if (scanner->code >= 0x4E00 && scanner->code <= 0x9FA5)
- scanner->page+= 0xFB40;
- else
- scanner->page+= 0xFBC0;
-
- return scanner->page;
}
@@ -7142,7 +7362,6 @@ static my_uca_scanner_handler my_any_uca_scanner_handler=
my_uca_scanner_next_any
};
-
/*
Compares two strings according to the collation
@@ -7195,8 +7414,8 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
int s_res;
int t_res;
- scanner_handler->init(&sscanner, cs, s, slen);
- scanner_handler->init(&tscanner, cs, t, tlen);
+ scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen);
+ scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen);
do
{
@@ -7207,6 +7426,38 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
}
+
+static inline int
+my_space_weight(const CHARSET_INFO *cs) /* W3-TODO */
+{
+ return cs->uca->level[0].weights[0][0x20 * cs->uca->level[0].lengths[0]];
+}
+
+
+/**
+ Helper function:
+ Find address of weights of the given character.
+
+ @param level UCA weight level data; its "weights" and "lengths"
+ arrays are indexed by page number (wc >> 8)
+ @param wc character Unicode code point
+
+ @return Weight array
+ @retval pointer to weight array for the given character,
+ or NULL if wc is above level->maxchar or its page has no weight table.
+*/
+
+static inline uint16 *
+my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc)
+{
+ uint page, ofst;
+ return wc > level->maxchar ? NULL :
+ (level->weights[page= (wc >> 8)] ?
+ level->weights[page] + (ofst= (wc & 0xFF)) * level->lengths[page] :
+ NULL);
+}
+
+
/*
Compares two strings according to the collation,
ignoring trailing spaces.
@@ -7268,8 +7519,8 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
diff_if_only_endspace_difference= 0;
#endif
- scanner_handler->init(&sscanner, cs, s, slen);
- scanner_handler->init(&tscanner, cs, t, tlen);
+ scanner_handler->init(&sscanner, cs, &cs->uca->level[0], s, slen);
+ scanner_handler->init(&tscanner, cs, &cs->uca->level[0], t, tlen);
do
{
@@ -7280,7 +7531,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
if (s_res > 0 && t_res < 0)
{
/* Calculate weight for SPACE character */
- t_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]];
+ t_res= my_space_weight(cs);
/* compare the first string to spaces */
do
@@ -7295,7 +7546,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
if (s_res < 0 && t_res > 0)
{
/* Calculate weight for SPACE character */
- s_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]];
+ s_res= my_space_weight(cs);
/* compare the second string to spaces */
do
@@ -7342,7 +7593,7 @@ static void my_hash_sort_uca(CHARSET_INFO *cs,
my_uca_scanner scanner;
slen= cs->cset->lengthsp(cs, (char*) s, slen);
- scanner_handler->init(&scanner, cs, s, slen);
+ scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
while ((s_res= scanner_handler->next(&scanner)) >0)
{
@@ -7393,7 +7644,7 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
uchar *de= dst + (dstlen & (size_t) ~1); /* add even length for easier code */
int s_res;
my_uca_scanner scanner;
- scanner_handler->init(&scanner, cs, src, srclen);
+ scanner_handler->init(&scanner, cs, &cs->uca->level[0], src, srclen);
while (dst < de && (s_res= scanner_handler->next(&scanner)) >0)
{
@@ -7401,7 +7652,7 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
dst[1]= s_res & 0xFF;
dst+= 2;
}
- s_res= cs->sort_order_big[0][0x20 * cs->sort_order[0]];
+ s_res= my_space_weight(cs);
while (dst < de)
{
dst[0]= s_res >> 8;
@@ -7416,33 +7667,6 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
-/**
- Helper function:
- Find address of weights of the given character.
-
- @param weights UCA weight array
- @param lengths UCA length array
- @param ch character Unicode code point
-
- @return Weight array
- @retval pointer to weight array for the given character,
- or NULL if this page does not have implicit weights.
-*/
-
-static inline const uint16 *
-my_char_weight_addr(CHARSET_INFO *cs, uint wc)
-{
- uint page, ofst;
- const uchar *ucal= cs->sort_order;
- const uint16 * const *ucaw= cs->sort_order_big;
-
- return wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ? NULL :
- (ucaw[page= (wc >> 8)] ?
- ucaw[page] + (ofst= (wc & 0xFF)) * ucal[page] :
- NULL);
-}
-
-
/*
This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only
@@ -7454,8 +7678,8 @@ my_char_weight_addr(CHARSET_INFO *cs, uint wc)
static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
size_t length1, length2;
- const uint16 *weight1= my_char_weight_addr(cs, wc1);
- const uint16 *weight2= my_char_weight_addr(cs, wc2);
+ const uint16 *weight1= my_char_weight_addr(&cs->uca->level[0], wc1);
+ const uint16 *weight2= my_char_weight_addr(&cs->uca->level[0], wc2);
if (!weight1 || !weight2)
return wc1 != wc2;
@@ -7465,8 +7689,8 @@ static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
return 1;
/* Thoroughly compare all weights */
- length1= cs->sort_order[wc1 >> MY_UCA_PSHIFT];
- length2= cs->sort_order[wc2 >> MY_UCA_PSHIFT];
+ length1= cs->uca->level[0].lengths[wc1 >> MY_UCA_PSHIFT]; /* W3-TODO */
+ length2= cs->uca->level[0].lengths[wc2 >> MY_UCA_PSHIFT];
if (length1 > length2)
return memcmp((const void*)weight1, (const void*)weight2, length2*2) ?
@@ -7632,7 +7856,7 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
/*
Collation language is implemented according to
subset of ICU Collation Customization (tailorings):
- http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
+ http://icu.sourceforge.net/userguide/Collate_Customization.html
Collation language elements:
Delimiters:
@@ -7674,16 +7898,47 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
typedef enum my_coll_lexem_num_en
{
- MY_COLL_LEXEM_EOF = 0,
- MY_COLL_LEXEM_DIFF = 1,
- MY_COLL_LEXEM_SHIFT = 4,
- MY_COLL_LEXEM_CHAR = 5,
- MY_COLL_LEXEM_ERROR = 6
+ MY_COLL_LEXEM_EOF = 0,
+ MY_COLL_LEXEM_SHIFT = 1,
+ MY_COLL_LEXEM_RESET = 4,
+ MY_COLL_LEXEM_CHAR = 5,
+ MY_COLL_LEXEM_ERROR = 6,
+ MY_COLL_LEXEM_OPTION = 7,
+ MY_COLL_LEXEM_EXTEND = 8,
+ MY_COLL_LEXEM_CONTEXT = 9,
} my_coll_lexem_num;
+/**
+ Convert collation customization lexem to string,
+ for nice error reporting
+
+ @param term lexem code
+
+ @return lexem name
+*/
+
+static const char *
+my_coll_lexem_num_to_str(my_coll_lexem_num term)
+{
+ switch (term)
+ {
+ case MY_COLL_LEXEM_EOF: return "EOF";
+ case MY_COLL_LEXEM_SHIFT: return "Shift";
+ case MY_COLL_LEXEM_RESET: return "&";
+ case MY_COLL_LEXEM_CHAR: return "Character";
+ case MY_COLL_LEXEM_OPTION: return "Bracket option";
+ case MY_COLL_LEXEM_EXTEND: return "/";
+ case MY_COLL_LEXEM_CONTEXT:return "|";
+ case MY_COLL_LEXEM_ERROR: return "ERROR";
+ }
+ return NULL;
+}
+
+
typedef struct my_coll_lexem_st
{
+ my_coll_lexem_num term;
const char *beg;
const char *end;
const char *prev;
@@ -7717,6 +7972,27 @@ static void my_coll_lexem_init(MY_COLL_LEXEM *lexem,
}
+/**
+ Compare lexem to string with length
+
+ @param lexem lexem
+ @param pattern string
+ @param patternlen string length
+
+ @return
+ @retval 0 if lexem starts with the string (case insensitively), non-0 otherwise.
+*/
+
+static int
+lex_cmp(MY_COLL_LEXEM *lexem, const char *pattern, size_t patternlen)
+{
+ size_t lexemlen= lexem->beg - lexem->prev;
+ if (lexemlen < patternlen)
+ return 1; /* Not a prefix */
+ return strncasecmp(lexem->prev, pattern, patternlen);
+}
+
+
/*
Print collation customization expression parse error, with context.
@@ -7740,7 +8016,8 @@ static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
size_t len= lexem->end - lexem->prev;
strmake (tail, lexem->prev, (size_t) MY_MIN(len, sizeof(tail)-1));
errstr[errsize-1]= '\0';
- my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
+ my_snprintf(errstr, errsize - 1,
+ "%s at '%s'", txt[0] ? txt : "Syntax error", tail);
}
@@ -7791,44 +8068,75 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
{
const char *beg;
my_coll_lexem_num rc;
-
+
for (beg= lexem->beg ; beg < lexem->end ; beg++)
{
- if (*beg == ' ' || *beg == '\t' || *beg == '\r' || *beg == '\n')
- continue;
-
- if (*beg == '&')
+ switch (*beg)
{
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ continue;
+
+ case '[': /* Bracket expression, e.g. "[optimize [a-z]]" */
+ {
+ size_t nbrackets; /* Indicates nested recursion level */
+ for (beg++, nbrackets= 1 ; beg < lexem->end; beg++)
+ {
+ if (*beg == '[') /* Enter nested bracket expression */
+ nbrackets++;
+ else if (*beg == ']')
+ {
+ if (--nbrackets == 0)
+ {
+ rc= MY_COLL_LEXEM_OPTION;
+ beg++;
+ goto ex;
+ }
+ }
+ }
+ rc= MY_COLL_LEXEM_ERROR;
+ goto ex;
+ }
+
+ case '&':
beg++;
- rc= MY_COLL_LEXEM_SHIFT;
+ rc= MY_COLL_LEXEM_RESET;
goto ex;
- }
-
- if (beg[0] == '=')
- {
+
+ case '=':
beg++;
- rc= MY_COLL_LEXEM_DIFF;
+ lexem->diff= 0;
+ rc= MY_COLL_LEXEM_SHIFT;
goto ex;
- }
-
- if (beg[0] == '<')
- {
- for (beg++, lexem->diff= 1;
- (beg < lexem->end) &&
- (*beg == '<') && (lexem->diff<3);
- beg++, lexem->diff++);
- rc= MY_COLL_LEXEM_DIFF;
+
+ case '/':
+ beg++;
+ rc= MY_COLL_LEXEM_EXTEND;
goto ex;
- }
-
- if ((*beg >= 'a' && *beg <= 'z') || (*beg >= 'A' && *beg <= 'Z'))
- {
- lexem->code= *beg++;
- rc= MY_COLL_LEXEM_CHAR;
+
+ case '|':
+ beg++;
+ rc= MY_COLL_LEXEM_CONTEXT;
goto ex;
+
+ case '<': /* Shift: '<' or '<<' or '<<<' or '<<<<' */
+ {
+ /* Scan up to 3 additional '<' characters */
+ for (beg++, lexem->diff= 1;
+ (beg < lexem->end) && (*beg == '<') && (lexem->diff <= 3);
+ beg++, lexem->diff++);
+ rc= MY_COLL_LEXEM_SHIFT;
+ goto ex;
+ }
+ default:
+ break;
}
-
- if ((*beg == '\\') && (beg+2 < lexem->end) && (beg[1] == 'u'))
+
+ /* Escaped character, e.g. \u1234 */
+ if ((*beg == '\\') && (beg + 2 < lexem->end) &&
+ (beg[1] == 'u') && my_isxdigit(&my_charset_utf8_general_ci, beg[2]))
{
int ch;
@@ -7842,15 +8150,43 @@ static my_coll_lexem_num my_coll_lexem_next(MY_COLL_LEXEM *lexem)
rc= MY_COLL_LEXEM_CHAR;
goto ex;
}
-
+
+ /*
+ Unescaped single byte character:
+ allow printable ASCII range except SPACE and
+ special characters parsed above []<&/|=
+ */
+ if (*beg >= 0x21 && *beg <= 0x7E)
+ {
+ lexem->code= *beg++;
+ rc= MY_COLL_LEXEM_CHAR;
+ goto ex;
+ }
+
+ if (((uchar) *beg) > 0x7F) /* Unescaped multibyte character */
+ {
+ CHARSET_INFO *cs= &my_charset_utf8_general_ci;
+ my_wc_t wc;
+ int nbytes= cs->cset->mb_wc(cs, &wc,
+ (uchar *) beg, (uchar *) lexem->end);
+ if (nbytes > 0)
+ {
+ rc= MY_COLL_LEXEM_CHAR;
+ beg+= nbytes;
+ lexem->code= (int) wc;
+ goto ex;
+ }
+ }
+
rc= MY_COLL_LEXEM_ERROR;
goto ex;
}
rc= MY_COLL_LEXEM_EOF;
-
+
ex:
lexem->prev= lexem->beg;
lexem->beg= beg;
+ lexem->term= rc;
return rc;
}
@@ -7859,142 +8195,1149 @@ ex:
Collation rule item
*/
+#define MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */
+
typedef struct my_coll_rule_item_st
{
- my_wc_t base; /* Base character */
- my_wc_t curr[2]; /* Current character */
- int diff[3]; /* Primary, Secondary and Tertiary difference */
+ my_wc_t base[MY_UCA_MAX_EXPANSION]; /* Base character */
+ my_wc_t curr[MY_UCA_MAX_CONTRACTION]; /* Current character */
+ int diff[4]; /* Primary, Secondary, Tertiary, Quaternary difference */
+ size_t before_level; /* "reset before" indicator */
+ my_bool with_context;
} MY_COLL_RULE;
+/**
+  Return length of a 0-terminated wide string, analog to strnlen().
+  At most maxlen characters are examined.
+
+  @param s       Pointer to wide string
+  @param maxlen  Maximum string length
+
+  @return string length, or maxlen if no '\0' is met.
+*/
+static size_t
+my_wstrnlen(my_wc_t *s, size_t maxlen)
+{
+  const my_wc_t *pos= s;
+  const my_wc_t *end= s + maxlen;
+  /* Stop at the terminator or at the length limit, whichever is first */
+  while (pos < end && *pos != 0)
+    pos++;
+  return (size_t) (pos - s);
+}
+
+
+/**
+ Return length of the "reset" string of a rule.
+
+ @param r Collation customization rule
+
+ @return Length of r->base, at most MY_UCA_MAX_EXPANSION
+*/
+
+static inline size_t
+my_coll_rule_reset_length(MY_COLL_RULE *r)
+{
+ return my_wstrnlen(r->base, MY_UCA_MAX_EXPANSION);
+}
+
+
+/**
+ Return length of the "shift" string of a rule.
+
+ @param r Collation customization rule
+
+ @return Length of r->curr, at most MY_UCA_MAX_CONTRACTION
+*/
+
+static inline size_t
+my_coll_rule_shift_length(MY_COLL_RULE *r)
+{
+ return my_wstrnlen(r->curr, MY_UCA_MAX_CONTRACTION);
+}
+
+
+/**
+  Append new character to the end of a 0-terminated wide string.
+  The character goes into the first zero slot among the first
+  'limit' positions.
+
+  @param wc     Wide string
+  @param limit  Maximum possible result length
+  @param code   Character to add
+
+  @return 1 if character was added, 0 if string was too long
+*/
+
+static int
+my_coll_rule_expand(my_wc_t *wc, size_t limit, my_wc_t code)
+{
+  my_wc_t *slot= wc;
+  my_wc_t *end= wc + limit;
+  while (slot < end && *slot != 0)    /* Skip occupied positions */
+    slot++;
+  if (slot == end)
+    return 0;                         /* All 'limit' positions are in use */
+  *slot= code;
+  return 1;
+}
+
+
+/**
+ Initialize collation customization rule by zero-filling it
+
+ @param r Rule
+*/
+
+static void
+my_coll_rule_reset(MY_COLL_RULE *r)
+{
+ memset(r, 0, sizeof(*r));
+}
+
+
+/*
+ Shift methods:
+ Simple: "&B < C" : weight('C') = weight('B') + 1
+ Expand: weight('C') = { weight('B'), weight(last_non_ignorable) + 1 }
+*/
+typedef enum
+{
+ my_shift_method_simple= 0,
+ my_shift_method_expand
+} my_coll_shift_method;
+
+
+typedef struct my_coll_rules_st
+{
+ uint version; /* Unicode version, e.g. 400 or 520 */
+ MY_UCA_INFO *uca; /* Unicode weight data */
+ size_t nrules; /* Number of rules in the rule array */
+ size_t mrules; /* Number of allocated rules */
+ MY_COLL_RULE *rule; /* Rule array */
+ MY_CHARSET_LOADER *loader; /* Provides realloc() and the error buffer */
+ my_coll_shift_method shift_after_method; /* Set by [shift-after-method ...] */
+} MY_COLL_RULES;
+
+
+/**
+  Make sure the rule array has room for at least n rules.
+  Grows by 128 extra rules at once to reduce the number of reallocs,
+  which is important for long tailorings (e.g. East Asian collations).
+  On failure the old array and its capacity are left untouched.
+
+  @param rules  Rule container
+  @param n      new number of rules
+  @return 0 on success, -1 on error.
+*/
+static int
+my_coll_rules_realloc(MY_COLL_RULES *rules, size_t n)
+{
+  MY_COLL_RULE *tmp;
+  if (n <= rules->mrules)
+    return 0;                   /* Enough room already */
+  if (!(tmp= rules->loader->realloc(rules->rule, sizeof(*tmp) * (n + 128))))
+    return -1;                  /* rules->rule is still valid and can be freed */
+  rules->rule= tmp;
+  rules->mrules= n + 128;
+  return 0;
+}
+
+
+/**
+  Append a copy of one new rule to the end of a rule array.
+
+  @param rules  Rule container
+  @param rule   New rule to add
+  @return 0 on success, -1 on error.
+*/
+
+static int
+my_coll_rules_add(MY_COLL_RULES *rules, MY_COLL_RULE *rule)
+{
+  if (my_coll_rules_realloc(rules, rules->nrules + 1) != 0)
+    return -1;
+  rules->rule[rules->nrules]= *rule;
+  rules->nrules++;
+  return 0;
+}
+
+
+/**
+  Apply a shift operator to the difference counters of a rule.
+
+  The counter of the given level is incremented and the counters
+  of all following levels are cleared:
+    level 1: diff[0]++, diff[1]= diff[2]= diff[3]= 0  (primary)
+    level 2: diff[1]++, diff[2]= diff[3]= 0           (secondary)
+    level 3: diff[2]++, diff[3]= 0                    (tertiary)
+    level 4: diff[3]++                                (quaternary)
+    level 0: nothing changes ('='), the previous offsets
+             are used for all levels
+
+  @param r      Rule
+  @param level  Level (0,1,2,3,4)
+*/
+
+static void
+my_coll_rule_shift_at_level(MY_COLL_RULE *r, int level)
+{
+  int i;
+
+  if (level == 0)
+    return;                     /* Do nothing for '=' */
+
+  if (level < 0 || level > 4)
+  {
+    DBUG_ASSERT(0);             /* Unknown level */
+    return;
+  }
+
+  r->diff[level - 1]++;         /* Difference at the requested level */
+  for (i= level; i < 4; i++)    /* Reset all following levels */
+    r->diff[i]= 0;
+}
+
+
+typedef struct my_coll_rule_parser_st
+{
+ MY_COLL_LEXEM tok[2]; /* Current token and next token for look-ahead */
+ MY_COLL_RULE rule; /* Currently parsed rule */
+ MY_COLL_RULES *rules; /* Rule list pointer */
+ char errstr[128]; /* Error message, empty if no error was reported */
+} MY_COLL_RULE_PARSER;
+
+
+/**
+ Current parser token
+
+ @param p Collation customization parser
+
+ @return Pointer to the current token, i.e. p->tok[0]
+*/
+
+static MY_COLL_LEXEM *
+my_coll_parser_curr(MY_COLL_RULE_PARSER *p)
+{
+ return &p->tok[0];
+}
+
+
+/**
+ Next parser token, to look ahead.
+
+ @param p Collation customization parser
+
+ @return Pointer to the next token, i.e. p->tok[1]
+*/
+
+static MY_COLL_LEXEM *
+my_coll_parser_next(MY_COLL_RULE_PARSER *p)
+{
+ return &p->tok[1];
+}
+
+
+/**
+ Scan one token: the look-ahead token becomes current, a new one is read.
+
+ @param p Collation customization parser
+
+ @return 1, for convenience, to use in logical expressions easier.
+*/
+static int
+my_coll_parser_scan(MY_COLL_RULE_PARSER *p)
+{
+ my_coll_parser_curr(p)[0]= my_coll_parser_next(p)[0];
+ my_coll_lexem_next(my_coll_parser_next(p));
+ return 1;
+}
+
+
+/**
+ Initialize collation customization parser
+
+ @param p Collation customization parser
+ @param rules Where to store rules
+ @param str Beginning of a collation customization string
+ @param str_end End of the collation customization string
+*/
+
+static void
+my_coll_parser_init(MY_COLL_RULE_PARSER *p,
+ MY_COLL_RULES *rules,
+ const char *str, const char *str_end)
+{
+ /*
+ Initialize parser to the input buffer and scan two tokens,
+ to make the current token and the next token known.
+ */
+ memset(p, 0, sizeof(*p));
+ p->rules= rules;
+ p->errstr[0]= '\0';
+ my_coll_lexem_init(my_coll_parser_curr(p), str, str_end);
+ my_coll_lexem_next(my_coll_parser_curr(p));
+ my_coll_parser_next(p)[0]= my_coll_parser_curr(p)[0];
+ my_coll_lexem_next(my_coll_parser_next(p));
+}
+
+
+/**
+ Store "<lexem name> expected" in p->errstr when an unexpected token is found
+
+ @param p Collation customization parser
+ @param term Which lexem was expected
+
+ @return 0, to use in "return" and boolean expressions.
+*/
+
+static int
+my_coll_parser_expected_error(MY_COLL_RULE_PARSER *p, my_coll_lexem_num term)
+{
+ my_snprintf(p->errstr, sizeof(p->errstr),
+ "%s expected", my_coll_lexem_num_to_str(term));
+ return 0;
+}
+
+
+/**
+ Store "<name> is too long" in p->errstr when a too long sequence is met
+
+ @param p Collation customization parser
+ @param name Which kind of sequence: contraction, expansion, etc.
+
+ @return 0, to use in "return" and boolean expressions.
+*/
+
+static int
+my_coll_parser_too_long_error(MY_COLL_RULE_PARSER *p, const char *name)
+{
+ my_snprintf(p->errstr, sizeof(p->errstr), "%s is too long", name);
+ return 0;
+}
+
+
+/**
+  Consume the current token if it is the given lexem,
+  otherwise store an "expected" error message in the parser.
+
+  @param p     Collation customization parser
+  @param term  Which lexem is expected.
+
+  @return
+  @retval 0 if the required term was not found.
+  @retval 1 if the required term was found.
+*/
+static int
+my_coll_parser_scan_term(MY_COLL_RULE_PARSER *p, my_coll_lexem_num term)
+{
+  if (my_coll_parser_curr(p)->term == term)
+    return my_coll_parser_scan(p);
+  return my_coll_parser_expected_error(p, term);
+}
+
+
+/*
+ In the following code we have a few functions to parse
+ various collation customization non-terminal symbols.
+ Unlike our usual coding convention, they return
+ - 0 on "error" (when the rule was not scanned) and
+ - 1 on "success"(when the rule was scanned).
+ This is done intentionally to make body of the functions look easier
+ and repeat the grammar of the rules in straightforward manner.
+ For example:
+
+ // <x> ::= <y> | <z>
+ int parse_x() { return parse_y() || parse_z(); }
+
+ // <x> ::= <y> <z>
+ int parse_x() { return parse_y() && parse_z(); }
+
+ Using 1 on "not found" and 0 on "found" in the parser code would
+ make the code more error prone and harder to read because
+ of having to use inverse boolean logic.
+*/
+
+
+/**
+ Scan a collation setting in brackets, for example UCA version.
+
+ @param p Collation customization parser
+
+ @return
+ @retval 0 if setting was not scanned (unknown bracket option).
+ @retval 1 if setting was scanned.
+*/
+
+static int
+my_coll_parser_scan_setting(MY_COLL_RULE_PARSER *p)
+{
+ MY_COLL_RULES *rules= p->rules;
+ MY_COLL_LEXEM *lexem= my_coll_parser_curr(p);
+
+ if (!lex_cmp(lexem, C_STRING_WITH_LEN("[version 4.0.0]")))
+ {
+ rules->version= 400;
+ rules->uca= &my_uca_v400;
+ }
+#if RESOLVE_CONFLICTS_WITH_MARIA_AND_MYSQL_COLLATION_IDS
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[version 5.2.0]")))
+ {
+ rules->version= 520;
+ rules->uca= &my_uca_v520;
+ }
+#endif /* NOTE(review): create_tailoring() spells its guard macro differently */
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[shift-after-method expand]")))
+ {
+ rules->shift_after_method= my_shift_method_expand;
+ }
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[shift-after-method simple]")))
+ {
+ rules->shift_after_method= my_shift_method_simple;
+ }
+ else
+ {
+ return 0;
+ }
+ return my_coll_parser_scan(p);
+}
+
+
+/**
+ Scan multiple collation settings
+
+ @param p Collation customization parser
+
+ @return
+ @retval 0 if a bracket option that is not a known setting was met.
+ @retval 1 if all leading settings (possibly none) were scanned.
+*/
+
+static int
+my_coll_parser_scan_settings(MY_COLL_RULE_PARSER *p)
+{
+ /* Scan collation setting or special purpose command */
+ while (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION)
+ {
+ if (!my_coll_parser_scan_setting(p))
+ return 0;
+ }
+ return 1;
+}
+
+
+/**
+ Scan [before xxx] reset option and store its level in p->rule.before_level
+
+ @param p Collation customization parser
+
+ @return
+ @retval 0 if reset option was not scanned.
+ @retval 1 if reset option was scanned.
+*/
+
+static int
+my_coll_parser_scan_reset_before(MY_COLL_RULE_PARSER *p)
+{
+ MY_COLL_LEXEM *lexem= my_coll_parser_curr(p);
+ if (!lex_cmp(lexem, C_STRING_WITH_LEN("[before primary]")) ||
+ !lex_cmp(lexem, C_STRING_WITH_LEN("[before 1]")))
+ {
+ p->rule.before_level= 1;
+ }
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[before secondary]")) ||
+ !lex_cmp(lexem, C_STRING_WITH_LEN("[before 2]")))
+ {
+ p->rule.before_level= 2;
+ }
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[before tertiary]")) ||
+ !lex_cmp(lexem, C_STRING_WITH_LEN("[before 3]")))
+ {
+ p->rule.before_level= 3;
+ }
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[before quaternary]")) ||
+ !lex_cmp(lexem, C_STRING_WITH_LEN("[before 4]")))
+ {
+ p->rule.before_level= 4;
+ }
+ else
+ {
+ p->rule.before_level= 0;
+ return 0; /* Not a "before" option: don't scan the next token */
+ }
+ return my_coll_parser_scan(p);
+}
+
+
+/**
+ Scan logical position, e.g. [last variable], and add it to the wide string.
+
+ @param p Collation customization parser
+ @param pwc Wide string to add code to
+ @param limit The result string cannot be longer than 'limit' characters
+
+ @return
+ @retval 0 if logical position was not scanned.
+ @retval 1 if logical position was scanned.
+*/
+
+static int
+my_coll_parser_scan_logical_position(MY_COLL_RULE_PARSER *p,
+ my_wc_t *pwc, size_t limit)
+{
+ MY_COLL_RULES *rules= p->rules;
+ MY_COLL_LEXEM *lexem= my_coll_parser_curr(p);
+
+ if (!lex_cmp(lexem, C_STRING_WITH_LEN("[first non-ignorable]")))
+ lexem->code= rules->uca->first_non_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[last non-ignorable]")))
+ lexem->code= rules->uca->last_non_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[first primary ignorable]")))
+ lexem->code= rules->uca->first_primary_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[last primary ignorable]")))
+ lexem->code= rules->uca->last_primary_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[first secondary ignorable]")))
+ lexem->code= rules->uca->first_secondary_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[last secondary ignorable]")))
+ lexem->code= rules->uca->last_secondary_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[first tertiary ignorable]")))
+ lexem->code= rules->uca->first_tertiary_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[last tertiary ignorable]")))
+ lexem->code= rules->uca->last_tertiary_ignorable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[first trailing]")))
+ lexem->code= rules->uca->first_trailing;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[last trailing]")))
+ lexem->code= rules->uca->last_trailing;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[first variable]")))
+ lexem->code= rules->uca->first_variable;
+ else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[last variable]")))
+ lexem->code= rules->uca->last_variable;
+ else
+ return 0; /* Not a logical position: don't scan the next token */
+
+ if (!my_coll_rule_expand(pwc, limit, lexem->code))
+ {
+ /*
+ Logical position can not be in a contraction,
+ so the above call should never fail.
+ Let's assert in debug version and print
+ a nice error message in production version.
+ */
+ DBUG_ASSERT(0);
+ return my_coll_parser_too_long_error(p, "Logical position");
+ }
+ return my_coll_parser_scan(p);
+}
+
+
+/**
+  Scan character list
+
+    <character list> ::= CHAR [ CHAR... ]
+
+  Every scanned character is appended to 'pwc'.
+
+  @param p      Collation customization parser
+  @param pwc    Character string to add code to
+  @param limit  The result string cannot be longer than 'limit' characters
+  @param name   E.g. "contraction", "expansion"
+
+  @return
+  @retval 0 if character sequence was not scanned.
+  @retval 1 if character sequence was scanned.
+*/
+
+static int
+my_coll_parser_scan_character_list(MY_COLL_RULE_PARSER *p,
+                                   my_wc_t *pwc, size_t limit,
+                                   const char *name)
+{
+  /* The current token always lives in the same slot of the parser */
+  MY_COLL_LEXEM *curr= my_coll_parser_curr(p);
+
+  /* At least one character is required */
+  if (curr->term != MY_COLL_LEXEM_CHAR)
+    return my_coll_parser_expected_error(p, MY_COLL_LEXEM_CHAR);
+
+  do
+  {
+    if (!my_coll_rule_expand(pwc, limit, curr->code))
+      return my_coll_parser_too_long_error(p, name);
+    my_coll_parser_scan(p);
+  } while (curr->term == MY_COLL_LEXEM_CHAR);
+  return 1;
+}
+
+
+/**
+ Scan reset sequence
+
+ <reset sequence> ::=
+ [ <reset before option> ] <character list>
+ | [ <reset before option> ] <logical reset position>
+
+ @param p Collation customization parser
+
+ @return
+ @retval 0 if reset sequence was not scanned.
+ @retval 1 if reset sequence was scanned.
+*/
+
+static int
+my_coll_parser_scan_reset_sequence(MY_COLL_RULE_PARSER *p)
+{
+ my_coll_rule_reset(&p->rule);
+
+ /* Scan "[before x]" option, if exists */
+ if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION)
+ my_coll_parser_scan_reset_before(p);
+
+ /* Try logical reset position */
+ if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_OPTION)
+ {
+ if (!my_coll_parser_scan_logical_position(p, p->rule.base, 1))
+ return 0;
+ }
+ else
+ {
+ /* Scan single reset character or expansion */
+ if (!my_coll_parser_scan_character_list(p, p->rule.base,
+ MY_UCA_MAX_EXPANSION, "Expansion"))
+ return 0;
+ }
+
+ if (p->rules->shift_after_method == my_shift_method_expand ||
+ p->rule.before_level == 1) /* Apply "before primary" option */
+ {
+ /*
+ Suppose we have this rule: &[before primary]B < C
+ i.e. we need to put C before B, but after A, so
+ the result order is: A < C < B.
+
+ Let primary weight of B be [BBBB].
+
+ We cannot just use [BBBB-1] as weight for C:
+ DUCET does not have enough unused weights between any two characters,
+ so using [BBBB-1] will likely make C equal to the previous character,
+ which is A, so we'll get this order instead of the desired: A = C < B.
+
+ To guarantee that C is sorted after A, we'll use expansion
+ with a kind of "biggest possible character".
+ As "biggest possible character" we'll use "last_non_ignorable":
+
+ We'll compose weight for C as: [BBBB-1][MMMM+1]
+ where [MMMM] is weight for "last_non_ignorable".
+
+ We also do the same trick for "reset after" if the collation
+ option says so. E.g. for the rules "&B < C", weight for
+ C will be calculated as: [BBBB][MMMM+1]
+
+ At this point we only need to store codepoints
+ 'B' and 'last_non_ignorable'. Actual weights for 'C'
+ will be calculated according to the above formula later,
+ in create_tailoring().
+ */
+ if (!my_coll_rule_expand(p->rule.base, MY_UCA_MAX_EXPANSION,
+ p->rules->uca->last_non_ignorable))
+ return my_coll_parser_too_long_error(p, "Expansion");
+ }
+ return 1;
+}
+
+
+/**
+ Scan shift sequence and add the resulting rule to the rule list
+
+ <shift sequence> ::=
+ <character list> [ / <character list> ]
+ | <character list> [ | <character list> ]
+
+ @param p Collation customization parser
+
+ @return
+ @retval 0 if shift sequence was not scanned.
+ @retval 1 if shift sequence was scanned.
+*/
+
+static int
+my_coll_parser_scan_shift_sequence(MY_COLL_RULE_PARSER *p)
+{
+ MY_COLL_RULE before_extend;
+
+ memset(&p->rule.curr, 0, sizeof(p->rule.curr));
+
+ /* Scan single shift character or contraction */
+ if (!my_coll_parser_scan_character_list(p, p->rule.curr,
+ MY_UCA_MAX_CONTRACTION,
+ "Contraction"))
+ return 0;
+
+ before_extend= p->rule; /* Remember the part before "/" */
+
+ /* Append the part after "/" as expansion */
+ if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_EXTEND)
+ {
+ my_coll_parser_scan(p);
+ if (!my_coll_parser_scan_character_list(p, p->rule.base,
+ MY_UCA_MAX_EXPANSION,
+ "Expansion"))
+ return 0;
+ }
+ else if (my_coll_parser_curr(p)->term == MY_COLL_LEXEM_CONTEXT)
+ {
+ /*
+ We support 2-character long context sequences only:
+ one character is the previous context, plus the current character.
+ It's OK as Unicode's CLDR does not have longer examples.
+ */
+ my_coll_parser_scan(p);
+ p->rule.with_context= TRUE;
+ if (!my_coll_parser_scan_character_list(p, p->rule.curr + 1, 1, "context"))
+ return 0;
+ }
+
+ /* Add rule to the rule list */
+ if (my_coll_rules_add(p->rules, &p->rule))
+ return 0;
+
+ p->rule= before_extend; /* Next shift starts from the state before "/" or "|" */
+
+ return 1;
+}
+
+
+/**
+  Scan shift operator and apply it to the currently parsed rule
+
+    <shift> ::= < | << | <<< | <<<< | =
+
+  @param p  Collation customization parser
+
+  @return
+  @retval 0 if shift operator was not scanned.
+  @retval 1 if shift operator was scanned.
+*/
+static int
+my_coll_parser_scan_shift(MY_COLL_RULE_PARSER *p)
+{
+  MY_COLL_LEXEM *curr= my_coll_parser_curr(p);
+  if (curr->term != MY_COLL_LEXEM_SHIFT)
+    return 0;
+  /* The number of '<' characters (0 for '=') selects the level */
+  my_coll_rule_shift_at_level(&p->rule, curr->diff);
+  return my_coll_parser_scan(p);
+}
+
+
+/**
+  Scan one rule: reset followed by a number of shifts
+
+    <rule> ::=
+      & <reset sequence>
+      <shift> <shift sequence>
+      [ { <shift> <shift sequence> }... ]
+
+  @param p  Collation customization parser
+
+  @return
+  @retval 0 if rule was not scanned.
+  @retval 1 if rule was scanned.
+*/
+static int
+my_coll_parser_scan_rule(MY_COLL_RULE_PARSER *p)
+{
+  /* '&' followed by the reset sequence */
+  if (!my_coll_parser_scan_term(p, MY_COLL_LEXEM_RESET))
+    return 0;
+  if (!my_coll_parser_scan_reset_sequence(p))
+    return 0;
+  /* At least one shift command is required */
+  if (!my_coll_parser_scan_shift(p))
+    return my_coll_parser_expected_error(p, MY_COLL_LEXEM_SHIFT);
+
+  /*
+    Every shift operator must be followed by a shift sequence.
+    Continue for as long as more shift operators follow.
+  */
+  do
+  {
+    if (!my_coll_parser_scan_shift_sequence(p))
+      return 0;
+  } while (my_coll_parser_scan_shift(p));
+  return 1;
+}
+
+
+/**
+  Scan collation customization: settings followed by rules
+
+    <collation customization> ::=
+      [ <setting> ... ]
+      [ <rule>... ]
+
+  @param p  Collation customization parser
+
+  @return
+  @retval 0 if collation customization expression was not scanned.
+  @retval 1 if collation customization expression was scanned.
+*/
+
+static int
+my_coll_parser_exec(MY_COLL_RULE_PARSER *p)
+{
+  MY_COLL_LEXEM *curr= my_coll_parser_curr(p);
+
+  if (!my_coll_parser_scan_settings(p))
+    return 0;
+  /* Each rule starts with '&' */
+  while (curr->term == MY_COLL_LEXEM_RESET)
+    if (!my_coll_parser_scan_rule(p))
+      return 0;
+  /* Make sure no unparsed input data left */
+  return my_coll_parser_scan_term(p, MY_COLL_LEXEM_EOF);
+}
+
+
/*
Collation language syntax parser.
Uses lexical parser.
-
- SYNOPSIS
- my_coll_rule_parse
- rule Collation rule list to load to.
- str A string containin collation language expression.
- str_end End of the string.
- USAGE
-
- RETURN VALUES
- A positive number means the number of rules loaded.
- -1 means ERROR, e.g. too many items, syntax error, etc.
+
+ @param rules Collation rule list to load to.
+ @param str A string with collation customization.
+ @param str_end End of the string.
+
+ @return
+ @retval 0 on success
+ @retval 1 on error
*/
-static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
- const char *str, const char *str_end,
- char *errstr, size_t errsize)
+static int
+my_coll_rule_parse(MY_COLL_RULES *rules,
+ const char *str, const char *str_end)
{
- MY_COLL_LEXEM lexem;
- my_coll_lexem_num lexnum;
- my_coll_lexem_num prevlexnum= MY_COLL_LEXEM_ERROR;
- MY_COLL_RULE item;
- int state= 0;
- size_t nitems= 0;
+ MY_COLL_RULE_PARSER p;
+
+ my_coll_parser_init(&p, rules, str, str_end);
+
+ if (!my_coll_parser_exec(&p))
+ {
+ my_coll_lexem_print_error(my_coll_parser_curr(&p),
+ rules->loader->error,
+ sizeof(rules->loader->error) - 1,
+ p.errstr);
+ return 1;
+ }
+ return 0;
+}
+
+
+/**
+ Helper function:
+ Copies UCA weights for a given "uint" string
+ to the given location.
- /* Init all variables */
- errstr[0]= '\0';
- bzero(&item, sizeof(item));
- my_coll_lexem_init(&lexem, str, str_end);
+ @dst UCA weight level whose contractions and
+ character weights are looked up
+ @to destination address
+ @to_length size of destination
+ @str wide string
+ @len string length
- while ((lexnum= my_coll_lexem_next(&lexem)))
+ @return number of weights put
+*/
+
+static size_t
+my_char_weight_put(MY_UCA_WEIGHT_LEVEL *dst,
+ uint16 *to, size_t to_length,
+ my_wc_t *str, size_t len)
+{
+ size_t count;
+ if (!to_length)
+ return 0;
+ to_length--; /* Without trailing zero */
+
+ for (count= 0; len; )
{
- if (lexnum == MY_COLL_LEXEM_ERROR)
+ size_t chlen;
+ const uint16 *from= NULL;
+
+ for (chlen= len; chlen > 1; chlen--)
{
- my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Unknown character");
- return -1;
- }
-
- switch (state) {
- case 0:
- if (lexnum != MY_COLL_LEXEM_SHIFT)
- {
- my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& expected");
- return -1;
- }
- prevlexnum= lexnum;
- state= 2;
- continue;
-
- case 1:
- if (lexnum != MY_COLL_LEXEM_SHIFT && lexnum != MY_COLL_LEXEM_DIFF)
+ if ((from= my_uca_contraction_weight(&dst->contractions, str, chlen)))
{
- my_coll_lexem_print_error(&lexem,errstr,errsize-1,"& or < expected");
- return -1;
+ str+= chlen;
+ len-= chlen;
+ break;
}
- prevlexnum= lexnum;
- state= 2;
- continue;
-
- case 2:
- if (lexnum != MY_COLL_LEXEM_CHAR)
+ }
+
+ if (!from)
+ {
+ from= my_char_weight_addr(dst, *str);
+ str++;
+ len--;
+ }
+
+ for ( ; from && *from && count < to_length; )
+ {
+ *to++= *from++;
+ count++;
+ }
+ }
+
+ *to= 0;
+ return count;
+}
+
+
+/**
+ Alloc new page and copy the default UCA weights
+ @param loader - Character set loader
+ @param src - Default UCA weight level to copy from
+ @param dst - UCA weight level to copy weights to
+ @param page - page number
+
+ @return
+ @retval FALSE on success
+ @retval TRUE on error
+*/
+static my_bool
+my_uca_copy_page(MY_CHARSET_LOADER *loader,
+ const MY_UCA_WEIGHT_LEVEL *src,
+ MY_UCA_WEIGHT_LEVEL *dst,
+ size_t page)
+{
+ uint chc, size= 256 * dst->lengths[page] * sizeof(uint16);
+ if (!(dst->weights[page]= (uint16 *) (loader->once_alloc)(size)))
+ return TRUE;
+
+ DBUG_ASSERT(src->lengths[page] <= dst->lengths[page]);
+ memset(dst->weights[page], 0, size);
+ for (chc=0 ; chc < 256; chc++)
+ {
+ memcpy(dst->weights[page] + chc * dst->lengths[page],
+ src->weights[page] + chc * src->lengths[page],
+ src->lengths[page] * sizeof(uint16));
+ }
+ return FALSE;
+}
+
+
+static my_bool
+apply_shift(MY_CHARSET_LOADER *loader,
+ MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
+ uint16 *to, size_t nweights)
+{
+ /* Apply level difference. */
+ if (nweights)
+ {
+ to[nweights - 1]+= r->diff[level];
+ if (r->before_level == 1) /* Apply "&[before primary]" */
+ {
+ if (nweights >= 2)
{
- my_coll_lexem_print_error(&lexem,errstr,errsize-1,"character expected");
- return -1;
+ to[nweights - 2]--; /* Reset before */
+ if (rules->shift_after_method == my_shift_method_expand)
+ {
+ /*
+ Special case. Don't let characters shifted after X
+ and before next(X) intermix to each other.
+
+ For example:
+ "[shift-after-method expand] &0 < a &[before primary]1 < A".
+ I.e. we reorder 'a' after '0', and then 'A' before '1'.
+ 'a' must be sorted before 'A'.
+
+ Note, there are no real collations in CLDR which shift
+ after and before two neighbouring characters. We need this
+ just in case. Reserving 4096 (0x1000) weights for such
+ cases is perfectly enough.
+ */
+ to[nweights - 1]+= 0x1000; /* W3-TODO: const may vary on levels 2,3*/
+ }
}
-
- if (prevlexnum == MY_COLL_LEXEM_SHIFT)
+ else
{
- item.base= lexem.code;
- item.diff[0]= 0;
- item.diff[1]= 0;
- item.diff[2]= 0;
+ my_snprintf(loader->error, sizeof(loader->error),
+ "Can't reset before "
+ "a primary ignorable character U+%04lX", r->base[0]);
+ return TRUE;
}
- else if (prevlexnum == MY_COLL_LEXEM_DIFF)
+ }
+ }
+ else
+ {
+ /* Shift to an ignorable character, e.g.: & \u0000 < \u0001 */
+ DBUG_ASSERT(to[0] == 0);
+ to[0]= r->diff[level];
+ }
+ return FALSE;
+}
+
+/* Apply one rule: store the reset weights at the shift target, then shift */
+static my_bool
+apply_one_rule(MY_CHARSET_LOADER *loader,
+ MY_COLL_RULES *rules, MY_COLL_RULE *r, int level,
+ MY_UCA_WEIGHT_LEVEL *dst)
+{
+ size_t nweights;
+ size_t nreset= my_coll_rule_reset_length(r); /* Length of reset sequence */
+ size_t nshift= my_coll_rule_shift_length(r); /* Length of shift sequence */
+ uint16 *to;
+
+ if (nshift >= 2) /* Contraction */
+ {
+ size_t i;
+ int flag;
+ MY_CONTRACTIONS *contractions= &dst->contractions;
+ /* Add HEAD, MID and TAIL flags for the contraction parts */
+ my_uca_add_contraction_flag(contractions, r->curr[0],
+ r->with_context ?
+ MY_UCA_PREVIOUS_CONTEXT_HEAD :
+ MY_UCA_CNT_HEAD);
+ for (i= 1, flag= MY_UCA_CNT_MID1; i < nshift - 1; i++, flag<<= 1)
+ my_uca_add_contraction_flag(contractions, r->curr[i], flag);
+ my_uca_add_contraction_flag(contractions, r->curr[i],
+ r->with_context ?
+ MY_UCA_PREVIOUS_CONTEXT_TAIL :
+ MY_UCA_CNT_TAIL);
+ /* Add new contraction to the contraction list */
+ to= my_uca_add_contraction(contractions, r->curr, nshift,
+ r->with_context)->weight;
+ /* Store weights of the "reset to" character */
+ dst->contractions.nitems--; /* Temporarily hide - it's incomplete */
+ nweights= my_char_weight_put(dst, to, MY_UCA_MAX_WEIGHT_SIZE,
+ r->base, nreset);
+ dst->contractions.nitems++; /* Activate, now it's complete */
+ }
+ else
+ {
+ my_wc_t pagec= (r->curr[0] >> 8);
+ DBUG_ASSERT(dst->weights[pagec]);
+ to= my_char_weight_addr(dst, r->curr[0]);
+ /* Store weights of the "reset to" character */
+ nweights= my_char_weight_put(dst, to, dst->lengths[pagec], r->base, nreset);
+ }
+
+ /* Apply level difference. */
+ return apply_shift(loader, rules, r, level, to, nweights);
+}
+
+
+/**
+  Check if collation rules are valid, i.e. the first shift and the first
+  reset character of every rule are inside the collation supported range.
+*/
+static int
+check_rules(MY_CHARSET_LOADER *loader,
+            const MY_COLL_RULES *rules,
+            const MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
+{
+  size_t i;
+  for (i= 0; i < rules->nrules; i++)
+  {
+    if (rules->rule[i].curr[0] > dst->maxchar)
+    {
+      my_snprintf(loader->error, sizeof(loader->error),
+                  "Shift character out of range: u%04X",
+                  (uint) rules->rule[i].curr[0]);
+      return TRUE;
+    }
+    if (rules->rule[i].base[0] > src->maxchar)
+    {
+      my_snprintf(loader->error, sizeof(loader->error),
+                  "Reset character out of range: u%04X",
+                  (uint) rules->rule[i].base[0]);
+      return TRUE; }
+  }
+  return FALSE; }
+
+
+static my_bool
+init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, int level,
+ MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
+{
+ MY_COLL_RULE *r, *rlast;
+ int ncontractions= 0;
+ size_t i, npages= (src->maxchar + 1) / 256;
+
+ dst->maxchar= src->maxchar;
+
+ if (check_rules(loader, rules, dst, src))
+ return TRUE;
+
+ /* Allocate memory for pages and their lengths */
+ if (!(dst->lengths= (uchar *) (loader->once_alloc)(npages)) ||
+ !(dst->weights= (uint16 **) (loader->once_alloc)(npages *
+ sizeof(uint16 *))))
+ return TRUE;
+
+ /* Copy pages lengths and page pointers from the default UCA weights */
+ memcpy(dst->lengths, src->lengths, npages);
+ memcpy(dst->weights, src->weights, npages * sizeof(uint16 *));
+
+ /*
+ Calculate maximum lengths for the pages which will be overwritten.
+ Mark pages that will be overwritten as NULL.
+ We'll allocate their own memory.
+ */
+ for (r= rules->rule, rlast= rules->rule + rules->nrules; r < rlast; r++)
+ {
+ if (!r->curr[1]) /* If not a contraction */
+ {
+ uint pagec= (r->curr[0] >> 8);
+ if (r->base[1]) /* Expansion */
{
- MY_COLL_LEXEM savlex;
- savlex= lexem;
- item.curr[0]= lexem.code;
- if ((lexnum= my_coll_lexem_next(&lexem)) == MY_COLL_LEXEM_CHAR)
- {
- item.curr[1]= lexem.code;
- }
- else
- {
- item.curr[1]= 0;
- lexem=savlex; /* Restore previous parser state */
- }
- if (lexem.diff == 3)
- {
- item.diff[2]++;
- }
- else if (lexem.diff == 2)
- {
- item.diff[1]++;
- item.diff[2]= 0;
- }
- else if (lexem.diff == 1)
- {
- item.diff[0]++;
- item.diff[1]= 0;
- item.diff[2]= 0;
- }
- else if (lexem.diff == 0)
- {
- item.diff[0]= item.diff[1]= item.diff[2]= 0;
- }
- if (nitems >= mitems)
- {
- my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Too many rules");
- return -1;
- }
- rule[nitems++]= item;
+ /* Reserve space for maximum possible length */
+ dst->lengths[pagec]= MY_UCA_MAX_WEIGHT_SIZE;
}
else
{
- my_coll_lexem_print_error(&lexem,errstr,errsize-1,"Should never happen");
- return -1;
+ uint pageb= (r->base[0] >> 8);
+ if (dst->lengths[pagec] < src->lengths[pageb])
+ dst->lengths[pagec]= src->lengths[pageb];
}
- state= 1;
- continue;
+ dst->weights[pagec]= NULL; /* Mark that we'll overwrite this page */
}
+ else
+ ncontractions++;
+ }
+
+ /* Allocate pages that we'll overwrite and copy default weights */
+ for (i= 0; i < npages; i++)
+ {
+ my_bool rc;
+ /*
+ Don't touch pages with lengths[i]==0, they have implicit weights
+ calculated algorithmically.
+ */
+ if (!dst->weights[i] && dst->lengths[i] &&
+ (rc= my_uca_copy_page(loader, src, dst, i)))
+ return rc;
+ }
+
+ if (ncontractions)
+ {
+ if (my_uca_alloc_contractions(&dst->contractions, loader, ncontractions))
+ return TRUE;
}
- return (int) nitems;
+
+ /*
+ Preparatory step is done at this point.
+ Now we have memory allocated for the pages that we'll overwrite,
+ and for contractions, including previous context contractions.
+ Also, for the pages that we'll overwrite, we have copied default weights.
+ Now iterate through the rules, overwrite weights for the characters
+ that appear in the rules, and put all contractions into contraction list.
+ */
+ for (r= rules->rule; r < rlast; r++)
+ {
+ if (apply_one_rule(loader, rules, r, level, dst))
+ return TRUE;
+ }
+ return FALSE;
}
-#define MY_MAX_COLL_RULE 128
/*
This function copies an UCS2 collation from
@@ -8013,145 +9356,65 @@ static int my_coll_rule_parse(MY_COLL_RULE *rule, size_t mitems,
default weights.
*/
-static my_bool create_tailoring(struct charset_info_st *cs,
- void *(*alloc)(size_t))
-{
- MY_COLL_RULE rule[MY_MAX_COLL_RULE];
- MY_COLL_RULE *r, *rfirst, *rlast;
- char errstr[128];
- uchar *newlengths;
- uint16 **newweights;
- const uchar *deflengths= uca_length;
- const uint16 *const *defweights= uca_weight;
- int rc, i;
- int ncontractions= 0;
-
+static my_bool
+create_tailoring(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
+{
+ MY_COLL_RULES rules;
+ MY_UCA_INFO new_uca, *src_uca= NULL;
+ int rc= 0;
+
+ *loader->error= '\0';
+
if (!cs->tailoring)
- return 1;
-
+ return 0; /* Ok to add a collation without tailoring */
+
+ memset(&rules, 0, sizeof(rules));
+ rules.loader= loader;
+ rules.uca= cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
+ memset(&new_uca, 0, sizeof(new_uca));
+
/* Parse ICU Collation Customization expression */
- if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
+ if ((rc= my_coll_rule_parse(&rules,
cs->tailoring,
- cs->tailoring + strlen(cs->tailoring),
- errstr, sizeof(errstr))) < 0)
+ cs->tailoring + strlen(cs->tailoring))))
+ goto ex;
+
+#if RESOLVE_CONFLICT_WITH_MYSQL_AND_MARIA_COLLATION_IDS
+ if (rules.version == 520) /* Unicode-5.2.0 requested */
{
- /*
- TODO: add error message reporting.
- printf("Error: %d '%s'\n", rc, errstr);
- */
- return 1;
+ src_uca= &my_uca_v520;
+ cs->caseinfo= &my_unicase_unicode520;
}
-
- rfirst= rule;
- rlast= rule + rc;
-
- if (!cs->caseinfo)
- cs->caseinfo= my_unicase_default;
-
- if (!(newweights= (uint16**) (*alloc)(256*sizeof(uint16*))))
- return 1;
- bzero(newweights, 256*sizeof(uint16*));
-
- if (!(newlengths= (uchar*) (*alloc)(256)))
- return 1;
-
- memcpy(newlengths, deflengths, 256);
-
- /*
- Calculate maximum lenghts for the pages
- which will be overwritten.
- */
- for (i=0; i < rc; i++)
+ else
+#endif
+ if (rules.version == 400) /* Unicode-4.0.0 requested */
{
- /* check if the shift or the reset characters are out of range */
- if (rule[i].curr[0] > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ||
- rule[i].base > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
- return 1;
-
- if (!rule[i].curr[1]) /* If not a contraction */
- {
- uint pageb= (rule[i].base >> 8) & 0xFF;
- uint pagec= (rule[i].curr[0] >> 8) & 0xFF;
-
- if (newlengths[pagec] < deflengths[pageb])
- newlengths[pagec]= deflengths[pageb];
- }
- else
- ncontractions++;
+ src_uca= &my_uca_v400;
+ cs->caseinfo= &my_unicase_default;
}
-
- for (i=0; i < rc; i++)
+ else /* No Unicode version specified */
{
- uint pageb= (rule[i].base >> 8) & 0xFF;
- uint pagec= (rule[i].curr[0] >> 8) & 0xFF;
- uint chb, chc;
-
- if (rule[i].curr[1]) /* Skip contraction */
- continue;
-
- if (!newweights[pagec])
- {
- /* Alloc new page and copy the default UCA weights */
- uint size= 256*newlengths[pagec]*sizeof(uint16);
-
- if (!(newweights[pagec]= (uint16*) (*alloc)(size)))
- return 1;
- bzero((void*) newweights[pagec], size);
-
- for (chc=0 ; chc < 256; chc++)
- {
- memcpy(newweights[pagec] + chc*newlengths[pagec],
- defweights[pagec] + chc*deflengths[pagec],
- deflengths[pagec]*sizeof(uint16));
- }
- }
-
- /*
- Aply the alternative rule:
- shift to the base character and primary difference.
- */
- chc= rule[i].curr[0] & 0xFF;
- chb= rule[i].base & 0xFF;
- memcpy(newweights[pagec] + chc*newlengths[pagec],
- defweights[pageb] + chb*deflengths[pageb],
- deflengths[pageb]*sizeof(uint16));
- /* Apply primary difference */
- newweights[pagec][chc*newlengths[pagec]]+= rule[i].diff[0];
+ src_uca= cs->uca ? cs->uca : &my_uca_v400;
+ if (!cs->caseinfo)
+ cs->caseinfo= &my_unicase_default;
}
-
- /* Copy non-overwritten pages from the default UCA weights */
- for (i= 0; i < 256 ; i++)
- {
- if (!newweights[i])
- ((const uint16**) newweights)[i]= defweights[i];
- }
-
- cs->sort_order= newlengths;
- cs->sort_order_big= (const uint16**) newweights;
- cs->contractions= NULL;
-
- /* Now process contractions */
- if (ncontractions)
+
+ if ((rc= init_weight_level(loader, &rules, 0,
+ &new_uca.level[0], &src_uca->level[0])))
+ goto ex;
+
+ if (!(cs->uca= (MY_UCA_INFO *) (loader->once_alloc)(sizeof(MY_UCA_INFO))))
{
- if (my_uca_alloc_contractions(cs, alloc, ncontractions))
- return 1;
- for (r= rfirst; r < rlast; r++)
- {
- uint16 *to;
- if (r->curr[1]) /* Contraction */
- {
- /* Mark both letters as "is contraction part" */
- my_uca_add_contraction_flag(cs, r->curr[0], MY_UCA_CNT_HEAD);
- my_uca_add_contraction_flag(cs, r->curr[1], MY_UCA_CNT_TAIL);
- to= my_uca_add_contraction(cs, r->curr, 2)->weight;
- /* Copy weight from the reset character */
- to[0]= my_char_weight_addr(cs, r->base)[0];
- /* Apply primary difference */
- to[0]+= r->diff[0];
- }
- }
+ rc= 1;
+ goto ex;
}
- return 0;
+ cs->uca[0]= new_uca;
+
+ex:
+ (loader->free)(rules.rule);
+ if (rc != 0 && loader->error[0])
+ loader->reporter(ERROR_LEVEL, "%s", loader->error);
+ return rc;
}
@@ -8161,12 +9424,14 @@ static my_bool create_tailoring(struct charset_info_st *cs,
Should work for any character set.
*/
-static my_bool my_coll_init_uca(struct charset_info_st *cs,
- void *(*alloc)(size_t))
+static my_bool
+my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
cs->pad_char= ' ';
cs->ctype= my_charset_utf8_unicode_ci.ctype;
- return create_tailoring(cs, alloc);
+ if (!cs->caseinfo)
+ cs->caseinfo= &my_unicase_default;
+ return create_tailoring(cs, loader);
}
static int my_strnncoll_any_uca(CHARSET_INFO *cs,
@@ -8213,7 +9478,7 @@ static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
- return my_strnncoll_uca(cs, &my_ucs2_uca_scanner_handler,
+ return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
s, slen, t, tlen, t_is_prefix);
}
@@ -8222,7 +9487,7 @@ static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
const uchar *t, size_t tlen,
my_bool diff_if_only_endspace_difference)
{
- return my_strnncollsp_uca(cs, &my_ucs2_uca_scanner_handler,
+ return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler,
s, slen, t, tlen,
diff_if_only_endspace_difference);
}
@@ -8231,14 +9496,14 @@ static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
const uchar *s, size_t slen,
ulong *n1, ulong *n2)
{
- my_hash_sort_uca(cs, &my_ucs2_uca_scanner_handler, s, slen, n1, n2);
+ my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
}
static size_t my_strnxfrm_ucs2_uca(CHARSET_INFO *cs,
uchar *dst, size_t dstlen,
const uchar *src, size_t srclen)
{
- return my_strnxfrm_uca(cs, &my_ucs2_uca_scanner_handler,
+ return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
dst, dstlen, src, srclen);
}
@@ -8268,12 +9533,11 @@ struct charset_info_st my_charset_ucs2_unicode_ci=
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
- uca_length, /* sort_order */
- NULL, /* contractions */
- uca_weight, /* sort_order_big*/
+ NULL, /* sort_order */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8301,11 +9565,10 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8333,11 +9596,10 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8365,11 +9627,10 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8397,11 +9658,10 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8429,11 +9689,10 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8461,11 +9720,10 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8493,11 +9751,10 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8525,11 +9782,10 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8557,11 +9813,10 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_turkish, /* caseinfo */
+ &my_unicase_turkish,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8589,11 +9844,10 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8622,11 +9876,10 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8654,11 +9907,10 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8686,11 +9938,10 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8718,11 +9969,10 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8751,11 +10001,10 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8784,11 +10033,10 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8817,11 +10065,10 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8850,11 +10097,10 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8882,11 +10128,43 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 2, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_ucs2_handler,
+ &my_collation_ucs2_uca_handler
+};
+
+
+
+struct charset_info_st my_charset_ucs2_german2_uca_ci=
+{
+ 148,0,0, /* number */
+ MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
+ "ucs2", /* csname */
+ "ucs2_german2_ci", /* name */
+ "", /* comment */
+ german2, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8914,11 +10192,10 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -8934,6 +10211,7 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci=
&my_collation_ucs2_uca_handler
};
+
#endif
@@ -8981,7 +10259,7 @@ static uchar ctype_utf8[] = {
extern MY_CHARSET_HANDLER my_charset_utf8_handler;
-#define MY_CS_UTF8MB3_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE)
+#define MY_CS_UTF8MB3_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE)
struct charset_info_st my_charset_utf8_unicode_ci=
{
@@ -8994,12 +10272,11 @@ struct charset_info_st my_charset_utf8_unicode_ci=
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
- uca_length, /* sort_order */
- NULL, /* contractions */
- uca_weight, /* sort_order_big*/
+ NULL, /* sort_order */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9019,7 +10296,7 @@ struct charset_info_st my_charset_utf8_unicode_ci=
struct charset_info_st my_charset_utf8_icelandic_uca_ci=
{
193,0,0, /* number */
- MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
+ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */
"utf8", /* cs name */
"utf8_icelandic_ci",/* name */
"", /* comment */
@@ -9028,11 +10305,10 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9060,11 +10336,10 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9092,11 +10367,10 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9124,11 +10398,10 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9156,11 +10429,10 @@ struct charset_info_st my_charset_utf8_polish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9188,11 +10460,10 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9220,11 +10491,10 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9252,11 +10522,10 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9284,11 +10553,10 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_turkish, /* caseinfo */
+ &my_unicase_turkish,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9316,11 +10584,10 @@ struct charset_info_st my_charset_utf8_czech_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9349,11 +10616,10 @@ struct charset_info_st my_charset_utf8_danish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9381,11 +10647,10 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9413,11 +10678,10 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9445,11 +10709,10 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9477,11 +10740,10 @@ struct charset_info_st my_charset_utf8_roman_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9509,11 +10771,10 @@ struct charset_info_st my_charset_utf8_persian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9541,11 +10802,10 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9573,11 +10833,10 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9605,11 +10864,42 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 3, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8_handler,
+ &my_collation_any_uca_handler
+};
+
+
+struct charset_info_st my_charset_utf8_german2_uca_ci=
+{
+ 212,0,0, /* number */
+ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */
+ MY_UTF8MB3, /* cs name */
+ MY_UTF8MB3 "_german2_ci",/* name */
+ "", /* comment */
+ german2, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9627,36 +10917,36 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci=
struct charset_info_st my_charset_utf8_croatian_uca_ci=
{
- 213,0,0, /* number */
- MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
- "utf8", /* cs name */
- "utf8_croatian_ci", /* name */
- "", /* comment */
- croatian, /* tailoring */
- ctype_utf8, /* ctype */
- NULL, /* to_lower */
- NULL, /* to_upper */
- NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
- NULL, /* tab_to_uni */
- NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
- NULL, /* state_map */
- NULL, /* ident_map */
- 8, /* strxfrm_multiply */
- 1, /* caseup_multiply */
- 1, /* casedn_multiply */
- 1, /* mbminlen */
- 3, /* mbmaxlen */
- 9, /* min_sort_char */
- 0xFFFF, /* max_sort_char */
- ' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 213,0,0, /* number */
+ MY_CS_UTF8MB3_UCA_FLAGS,/* flags */
+ MY_UTF8MB3, /* cs name */
+ MY_UTF8MB3 "_croatian_ci",/* name */
+ "", /* comment */
+ croatian, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_default, /* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 3, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
+
#endif /* HAVE_CHARSET_utf8 */
@@ -9677,12 +10967,11 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci=
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
- uca_length, /* sort_order */
- NULL, /* contractions */
- uca_weight, /* sort_order_big*/
+ NULL, /* sort_order */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9711,11 +11000,10 @@ struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9743,11 +11031,10 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9775,11 +11062,10 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9807,11 +11093,10 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9839,11 +11124,10 @@ struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9871,11 +11155,10 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9903,11 +11186,10 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9935,11 +11217,10 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9967,11 +11248,10 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_turkish, /* caseinfo */
+ &my_unicase_turkish, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -9999,11 +11279,10 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10032,11 +11311,10 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10064,11 +11342,10 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10096,11 +11373,10 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10128,11 +11404,10 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10160,11 +11435,10 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10192,11 +11466,10 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10224,11 +11497,10 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10256,11 +11528,10 @@ struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10288,11 +11559,41 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 1, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf8mb4_handler,
+ &my_collation_any_uca_handler
+};
+
+struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
+{
+ 244,0,0, /* number */
+ MY_CS_UTF8MB4_UCA_FLAGS,/* state */
+ MY_UTF8MB4, /* csname */
+ MY_UTF8MB4 "_german2_ci",/* name */
+ "", /* comment */
+ german2, /* tailoring */
+ ctype_utf8, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10320,11 +11621,10 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10375,12 +11675,11 @@ struct charset_info_st my_charset_utf32_unicode_ci=
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
- uca_length, /* sort_order */
- NULL, /* contractions */
- uca_weight, /* sort_order_big*/
+ NULL, /* sort_order */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10409,11 +11708,10 @@ struct charset_info_st my_charset_utf32_icelandic_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10441,11 +11739,10 @@ struct charset_info_st my_charset_utf32_latvian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10473,11 +11770,10 @@ struct charset_info_st my_charset_utf32_romanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10505,11 +11801,10 @@ struct charset_info_st my_charset_utf32_slovenian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10537,11 +11832,10 @@ struct charset_info_st my_charset_utf32_polish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10569,11 +11863,10 @@ struct charset_info_st my_charset_utf32_estonian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10601,11 +11894,10 @@ struct charset_info_st my_charset_utf32_spanish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10633,11 +11925,10 @@ struct charset_info_st my_charset_utf32_swedish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10665,11 +11956,10 @@ struct charset_info_st my_charset_utf32_turkish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_turkish, /* caseinfo */
+ &my_unicase_turkish, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10697,11 +11987,10 @@ struct charset_info_st my_charset_utf32_czech_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10730,11 +12019,10 @@ struct charset_info_st my_charset_utf32_danish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10762,11 +12050,10 @@ struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10794,11 +12081,10 @@ struct charset_info_st my_charset_utf32_slovak_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10826,11 +12112,10 @@ struct charset_info_st my_charset_utf32_spanish2_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10858,11 +12143,10 @@ struct charset_info_st my_charset_utf32_roman_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10890,11 +12174,10 @@ struct charset_info_st my_charset_utf32_persian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10922,11 +12205,10 @@ struct charset_info_st my_charset_utf32_esperanto_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10954,11 +12236,10 @@ struct charset_info_st my_charset_utf32_hungarian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -10986,11 +12267,41 @@ struct charset_info_st my_charset_utf32_sinhala_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 4, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf32_handler,
+ &my_collation_utf32_uca_handler
+};
+
+struct charset_info_st my_charset_utf32_german2_uca_ci=
+{
+ 180,0,0, /* number */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
+ "utf32_german2_ci", /* name */
+ "", /* comment */
+ german2, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11009,8 +12320,8 @@ struct charset_info_st my_charset_utf32_sinhala_uca_ci=
struct charset_info_st my_charset_utf32_croatian_uca_ci=
{
214,0,0, /* number */
- MY_CS_UTF32_UCA_FLAGS /* state */,
- "utf32", /* cs name */
+ MY_CS_UTF32_UCA_FLAGS,/* state */
+ "utf32", /* csname */
"utf32_croatian_ci", /* name */
"", /* comment */
croatian, /* tailoring */
@@ -11018,11 +12329,10 @@ struct charset_info_st my_charset_utf32_croatian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11037,6 +12347,7 @@ struct charset_info_st my_charset_utf32_croatian_uca_ci=
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
};
+
#endif /* HAVE_CHARSET_utf32 */
@@ -11073,12 +12384,11 @@ struct charset_info_st my_charset_utf16_unicode_ci=
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
- uca_length, /* sort_order */
- NULL, /* contractions */
- uca_weight, /* sort_order_big*/
+ NULL, /* sort_order */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11107,11 +12417,10 @@ struct charset_info_st my_charset_utf16_icelandic_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11139,11 +12448,10 @@ struct charset_info_st my_charset_utf16_latvian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11171,11 +12479,10 @@ struct charset_info_st my_charset_utf16_romanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11203,11 +12510,10 @@ struct charset_info_st my_charset_utf16_slovenian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11235,11 +12541,10 @@ struct charset_info_st my_charset_utf16_polish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11267,11 +12572,10 @@ struct charset_info_st my_charset_utf16_estonian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11299,11 +12603,10 @@ struct charset_info_st my_charset_utf16_spanish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11331,11 +12634,10 @@ struct charset_info_st my_charset_utf16_swedish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11363,11 +12665,10 @@ struct charset_info_st my_charset_utf16_turkish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_turkish, /* caseinfo */
+ &my_unicase_turkish, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11395,11 +12696,10 @@ struct charset_info_st my_charset_utf16_czech_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11428,11 +12728,10 @@ struct charset_info_st my_charset_utf16_danish_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11460,11 +12759,10 @@ struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11492,11 +12790,10 @@ struct charset_info_st my_charset_utf16_slovak_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11524,11 +12821,10 @@ struct charset_info_st my_charset_utf16_spanish2_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11556,11 +12852,10 @@ struct charset_info_st my_charset_utf16_roman_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11588,11 +12883,10 @@ struct charset_info_st my_charset_utf16_persian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11620,11 +12914,10 @@ struct charset_info_st my_charset_utf16_esperanto_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11652,11 +12945,10 @@ struct charset_info_st my_charset_utf16_hungarian_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default,/* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11684,11 +12976,10 @@ struct charset_info_st my_charset_utf16_sinhala_uca_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default,/* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
@@ -11704,114 +12995,72 @@ struct charset_info_st my_charset_utf16_sinhala_uca_ci=
&my_collation_utf16_uca_handler
};
-struct charset_info_st my_charset_utf16_croatian_uca_ci=
+struct charset_info_st my_charset_utf16_german2_uca_ci=
{
- 215,0,0, /* number */
- MY_CS_UTF16_UCA_FLAGS /* state */,
- "utf16", /* cs name */
- "utf16_croatian_ci", /* name */
- "", /* comment */
- croatian, /* tailoring */
- NULL, /* ctype */
- NULL, /* to_lower */
- NULL, /* to_upper */
- NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
- NULL, /* tab_to_uni */
- NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
- NULL, /* state_map */
- NULL, /* ident_map */
- 8, /* strxfrm_multiply */
- 1, /* caseup_multiply */
- 1, /* casedn_multiply */
- 2, /* mbminlen */
- 4, /* mbmaxlen */
- 9, /* min_sort_char */
- 0xFFFF, /* max_sort_char */
- ' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 121,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* cs name */
+ "utf16_german2_ci",/* name */
+ "", /* comment */
+ german2, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_default,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
};
-#endif /* HAVE_CHARSET_utf16 */
-
-#endif /* HAVE_UCA_COLLATIONS */
-
-/**
- Check if UCA data has contractions (public version)
-
- @cs Pointer to CHARSET_INFO data
- @retval 0 - no contraction, 1 - have contractions.
-*/
-
-my_bool
-my_cs_have_contractions(CHARSET_INFO *cs)
-{
- return cs->contractions != NULL;
-}
-
-/**
- Check if a character can be contraction head
-
- @cs Pointer to CHARSET_INFO data
- @wc Code point
-
- @retval 0 - cannot be contraction head
- @retval 1 - can be contraction head
-*/
-
-my_bool
-my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
+struct charset_info_st my_charset_utf16_croatian_uca_ci=
{
- return cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_HEAD;
-}
-
-
-/**
- Check if a character can be contraction tail
-
- @cs Pointer to CHARSET_INFO data
- @wc Code point
-
- @retval 0 - cannot be contraction tail
- @retval 1 - can be contraction tail
-*/
+ 215,0,0, /* number */
+ MY_CS_UTF16_UCA_FLAGS,/* state */
+ "utf16", /* cs name */
+ "utf16_croatian_ci",/* name */
+ "", /* comment */
+ croatian, /* tailoring */
+ NULL, /* ctype */
+ NULL, /* to_lower */
+ NULL, /* to_upper */
+ NULL, /* sort_order */
+ NULL, /* uca */
+ NULL, /* tab_to_uni */
+ NULL, /* tab_from_uni */
+ &my_unicase_default,/* caseinfo */
+ NULL, /* state_map */
+ NULL, /* ident_map */
+ 8, /* strxfrm_multiply */
+ 1, /* caseup_multiply */
+ 1, /* casedn_multiply */
+ 2, /* mbminlen */
+ 4, /* mbmaxlen */
+ 9, /* min_sort_char */
+ 0xFFFF, /* max_sort_char */
+ ' ', /* pad char */
+ 0, /* escape_with_backslash_is_dangerous */
+ &my_charset_utf16_handler,
+ &my_collation_utf16_uca_handler
+};
-my_bool
-my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
-{
- return cs->contractions->flags[wc & MY_UCA_CNT_FLAG_MASK] & MY_UCA_CNT_TAIL;
-}
+#endif /* HAVE_CHARSET_utf16 */
-/**
- Find a contraction and return its weight array
-
- @cs Pointer to CHARSET data
- @wc1 First character
- @wc2 Second character
-
- @return Weight array
- @retval NULL - no contraction found
- @retval ptr - contraction weight array
-*/
-const uint16 *
-my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
-{
- const MY_CONTRACTIONS *list= cs->contractions;
- const MY_CONTRACTION *c, *last;
- for (c= list->item, last= &list->item[list->nitems]; c < last; c++)
- {
- if (c->ch[0] == wc1 && c->ch[1] == wc2)
- {
- return c->weight;
- }
- }
- return NULL;
-}
+#endif /* HAVE_UCA_COLLATIONS */
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 26f15584bcd..a5845a26917 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1161,31 +1161,31 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
static inline void
-my_tolower_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- if (page < 256 && uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].tolower;
+ MY_UNICASE_CHARACTER *page;
+ if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
+ *wc= page[*wc & 0xFF].tolower;
}
static inline void
-my_toupper_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- if (page < 256 && uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].toupper;
+ MY_UNICASE_CHARACTER *page;
+ if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
+ *wc= page[*wc & 0xFF].toupper;
}
static inline void
-my_tosort_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- if (page < 256)
+ if (*wc <= uni_plane->maxchar)
{
- if (uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].sort;
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[*wc >> 8]))
+ *wc= page[*wc & 0xFF].sort;
}
else
{
@@ -1194,6 +1194,7 @@ my_tosort_utf16(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
}
+
static size_t
my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst __attribute__((unused)),
@@ -1204,7 +1205,7 @@ my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@@ -1227,7 +1228,7 @@ my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
int res;
const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen);
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
{
@@ -1251,7 +1252,7 @@ my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb;
int res;
char *srcend= src + srclen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@@ -1277,7 +1278,7 @@ my_strnncoll_utf16(CHARSET_INFO *cs,
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se= s + slen;
const uchar *te= t + tlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s < se && t < te)
{
@@ -1341,7 +1342,7 @@ my_strnncollsp_utf16(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
const uchar *se= s + slen, *te= t + tlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT((slen % 2) == 0);
DBUG_ASSERT((tlen % 2) == 0);
@@ -1483,7 +1484,7 @@ my_wildcmp_utf16_ci(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many, uni_plane);
}
@@ -1695,11 +1696,10 @@ struct charset_info_st my_charset_utf16_general_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -1728,11 +1728,10 @@ struct charset_info_st my_charset_utf16_bin=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -1864,11 +1863,10 @@ struct charset_info_st my_charset_utf16le_general_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -1897,11 +1895,10 @@ struct charset_info_st my_charset_utf16le_bin=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -1950,31 +1947,31 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
static inline void
-my_tolower_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- if (page < 256 && uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].tolower;
+ MY_UNICASE_CHARACTER *page;
+ if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
+ *wc= page[*wc & 0xFF].tolower;
}
static inline void
-my_toupper_utf32(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- if (page < 256 && uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].toupper;
+ MY_UNICASE_CHARACTER *page;
+ if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
+ *wc= page[*wc & 0xFF].toupper;
}
static inline void
-my_tosort_utf32(MY_UNICASE_INFO *const* uni_plane, my_wc_t *wc)
+my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- if (page < 256)
+ if (*wc <= uni_plane->maxchar)
{
- if (uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].sort;
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[*wc >> 8]))
+ *wc= page[*wc & 0xFF].sort;
}
else
{
@@ -1991,7 +1988,7 @@ my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@@ -2021,7 +2018,7 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e= s + slen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/* Skip trailing spaces */
while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4])
@@ -2047,7 +2044,7 @@ my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
@@ -2070,7 +2067,7 @@ my_strnncoll_utf32(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
const uchar *se= s + slen;
const uchar *te= t + tlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s < se && t < te)
{
@@ -2134,7 +2131,7 @@ my_strnncollsp_utf32(CHARSET_INFO *cs,
int res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
const uchar *se= s + slen, *te= t + tlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT((slen % 4) == 0);
DBUG_ASSERT((tlen % 4) == 0);
@@ -2582,7 +2579,7 @@ my_wildcmp_utf32_ci(CHARSET_INFO *cs,
const char *wildstr, const char *wildend,
int escape, int w_one, int w_many)
{
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many, uni_plane);
}
@@ -2790,11 +2787,10 @@ struct charset_info_st my_charset_utf32_general_ci=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -2823,11 +2819,10 @@ struct charset_info_st my_charset_utf32_bin=
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -2934,32 +2929,29 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
static inline void
-my_tolower_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
+my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- DBUG_ASSERT(page < 256);
- if (uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].tolower;
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
+ *wc= page[*wc & 0xFF].tolower;
}
static inline void
-my_toupper_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
+my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- DBUG_ASSERT(page < 256);
- if (uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].toupper;
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
+ *wc= page[*wc & 0xFF].toupper;
}
static inline void
-my_tosort_ucs2(MY_UNICASE_INFO *const *uni_plane, my_wc_t *wc)
+my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
- uint page= *wc >> 8;
- DBUG_ASSERT(page < 256);
- if (uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].sort;
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
+ *wc= page[*wc & 0xFF].sort;
}
static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
@@ -2969,7 +2961,7 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@@ -2990,7 +2982,7 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e=s+slen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
e-= 2;
@@ -3014,7 +3006,7 @@ static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int res;
char *srcend= src + srclen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src == dst && srclen == dstlen);
while ((src < srcend) &&
@@ -3062,7 +3054,7 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc),UNINIT_VAR(t_wc);
const uchar *se=s+slen;
const uchar *te=t+tlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while ( s < se && t < te )
{
@@ -3124,7 +3116,7 @@ static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
{
const uchar *se, *te;
size_t minlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/* extra safety to make sure the lengths are even numbers */
slen&= ~1;
@@ -3135,11 +3127,11 @@ static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
{
- int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
- (((int) s[0]) << 8) + (int) s[1];
+ int s_wc = uni_plane->page[s[0]] ? (int) uni_plane->page[s[0]][s[1]].sort :
+ (((int) s[0]) << 8) + (int) s[1];
- int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort :
- (((int) t[0]) << 8) + (int) t[1];
+ int t_wc = uni_plane->page[t[0]] ? (int) uni_plane->page[t[0]][t[1]].sort :
+ (((int) t[0]) << 8) + (int) t[1];
if ( s_wc != t_wc )
return s_wc > t_wc ? 1 : -1;
@@ -3220,7 +3212,7 @@ int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane);
}
@@ -3412,11 +3404,10 @@ struct charset_info_st my_charset_ucs2_general_ci=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -3445,11 +3436,10 @@ struct charset_info_st my_charset_ucs2_general_mysql500_ci=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_mysql500, /* caseinfo */
+ &my_unicase_mysql500, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -3478,11 +3468,10 @@ struct charset_info_st my_charset_ucs2_bin=
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 2743efc4087..0f405825830 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -65988,7 +65988,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *cs __attribute__((unused)),
/* Case info pages for JIS-X-0208 range */
-static MY_UNICASE_INFO cA2[256]=
+static MY_UNICASE_CHARACTER cA2[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66109,7 +66109,7 @@ static MY_UNICASE_INFO cA2[256]=
};
-static MY_UNICASE_INFO cA3[256]=
+static MY_UNICASE_CHARACTER cA3[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66230,7 +66230,7 @@ static MY_UNICASE_INFO cA3[256]=
};
-static MY_UNICASE_INFO cA6[256]=
+static MY_UNICASE_CHARACTER cA6[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66351,7 +66351,7 @@ static MY_UNICASE_INFO cA6[256]=
};
-static MY_UNICASE_INFO cA7[256]=
+static MY_UNICASE_CHARACTER cA7[256]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66473,7 +66473,7 @@ static MY_UNICASE_INFO cA7[256]=
/* Case info pages for JIS-X-0212 range */
-static MY_UNICASE_INFO c8FA6[]=
+static MY_UNICASE_CHARACTER c8FA6[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66594,7 +66594,7 @@ static MY_UNICASE_INFO c8FA6[]=
};
-static MY_UNICASE_INFO c8FA7[]=
+static MY_UNICASE_CHARACTER c8FA7[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66715,7 +66715,7 @@ static MY_UNICASE_INFO c8FA7[]=
};
-static MY_UNICASE_INFO c8FA9[]=
+static MY_UNICASE_CHARACTER c8FA9[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66836,7 +66836,7 @@ static MY_UNICASE_INFO c8FA9[]=
};
-static MY_UNICASE_INFO c8FAA[]=
+static MY_UNICASE_CHARACTER c8FAA[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -66957,7 +66957,7 @@ static MY_UNICASE_INFO c8FAA[]=
};
-static MY_UNICASE_INFO c8FAB[]=
+static MY_UNICASE_CHARACTER c8FAB[]=
{
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0}, /* xx00 */
{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
@@ -67078,7 +67078,7 @@ static MY_UNICASE_INFO c8FAB[]=
};
-static MY_UNICASE_INFO *my_caseinfo_ujis[512]=
+static MY_UNICASE_CHARACTER *my_caseinfo_pages_ujis[512]=
{
/* JIS-X-0208 */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 0 */
@@ -67148,6 +67148,15 @@ static MY_UNICASE_INFO *my_caseinfo_ujis[512]=
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* F */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
+
+static MY_UNICASE_INFO my_caseinfo_ujis=
+{
+ 0x0FFFF,
+ my_caseinfo_pages_ujis
+};
+
+
+
#endif /* HAVE_CHARSET_ujis */
@@ -67158,11 +67167,11 @@ static MY_UNICASE_INFO *my_caseinfo_ujis[512]=
UJIS and EUCJPMS share the same UPPER/LOWER functions.
*/
-static MY_UNICASE_INFO*
+static MY_UNICASE_CHARACTER*
get_case_info_for_ch(CHARSET_INFO *cs, uint plane, uint page, uint offs)
{
- MY_UNICASE_INFO *p;
- return (p= cs->caseinfo[page + plane * 256]) ? &p[offs & 0xFF] : NULL;
+ MY_UNICASE_CHARACTER *p;
+ return (p= cs->caseinfo->page[page + plane * 256]) ? &p[offs & 0xFF] : NULL;
}
@@ -67183,7 +67192,7 @@ my_casefold_ujis(CHARSET_INFO *cs,
size_t mblen= my_ismbchar(cs, src, srcend);
if (mblen)
{
- MY_UNICASE_INFO *ch;
+ MY_UNICASE_CHARACTER *ch;
ch= (mblen == 2) ?
get_case_info_for_ch(cs, 0, (uchar) src[0], (uchar) src[1]) :
get_case_info_for_ch(cs, 1, (uchar) src[1], (uchar) src[2]);
@@ -67304,11 +67313,10 @@ struct charset_info_st my_charset_ujis_japanese_ci=
to_lower_ujis,
to_upper_ujis,
sort_order_ujis,
- NULL, /* sort_order_big*/
- NULL, /* contractions */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_ujis, /* caseinfo */
+ &my_caseinfo_ujis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -67337,11 +67345,10 @@ struct charset_info_st my_charset_ujis_bin=
to_lower_ujis,
to_upper_ujis,
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_caseinfo_ujis, /* caseinfo */
+ &my_caseinfo_ujis, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index fe25f288d5f..ae891b43d37 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -60,7 +60,7 @@
#include "my_uctype.h"
-static MY_UNICASE_INFO plane00[]={
+static MY_UNICASE_CHARACTER plane00[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
@@ -196,7 +196,7 @@ static MY_UNICASE_INFO plane00[]={
Almost similar to plane00, but maps sorting order
for U+00DF to 0x00DF instead of 0x0053.
*/
-static MY_UNICASE_INFO plane00_mysql500[]={
+static MY_UNICASE_CHARACTER plane00_mysql500[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
@@ -328,7 +328,7 @@ static MY_UNICASE_INFO plane00_mysql500[]={
};
-static MY_UNICASE_INFO plane01[]={
+static MY_UNICASE_CHARACTER plane01[]={
{0x0100,0x0101,0x0041}, {0x0100,0x0101,0x0041},
{0x0102,0x0103,0x0041}, {0x0102,0x0103,0x0041},
{0x0104,0x0105,0x0041}, {0x0104,0x0105,0x0041},
@@ -459,7 +459,7 @@ static MY_UNICASE_INFO plane01[]={
{0x01FE,0x01FF,0x00D8}, {0x01FE,0x01FF,0x00D8}
};
-static MY_UNICASE_INFO plane02[]={
+static MY_UNICASE_CHARACTER plane02[]={
{0x0200,0x0201,0x0041}, {0x0200,0x0201,0x0041},
{0x0202,0x0203,0x0041}, {0x0202,0x0203,0x0041},
{0x0204,0x0205,0x0045}, {0x0204,0x0205,0x0045},
@@ -590,7 +590,7 @@ static MY_UNICASE_INFO plane02[]={
{0x02FE,0x02FE,0x02FE}, {0x02FF,0x02FF,0x02FF}
};
-static MY_UNICASE_INFO plane03[]={
+static MY_UNICASE_CHARACTER plane03[]={
{0x0300,0x0300,0x0300}, {0x0301,0x0301,0x0301},
{0x0302,0x0302,0x0302}, {0x0303,0x0303,0x0303},
{0x0304,0x0304,0x0304}, {0x0305,0x0305,0x0305},
@@ -721,7 +721,7 @@ static MY_UNICASE_INFO plane03[]={
{0x03FE,0x03FE,0x03FE}, {0x03FF,0x03FF,0x03FF}
};
-static MY_UNICASE_INFO plane04[]={
+static MY_UNICASE_CHARACTER plane04[]={
{0x0400,0x0450,0x0415}, {0x0401,0x0451,0x0415},
{0x0402,0x0452,0x0402}, {0x0403,0x0453,0x0413},
{0x0404,0x0454,0x0404}, {0x0405,0x0455,0x0405},
@@ -852,7 +852,7 @@ static MY_UNICASE_INFO plane04[]={
{0x04FE,0x04FE,0x04FE}, {0x04FF,0x04FF,0x04FF}
};
-static MY_UNICASE_INFO plane05[]={
+static MY_UNICASE_CHARACTER plane05[]={
{0x0500,0x0500,0x0500}, {0x0501,0x0501,0x0501},
{0x0502,0x0502,0x0502}, {0x0503,0x0503,0x0503},
{0x0504,0x0504,0x0504}, {0x0505,0x0505,0x0505},
@@ -983,7 +983,7 @@ static MY_UNICASE_INFO plane05[]={
{0x05FE,0x05FE,0x05FE}, {0x05FF,0x05FF,0x05FF}
};
-static MY_UNICASE_INFO plane1E[]={
+static MY_UNICASE_CHARACTER plane1E[]={
{0x1E00,0x1E01,0x0041}, {0x1E00,0x1E01,0x0041},
{0x1E02,0x1E03,0x0042}, {0x1E02,0x1E03,0x0042},
{0x1E04,0x1E05,0x0042}, {0x1E04,0x1E05,0x0042},
@@ -1114,7 +1114,7 @@ static MY_UNICASE_INFO plane1E[]={
{0x1EFE,0x1EFE,0x1EFE}, {0x1EFF,0x1EFF,0x1EFF}
};
-static MY_UNICASE_INFO plane1F[]={
+static MY_UNICASE_CHARACTER plane1F[]={
{0x1F08,0x1F00,0x0391}, {0x1F09,0x1F01,0x0391},
{0x1F0A,0x1F02,0x0391}, {0x1F0B,0x1F03,0x0391},
{0x1F0C,0x1F04,0x0391}, {0x1F0D,0x1F05,0x0391},
@@ -1245,7 +1245,7 @@ static MY_UNICASE_INFO plane1F[]={
{0x1FFE,0x1FFE,0x1FFE}, {0x1FFF,0x1FFF,0x1FFF}
};
-static MY_UNICASE_INFO plane21[]={
+static MY_UNICASE_CHARACTER plane21[]={
{0x2100,0x2100,0x2100}, {0x2101,0x2101,0x2101},
{0x2102,0x2102,0x2102}, {0x2103,0x2103,0x2103},
{0x2104,0x2104,0x2104}, {0x2105,0x2105,0x2105},
@@ -1376,7 +1376,7 @@ static MY_UNICASE_INFO plane21[]={
{0x21FE,0x21FE,0x21FE}, {0x21FF,0x21FF,0x21FF}
};
-static MY_UNICASE_INFO plane24[]={
+static MY_UNICASE_CHARACTER plane24[]={
{0x2400,0x2400,0x2400}, {0x2401,0x2401,0x2401},
{0x2402,0x2402,0x2402}, {0x2403,0x2403,0x2403},
{0x2404,0x2404,0x2404}, {0x2405,0x2405,0x2405},
@@ -1507,7 +1507,7 @@ static MY_UNICASE_INFO plane24[]={
{0x24FE,0x24FE,0x24FE}, {0x24FF,0x24FF,0x24FF}
};
-static MY_UNICASE_INFO planeFF[]={
+static MY_UNICASE_CHARACTER planeFF[]={
{0xFF00,0xFF00,0xFF00}, {0xFF01,0xFF01,0xFF01},
{0xFF02,0xFF02,0xFF02}, {0xFF03,0xFF03,0xFF03},
{0xFF04,0xFF04,0xFF04}, {0xFF05,0xFF05,0xFF05},
@@ -1638,7 +1638,9 @@ static MY_UNICASE_INFO planeFF[]={
{0xFFFE,0xFFFE,0xFFFE}, {0xFFFF,0xFFFF,0xFFFF}
};
-MY_UNICASE_INFO *const my_unicase_default[256]={
+
+static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
+{
plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1671,14 +1673,20 @@ MY_UNICASE_INFO *const my_unicase_default[256]={
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, planeFF
+};
+
+MY_UNICASE_INFO my_unicase_default=
+{
+ 0xFFFF,
+ my_unicase_pages_default
};
/*
Reproduce old utf8_general_ci behaviour before we fixed Bug#27877.
*/
-MY_UNICASE_INFO *const my_unicase_mysql500[256]={
+MY_UNICASE_CHARACTER *my_unicase_pages_mysql500[256]={
plane00_mysql500,
plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1716,6 +1724,13 @@ MY_UNICASE_INFO *const my_unicase_mysql500[256]={
};
+MY_UNICASE_INFO my_unicase_mysql500=
+{
+ 0xFFFF,
+ my_unicase_pages_mysql500
+};
+
+
/*
Turkish lower/upper mapping:
1. LOWER(0x0049 LATIN CAPITAL LETTER I) ->
@@ -1724,7 +1739,7 @@ MY_UNICASE_INFO *const my_unicase_mysql500[256]={
0x0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
-static MY_UNICASE_INFO turk00[]=
+static MY_UNICASE_CHARACTER turk00[]=
{
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
@@ -1858,7 +1873,7 @@ static MY_UNICASE_INFO turk00[]=
-MY_UNICASE_INFO *const my_unicase_turkish[256]=
+static MY_UNICASE_CHARACTER *my_unicase_pages_turkish[256]=
{
turk00, plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -1895,14 +1910,23 @@ MY_UNICASE_INFO *const my_unicase_turkish[256]=
};
+MY_UNICASE_INFO my_unicase_turkish=
+{
+ 0xFFFF,
+ my_unicase_pages_turkish
+};
+
+
static inline void
-my_tosort_unicode(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_tosort_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc, uint flags)
{
- int page= *wc >> 8;
- if (page < 256)
+ if (*wc <= uni_plane->maxchar)
{
- if (uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].sort;
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[*wc >> 8]))
+ *wc= (flags & MY_CS_LOWER_SORT) ?
+ page[*wc & 0xFF].tolower :
+ page[*wc & 0xFF].sort;
}
else
{
@@ -1925,7 +1949,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
- MY_UNICASE_INFO *const *weights, int recurse_level)
+ MY_UNICASE_INFO *weights, int recurse_level)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
@@ -1974,8 +1998,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
{
if (weights)
{
- my_tosort_unicode(weights, &s_wc);
- my_tosort_unicode(weights, &w_wc);
+ my_tosort_unicode(weights, &s_wc, cs->state);
+ my_tosort_unicode(weights, &w_wc, cs->state);
}
if (s_wc != w_wc)
return 1; /* No match */
@@ -2045,8 +2069,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
return 1;
if (weights)
{
- my_tosort_unicode(weights, &s_wc);
- my_tosort_unicode(weights, &w_wc);
+ my_tosort_unicode(weights, &s_wc, cs->state);
+ my_tosort_unicode(weights, &w_wc, cs->state);
}
if (s_wc == w_wc)
@@ -2074,7 +2098,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
- MY_UNICASE_INFO *const *weights)
+ MY_UNICASE_INFO *weights)
{
return my_wildcmp_unicode_impl(cs, str, str_end,
wildstr, wildend,
@@ -2099,7 +2123,7 @@ my_strnxfrm_unicode(CHARSET_INFO *cs,
uchar *de= dst + dstlen;
uchar *de_beg= de - 1;
const uchar *se = src + srclen;
- MY_UNICASE_INFO * const*uni_plane= (cs->state & MY_CS_BINSORT) ?
+ MY_UNICASE_INFO *uni_plane= (cs->state & MY_CS_BINSORT) ?
NULL : cs->caseinfo;
DBUG_ASSERT(src);
@@ -2110,7 +2134,7 @@ my_strnxfrm_unicode(CHARSET_INFO *cs,
src+=res;
if (uni_plane)
- my_tosort_unicode(uni_plane, &wc);
+ my_tosort_unicode(uni_plane, &wc, cs->state);
*dst++= (uchar) (wc >> 8);
if (dst < de)
@@ -2476,20 +2500,45 @@ static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
}
+static inline void
+my_tolower_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
+{
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
+ *wc= page[*wc & 0xFF].tolower;
+}
+
+
+static inline void
+my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
+{
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
+ *wc= page[*wc & 0xFF].toupper;
+}
+
+
+static inline void
+my_tosort_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
+{
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
+ *wc= page[*wc & 0xFF].sort;
+}
+
static size_t my_caseup_utf8(CHARSET_INFO *cs, char *src, size_t srclen,
char *dst, size_t dstlen)
{
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->caseup_multiply == 1);
while ((src < srcend) &&
(srcres= my_utf8_uni(cs, &wc, (uchar *) src, (uchar*) srcend)) > 0)
{
- int plane= (wc>>8) & 0xFF;
- wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
+ my_toupper_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
break;
src+= srcres;
@@ -2505,7 +2554,7 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e=s+slen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/*
Remove end space. We have to do this to be able to compare
@@ -2516,8 +2565,7 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{
- int plane = (wc>>8) & 0xFF;
- wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
+ my_tosort_unicode(uni_plane, &wc, cs->state);
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
n2[0]+=3;
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
@@ -2532,14 +2580,13 @@ static size_t my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->caseup_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
- int plane= (wc>>8) & 0xFF;
- wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
+ my_toupper_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
@@ -2556,14 +2603,13 @@ static size_t my_casedn_utf8(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->casedn_multiply == 1);
while ((src < srcend) &&
(srcres= my_utf8_uni(cs, &wc, (uchar*) src, (uchar*)srcend)) > 0)
{
- int plane= (wc>>8) & 0xFF;
- wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
+ my_tolower_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8(cs, wc, (uchar*) dst, (uchar*) dstend)) <= 0)
break;
src+= srcres;
@@ -2578,14 +2624,13 @@ static size_t my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->casedn_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
- int plane= (wc>>8) & 0xFF;
- wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
+ my_tolower_utf8mb3(uni_plane, &wc);
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
@@ -2621,11 +2666,10 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
const uchar *se=s+slen;
const uchar *te=t+tlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while ( s < se && t < te )
{
- int plane;
s_res=my_utf8_uni(cs,&s_wc, s, se);
t_res=my_utf8_uni(cs,&t_wc, t, te);
@@ -2635,10 +2679,9 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
return bincmp(s, se, t, te);
}
- plane=(s_wc>>8) & 0xFF;
- s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
- plane=(t_wc>>8) & 0xFF;
- t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+ my_tosort_unicode(uni_plane, &s_wc, cs->state);
+ my_tosort_unicode(uni_plane, &t_wc, cs->state);
+
if ( s_wc != t_wc )
{
return s_wc > t_wc ? 1 : -1;
@@ -2690,7 +2733,7 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
int s_res, t_res, res;
my_wc_t UNINIT_VAR(s_wc), UNINIT_VAR(t_wc);
const uchar *se= s+slen, *te= t+tlen;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
@@ -2698,7 +2741,6 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
while ( s < se && t < te )
{
- int plane;
s_res=my_utf8_uni(cs,&s_wc, s, se);
t_res=my_utf8_uni(cs,&t_wc, t, te);
@@ -2708,10 +2750,9 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
return bincmp(s, se, t, te);
}
- plane=(s_wc>>8) & 0xFF;
- s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
- plane=(t_wc>>8) & 0xFF;
- t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+ my_tosort_unicode(uni_plane, &s_wc, cs->state);
+ my_tosort_unicode(uni_plane, &t_wc, cs->state);
+
if ( s_wc != t_wc )
{
return s_wc > t_wc ? 1 : -1;
@@ -2778,7 +2819,7 @@ static int my_strnncollsp_utf8(CHARSET_INFO *cs,
static
int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
{
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s[0] && t[0])
{
my_wc_t s_wc,t_wc;
@@ -2795,7 +2836,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
}
else
{
- int plane, res;
+ int res;
/*
Scan a multibyte character.
@@ -2823,8 +2864,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
s+= res;
/* Convert Unicode code into weight according to collation */
- plane=(s_wc>>8) & 0xFF;
- s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc;
+ my_tolower_utf8mb3(uni_plane, &s_wc);
}
@@ -2838,15 +2878,13 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
}
else
{
- int plane;
int res=my_utf8_uni(cs,&t_wc, (const uchar*)t, (const uchar*) t + 3);
if (res <= 0)
return strcmp(s, t);
t+= res;
/* Convert code into weight */
- plane=(t_wc>>8) & 0xFF;
- t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc;
+ my_tolower_utf8mb3(uni_plane, &t_wc);
}
/* Now we have two weights, let's compare them */
@@ -2863,7 +2901,7 @@ int my_wildcmp_utf8(CHARSET_INFO *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane);
}
@@ -2966,11 +3004,10 @@ struct charset_info_st my_charset_utf8_general_ci=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -2999,11 +3036,10 @@ struct charset_info_st my_charset_utf8_general_mysql500_ci=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big */
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_mysql500, /* caseinfo */
+ &my_unicase_mysql500, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -3032,11 +3068,10 @@ struct charset_info_st my_charset_utf8_bin=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -3117,7 +3152,7 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
const uchar *se= s + slen;
const uchar *te= t + tlen;
int save_diff= 0;
- MY_UNICASE_INFO *const *uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
@@ -3125,7 +3160,6 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
while ( s < se && t < te )
{
- int plane;
s_res=my_utf8_uni(cs,&s_wc, s, se);
t_res=my_utf8_uni(cs,&t_wc, t, te);
@@ -3139,10 +3173,10 @@ static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
{
save_diff = ((int)s_wc) - ((int)t_wc);
}
- plane=(s_wc>>8) & 0xFF;
- s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
- plane=(t_wc>>8) & 0xFF;
- t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+
+ my_tosort_unicode(uni_plane, &s_wc, cs->state);
+ my_tosort_unicode(uni_plane, &t_wc, cs->state);
+
if ( s_wc != t_wc )
{
return ((int) s_wc) - ((int) t_wc);
@@ -4521,11 +4555,10 @@ struct charset_info_st my_charset_filename=
to_lower_utf8, /* to_lower */
to_upper_utf8, /* to_upper */
to_upper_utf8, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -4887,20 +4920,26 @@ my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)),
static inline void
-my_tolower_utf8mb4(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_tolower_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) /* replaces *wc in place with its .tolower mapping when one is available */
{
- int page= *wc >> 8;
- if (page < 256 && uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].tolower;
+ if (*wc <= uni_plane->maxchar) /* code points above maxchar are left unchanged */
+ {
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8)])) /* page is selected by wc >> 8; a NULL page leaves *wc unchanged */
+ *wc= page[*wc & 0xFF].tolower; /* the low 8 bits select the entry inside the page */
+ }
}
static inline void
-my_toupper_utf8mb4(MY_UNICASE_INFO * const* uni_plane, my_wc_t *wc)
+my_toupper_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc) /* replaces *wc in place with its .toupper mapping when one is available */
{
- int page= *wc >> 8;
- if (page < 256 && uni_plane[page])
- *wc= uni_plane[page][*wc & 0xFF].toupper;
+ if (*wc <= uni_plane->maxchar) /* code points above maxchar are left unchanged */
+ {
+ MY_UNICASE_CHARACTER *page;
+ if ((page= uni_plane->page[(*wc >> 8)])) /* page is selected by wc >> 8; a NULL page leaves *wc unchanged */
+ *wc= page[*wc & 0xFF].toupper; /* the low 8 bits select the entry inside the page */
+ }
}
@@ -4911,7 +4950,7 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen,
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->caseup_multiply == 1);
while ((src < srcend) &&
@@ -4943,7 +4982,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
my_wc_t wc;
int res;
const uchar *e= s + slen;
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
/*
Remove end space. We do this to be able to compare
@@ -4954,7 +4993,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{
- my_tosort_unicode(uni_plane, &wc);
+ my_tosort_unicode(uni_plane, &wc, cs->state);
my_hash_add(n1, n2, (uint) (wc & 0xFF));
my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
if (wc > 0xFFFF)
@@ -4979,7 +5018,7 @@ my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->caseup_multiply == 1);
while (*src &&
@@ -5004,7 +5043,7 @@ my_casedn_utf8mb4(CHARSET_INFO *cs,
my_wc_t wc;
int srcres, dstres;
char *srcend= src + srclen, *dstend= dst + dstlen, *dst0= dst;
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(src != dst || cs->casedn_multiply == 1);
while ((src < srcend) &&
@@ -5027,7 +5066,7 @@ my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src)
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->casedn_multiply == 1);
while (*src &&
@@ -5069,7 +5108,7 @@ my_strnncoll_utf8mb4(CHARSET_INFO *cs,
my_wc_t s_wc,t_wc;
const uchar *se= s + slen;
const uchar *te= t + tlen;
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
LINT_INIT(s_wc);
LINT_INIT(t_wc);
@@ -5084,9 +5123,9 @@ my_strnncoll_utf8mb4(CHARSET_INFO *cs,
return bincmp_utf8mb4(s, se, t, te);
}
- my_tosort_unicode(uni_plane, &s_wc);
- my_tosort_unicode(uni_plane, &t_wc);
-
+ my_tosort_unicode(uni_plane, &s_wc, cs->state);
+ my_tosort_unicode(uni_plane, &t_wc, cs->state);
+
if ( s_wc != t_wc )
{
return s_wc > t_wc ? 1 : -1;
@@ -5136,7 +5175,7 @@ my_strnncollsp_utf8mb4(CHARSET_INFO *cs,
int res;
my_wc_t s_wc, t_wc;
const uchar *se= s + slen, *te= t + tlen;
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
LINT_INIT(s_wc);
LINT_INIT(t_wc);
@@ -5155,8 +5194,8 @@ my_strnncollsp_utf8mb4(CHARSET_INFO *cs,
return bincmp_utf8mb4(s, se, t, te);
}
- my_tosort_unicode(uni_plane, &s_wc);
- my_tosort_unicode(uni_plane, &t_wc);
+ my_tosort_unicode(uni_plane, &s_wc, cs->state);
+ my_tosort_unicode(uni_plane, &t_wc, cs->state);
if ( s_wc != t_wc )
{
@@ -5220,7 +5259,7 @@ my_strnncollsp_utf8mb4(CHARSET_INFO *cs,
static int
my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
{
- MY_UNICASE_INFO * const* uni_plane= cs->caseinfo;
+ MY_UNICASE_INFO *uni_plane= cs->caseinfo;
while (s[0] && t[0])
{
my_wc_t s_wc,t_wc;
@@ -5399,11 +5438,10 @@ struct charset_info_st my_charset_utf8mb4_general_ci=
to_lower_utf8mb4, /* to_lower */
to_upper_utf8mb4, /* to_upper */
to_upper_utf8mb4, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
@@ -5432,11 +5470,10 @@ struct charset_info_st my_charset_utf8mb4_bin=
to_lower_utf8mb4, /* to_lower */
to_upper_utf8mb4, /* to_upper */
NULL, /* sort_order */
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default,/* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index 8fd15ebddb2..d1cd51a5d8d 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -690,11 +690,10 @@ struct charset_info_st my_charset_cp1250_czech_ci =
to_lower_win1250ch,
to_upper_win1250ch,
sort_order_win1250ch,
- NULL, /* contractions */
- NULL, /* sort_order_big*/
+ NULL, /* uca */
tab_cp1250_uni, /* tab_to_uni */
idx_uni_cp1250, /* tab_from_uni */
- my_unicase_default, /* caseinfo */
+ &my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
2, /* strxfrm_multiply */
diff --git a/strings/ctype.c b/strings/ctype.c
index b71d7dee4c4..43e9b290b3e 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -38,6 +38,18 @@
*/
+
+/*
+ Avoid using my_snprintf.
+ We cannot use my_snprintf() here, because ctype.o is
+ used to build conf_to_src, which must have minimal
+ dependencies.
+*/
+
+/* Poison my_snprintf: any call below expands to a string literal and does not compile */
+#undef my_snprintf
+#define my_snprintf "We cannot use my_snprintf in this file"
+
+
int (*my_string_stack_guard)(int)= NULL;
static char *mstr(char *str,const char *src,size_t l1,size_t l2)
@@ -71,11 +83,75 @@ struct my_cs_file_section_st
#define _CS_PRIMARY_ID 15
#define _CS_BINARY_ID 16
#define _CS_CSDESCRIPT 17
-#define _CS_RESET 18
-#define _CS_DIFF1 19
-#define _CS_DIFF2 20
-#define _CS_DIFF3 21
-#define _CS_IDENTICAL 22
+
+
+/* Special purpose commands */
+#define _CS_UCA_VERSION 100
+#define _CS_CL_SUPPRESS_CONTRACTIONS 101
+#define _CS_CL_OPTIMIZE 102
+#define _CS_CL_SHIFT_AFTER_METHOD 103
+
+
+/* Collation Settings */
+#define _CS_ST_SETTINGS 200
+#define _CS_ST_STRENGTH 201
+#define _CS_ST_ALTERNATE 202
+#define _CS_ST_BACKWARDS 203
+#define _CS_ST_NORMALIZATION 204
+#define _CS_ST_CASE_LEVEL 205
+#define _CS_ST_CASE_FIRST 206
+#define _CS_ST_HIRAGANA_QUATERNARY 207
+#define _CS_ST_NUMERIC 208
+#define _CS_ST_VARIABLE_TOP 209
+#define _CS_ST_MATCH_BOUNDARIES 210
+#define _CS_ST_MATCH_STYLE 211
+
+
+/* Rules */
+#define _CS_RULES 300
+#define _CS_RESET 301
+#define _CS_DIFF1 302
+#define _CS_DIFF2 303
+#define _CS_DIFF3 304
+#define _CS_DIFF4 305
+#define _CS_IDENTICAL 306
+
+/* Rules: Expansions */
+#define _CS_EXP_X 320
+#define _CS_EXP_EXTEND 321
+#define _CS_EXP_DIFF1 322
+#define _CS_EXP_DIFF2 323
+#define _CS_EXP_DIFF3 324
+#define _CS_EXP_DIFF4 325
+#define _CS_EXP_IDENTICAL 326
+
+/* Rules: Abbreviating Ordering Specifications */
+#define _CS_A_DIFF1 351
+#define _CS_A_DIFF2 352
+#define _CS_A_DIFF3 353
+#define _CS_A_DIFF4 354
+#define _CS_A_IDENTICAL 355
+
+/* Rules: previous context */
+#define _CS_CONTEXT 370
+
+/* Rules: Placing Characters Before Others*/
+#define _CS_RESET_BEFORE 380
+
+/* Rules: Logical Reset Positions */
+#define _CS_RESET_FIRST_PRIMARY_IGNORABLE 401
+#define _CS_RESET_LAST_PRIMARY_IGNORABLE 402
+#define _CS_RESET_FIRST_SECONDARY_IGNORABLE 403
+#define _CS_RESET_LAST_SECONDARY_IGNORABLE 404
+#define _CS_RESET_FIRST_TERTIARY_IGNORABLE 405
+#define _CS_RESET_LAST_TERTIARY_IGNORABLE 406
+#define _CS_RESET_FIRST_TRAILING 407
+#define _CS_RESET_LAST_TRAILING 408
+#define _CS_RESET_FIRST_VARIABLE 409
+#define _CS_RESET_LAST_VARIABLE 410
+#define _CS_RESET_FIRST_NON_IGNORABLE 411
+#define _CS_RESET_LAST_NON_IGNORABLE 412
+
static const struct my_cs_file_section_st sec[] =
@@ -85,6 +161,8 @@ static const struct my_cs_file_section_st sec[] =
{_CS_MISC, "xml/encoding"},
{_CS_MISC, "charsets"},
{_CS_MISC, "charsets/max-id"},
+ {_CS_MISC, "charsets/copyright"},
+ {_CS_MISC, "charsets/description"},
{_CS_CHARSET, "charsets/charset"},
{_CS_PRIMARY_ID, "charsets/charset/primary-id"},
{_CS_BINARY_ID, "charsets/charset/binary-id"},
@@ -106,11 +184,72 @@ static const struct my_cs_file_section_st sec[] =
{_CS_ORDER, "charsets/charset/collation/order"},
{_CS_FLAG, "charsets/charset/collation/flag"},
{_CS_COLLMAP, "charsets/charset/collation/map"},
- {_CS_RESET, "charsets/charset/collation/rules/reset"},
- {_CS_DIFF1, "charsets/charset/collation/rules/p"},
- {_CS_DIFF2, "charsets/charset/collation/rules/s"},
- {_CS_DIFF3, "charsets/charset/collation/rules/t"},
- {_CS_IDENTICAL, "charsets/charset/collation/rules/i"},
+
+ /* Special purpose commands */
+ {_CS_UCA_VERSION, "charsets/charset/collation/version"},
+ {_CS_CL_SUPPRESS_CONTRACTIONS, "charsets/charset/collation/suppress_contractions"},
+ {_CS_CL_OPTIMIZE, "charsets/charset/collation/optimize"},
+ {_CS_CL_SHIFT_AFTER_METHOD, "charsets/charset/collation/shift-after-method"},
+
+ /* Collation Settings */
+ {_CS_ST_SETTINGS, "charsets/charset/collation/settings"},
+ {_CS_ST_STRENGTH, "charsets/charset/collation/settings/strength"},
+ {_CS_ST_ALTERNATE, "charsets/charset/collation/settings/alternate"},
+ {_CS_ST_BACKWARDS, "charsets/charset/collation/settings/backwards"},
+ {_CS_ST_NORMALIZATION, "charsets/charset/collation/settings/normalization"},
+ {_CS_ST_CASE_LEVEL, "charsets/charset/collation/settings/caseLevel"},
+ {_CS_ST_CASE_FIRST, "charsets/charset/collation/settings/caseFirst"},
+ {_CS_ST_HIRAGANA_QUATERNARY, "charsets/charset/collation/settings/hiraganaQuaternary"},
+ {_CS_ST_NUMERIC, "charsets/charset/collation/settings/numeric"},
+ {_CS_ST_VARIABLE_TOP, "charsets/charset/collation/settings/variableTop"},
+ {_CS_ST_MATCH_BOUNDARIES, "charsets/charset/collation/settings/match-boundaries"},
+ {_CS_ST_MATCH_STYLE, "charsets/charset/collation/settings/match-style"},
+
+ /* Rules */
+ {_CS_RULES, "charsets/charset/collation/rules"},
+ {_CS_RESET, "charsets/charset/collation/rules/reset"},
+ {_CS_DIFF1, "charsets/charset/collation/rules/p"},
+ {_CS_DIFF2, "charsets/charset/collation/rules/s"},
+ {_CS_DIFF3, "charsets/charset/collation/rules/t"},
+ {_CS_DIFF4, "charsets/charset/collation/rules/q"},
+ {_CS_IDENTICAL, "charsets/charset/collation/rules/i"},
+
+ /* Rules: expansions */
+ {_CS_EXP_X, "charsets/charset/collation/rules/x"},
+ {_CS_EXP_EXTEND, "charsets/charset/collation/rules/x/extend"},
+ {_CS_EXP_DIFF1, "charsets/charset/collation/rules/x/p"},
+ {_CS_EXP_DIFF2, "charsets/charset/collation/rules/x/s"},
+ {_CS_EXP_DIFF3, "charsets/charset/collation/rules/x/t"},
+ {_CS_EXP_DIFF4, "charsets/charset/collation/rules/x/q"},
+ {_CS_EXP_IDENTICAL, "charsets/charset/collation/rules/x/i"},
+
+ /* Rules: previous context */
+ {_CS_CONTEXT, "charsets/charset/collation/rules/x/context"},
+
+ /* Rules: Abbreviating Ordering Specifications */
+ {_CS_A_DIFF1, "charsets/charset/collation/rules/pc"},
+ {_CS_A_DIFF2, "charsets/charset/collation/rules/sc"},
+ {_CS_A_DIFF3, "charsets/charset/collation/rules/tc"},
+ {_CS_A_DIFF4, "charsets/charset/collation/rules/qc"},
+ {_CS_A_IDENTICAL, "charsets/charset/collation/rules/ic"},
+
+ /* Rules: Placing Characters Before Others*/
+ {_CS_RESET_BEFORE, "charsets/charset/collation/rules/reset/before"},
+
+ /* Rules: Logical Reset Positions */
+ {_CS_RESET_FIRST_NON_IGNORABLE, "charsets/charset/collation/rules/reset/first_non_ignorable"},
+ {_CS_RESET_LAST_NON_IGNORABLE, "charsets/charset/collation/rules/reset/last_non_ignorable"},
+ {_CS_RESET_FIRST_PRIMARY_IGNORABLE, "charsets/charset/collation/rules/reset/first_primary_ignorable"},
+ {_CS_RESET_LAST_PRIMARY_IGNORABLE, "charsets/charset/collation/rules/reset/last_primary_ignorable"},
+ {_CS_RESET_FIRST_SECONDARY_IGNORABLE, "charsets/charset/collation/rules/reset/first_secondary_ignorable"},
+ {_CS_RESET_LAST_SECONDARY_IGNORABLE, "charsets/charset/collation/rules/reset/last_secondary_ignorable"},
+ {_CS_RESET_FIRST_TERTIARY_IGNORABLE, "charsets/charset/collation/rules/reset/first_tertiary_ignorable"},
+ {_CS_RESET_LAST_TERTIARY_IGNORABLE, "charsets/charset/collation/rules/reset/last_tertiary_ignorable"},
+ {_CS_RESET_FIRST_TRAILING, "charsets/charset/collation/rules/reset/first_trailing"},
+ {_CS_RESET_LAST_TRAILING, "charsets/charset/collation/rules/reset/last_trailing"},
+ {_CS_RESET_FIRST_VARIABLE, "charsets/charset/collation/rules/reset/first_variable"},
+ {_CS_RESET_LAST_VARIABLE, "charsets/charset/collation/rules/reset/last_variable"},
+
{0, NULL}
};
@@ -120,14 +259,16 @@ static const struct my_cs_file_section_st
const struct my_cs_file_section_st *s;
for (s=sec; s->str; s++)
{
- if (!strncmp(attr,s->str,len))
+ if (!strncmp(attr, s->str, len) && s->str[len] == 0)
return s;
}
return NULL;
}
#define MY_CS_CSDESCR_SIZE 64
-#define MY_CS_TAILORING_SIZE 1024
+#define MY_CS_TAILORING_SIZE (32*1024) /* parenthesized so the macro is safe inside larger expressions */
+#define MY_CS_UCA_VERSION_SIZE 64
+#define MY_CS_CONTEXT_SIZE 64 /* size of the "context" buffer in struct my_cs_file_info */
typedef struct my_cs_file_info
{
@@ -139,12 +280,59 @@ typedef struct my_cs_file_info
uchar sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
char comment[MY_CS_CSDESCR_SIZE];
- char tailoring[MY_CS_TAILORING_SIZE];
+ char *tailoring;
size_t tailoring_length;
+ size_t tailoring_alloced_length;
+ char context[MY_CS_CONTEXT_SIZE];
struct charset_info_st cs;
- int (*add_collation)(struct charset_info_st *cs);
-} MY_CHARSET_LOADER;
+ MY_CHARSET_LOADER *loader;
+} MY_CHARSET_FILE;
+
+
+/* Zero-fill the charset description (i->cs) collected so far. */
+static void
+my_charset_file_reset_charset(MY_CHARSET_FILE *i)
+{
+  struct charset_info_st *cs= &i->cs;
+  memset(cs, 0, sizeof(*cs));
+}
+
+/*
+  Prepare for a new collation: empty the pending context string and
+  set the tailoring length back to 0. The tailoring buffer itself is
+  neither freed nor shrunk here.
+*/
+static void
+my_charset_file_reset_collation(MY_CHARSET_FILE *i)
+{
+  i->context[0]= '\0';
+  i->tailoring_length= 0;
+}
+
+
+/*
+  Bring a MY_CHARSET_FILE into its initial state: no tailoring buffer,
+  empty context, zeroed charset data. The "loader" member is not
+  touched here.
+*/
+static void
+my_charset_file_init(MY_CHARSET_FILE *i)
+{
+  i->tailoring_alloced_length= 0;
+  i->tailoring= NULL;
+  my_charset_file_reset_collation(i);
+  my_charset_file_reset_charset(i);
+}
+
+
+/*
+  Release the tailoring buffer through the loader's free() callback.
+  The pointer and both length fields are reset afterwards, so the
+  structure does not keep a dangling pointer or stale sizes.
+*/
+static void
+my_charset_file_free(MY_CHARSET_FILE *i)
+{
+  i->loader->free(i->tailoring);
+  i->tailoring= NULL;
+  i->tailoring_length= 0;
+  i->tailoring_alloced_length= 0;
+}
+
+
+/*
+  Make sure the tailoring buffer is larger than "newlen" bytes.
+
+  When the current allocation is too small, the buffer is reallocated
+  through the loader's realloc() callback to newlen + 32K bytes.
+  The new pointer and capacity are stored only after the allocation
+  succeeded; on failure the old buffer and its recorded size stay
+  valid (and are released later by my_charset_file_free()).
+
+  Returns MY_XML_OK on success, MY_XML_ERROR if reallocation failed.
+*/
+static int
+my_charset_file_tailoring_realloc(MY_CHARSET_FILE *i, size_t newlen)
+{
+  size_t alloced_length;
+  char *ptr;
+
+  if (i->tailoring_alloced_length > newlen)
+    return MY_XML_OK;
+
+  alloced_length= newlen + 32*1024;
+  /* Use a temporary: assigning directly would lose the old buffer on failure */
+  if (!(ptr= i->loader->realloc(i->tailoring, alloced_length)))
+    return MY_XML_ERROR;
+
+  i->tailoring= ptr;
+  i->tailoring_alloced_length= alloced_length;
+  return MY_XML_OK;
+}
static int fill_uchar(uchar *a,uint size,const char *str, size_t len)
@@ -182,17 +370,119 @@ static int fill_uint16(uint16 *a,uint size,const char *str, size_t len)
}
+
+
+/*
+  Format one value into the tail of the tailoring buffer.
+
+  "fmt" is passed to sprintf() with two arguments: (int) len and attr,
+  i.e. it is expected to consume them with a single "%.*s" (or to
+  ignore them). Returns MY_XML_OK, or MY_XML_ERROR if the buffer could
+  not be enlarged.
+*/
+static int
+tailoring_append(MY_XML_PARSER *st,
+                 const char *fmt, size_t len, const char *attr)
+{
+  struct my_cs_file_info *info= (struct my_cs_file_info *) st->user_data;
+  size_t required= info->tailoring_length + len + 64; /* 64 for format */
+  char *tail;
+
+  if (my_charset_file_tailoring_realloc(info, required) != MY_XML_OK)
+    return MY_XML_ERROR;
+
+  tail= info->tailoring + info->tailoring_length;
+  sprintf(tail, fmt, (int) len, attr);
+  info->tailoring_length+= strlen(tail);
+  return MY_XML_OK;
+}
+
+
+/*
+  Same as tailoring_append(), but for formats with two "%.*s" items:
+  sprintf() receives (int) len1, attr1, (int) len2, attr2 in that order.
+  Returns MY_XML_OK, or MY_XML_ERROR if the buffer could not be enlarged.
+*/
+static int
+tailoring_append2(MY_XML_PARSER *st,
+                  const char *fmt,
+                  size_t len1, const char *attr1,
+                  size_t len2, const char *attr2)
+{
+  struct my_cs_file_info *info= (struct my_cs_file_info *) st->user_data;
+  size_t required= info->tailoring_length + len1 + len2 + 64; /* 64 for format */
+  char *tail;
+
+  if (my_charset_file_tailoring_realloc(info, required) != MY_XML_OK)
+    return MY_XML_ERROR;
+
+  tail= info->tailoring + info->tailoring_length;
+  sprintf(tail, fmt, (int) len1, attr1, (int) len2, attr2);
+  info->tailoring_length+= strlen(tail);
+  return MY_XML_OK;
+}
+
+
+/*
+  Measure one "character" at the start of the range [s, e).
+
+  Recognized forms, in this order:
+  - an escape "\u" followed by one or more hex digits: the whole escape;
+  - a single non-zero 7-bit byte;
+  - anything else is decoded with the mb_wc() method of
+    my_charset_utf8_general_ci.
+
+  *wc is set to 0 for the first two forms and to whatever mb_wc()
+  stores for the third one.
+
+  Returns the length in bytes, or 0 if the range is empty or mb_wc()
+  did not return a positive length.
+*/
+static size_t
+scan_one_character(const char *s, const char *e, my_wc_t *wc)
+{
+  CHARSET_INFO *cs= &my_charset_utf8_general_ci;
+  if (s >= e)
+    return 0;
+
+  /* Escape sequence: \uXXXX */
+  if (s[0] == '\\' && s + 2 < e && s[1] == 'u' && my_isxdigit(cs, s[2]))
+  {
+    size_t len= 3; /* We have at least one digit */
+    for (s+= 3; s < e && my_isxdigit(cs, s[0]); s++, len++)
+    {
+    }
+    wc[0]= 0;
+    return len;
+  }
+  /*
+    7-bit character. Test the byte as unsigned: "s[0] > 0" is true for
+    bytes >= 0x80 on platforms where plain char is unsigned, which
+    would split multi-byte sequences into separate bytes.
+  */
+  else if (s[0] != 0 && (uchar) s[0] < 0x80)
+  {
+    wc[0]= 0;
+    return 1;
+  }
+  else /* Non-escaped character */
+  {
+    int rc= cs->cset->mb_wc(cs, wc, (uchar *) s, (uchar *) e);
+    if (rc > 0)
+      return (size_t) rc;
+  }
+  return 0;
+}
+
+
+/*
+  Append every character of "attr" separately, each one formatted
+  with "fmt" through tailoring_append(). Characters are delimited by
+  scan_one_character(); scanning stops when it returns 0.
+  Returns MY_XML_OK, or MY_XML_ERROR as soon as one append fails.
+*/
+static int
+tailoring_append_abbreviation(MY_XML_PARSER *st,
+                              const char *fmt, size_t len, const char *attr)
+{
+  const char *attrend= attr + len;
+  my_wc_t wc;
+  size_t nbytes;
+
+  while ((nbytes= scan_one_character(attr, attrend, &wc)) > 0)
+  {
+    DBUG_ASSERT(attr < attrend);
+    if (tailoring_append(st, fmt, nbytes, attr) != MY_XML_OK)
+      return MY_XML_ERROR;
+    attr+= nbytes;
+  }
+  return MY_XML_OK;
+}
+
+
static int cs_enter(MY_XML_PARSER *st,const char *attr, size_t len)
{
struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
const struct my_cs_file_section_st *s= cs_file_sec(attr,len);
+ int state= s ? s->state : 0;
- if ( s && (s->state == _CS_CHARSET))
- bzero(&i->cs,sizeof(i->cs));
-
- if (s && (s->state == _CS_COLLATION))
- i->tailoring_length= 0;
+ switch (state) {
+ case 0:
+ i->loader->reporter(WARNING_LEVEL, "Unknown LDML tag: '%.*s'", len, attr);
+ break;
+
+ case _CS_CHARSET:
+ my_charset_file_reset_charset(i);
+ break;
+
+ case _CS_COLLATION:
+ my_charset_file_reset_collation(i);
+ break;
+ case _CS_RESET:
+ return tailoring_append(st, " &", 0, NULL);
+
+ default:
+ break;
+ }
return MY_XML_OK;
}
@@ -206,8 +496,60 @@ static int cs_leave(MY_XML_PARSER *st,const char *attr, size_t len)
switch(state){
case _CS_COLLATION:
- rc= i->add_collation ? i->add_collation(&i->cs) : MY_XML_OK;
+ if (i->tailoring_length)
+ i->cs.tailoring= i->tailoring;
+ rc= i->loader->add_collation ? i->loader->add_collation(&i->cs) : MY_XML_OK;
break;
+
+ /* Rules: Logical Reset Positions */
+ case _CS_RESET_FIRST_NON_IGNORABLE:
+ rc= tailoring_append(st, "[first non-ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_LAST_NON_IGNORABLE:
+ rc= tailoring_append(st, "[last non-ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_FIRST_PRIMARY_IGNORABLE:
+ rc= tailoring_append(st, "[first primary ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_LAST_PRIMARY_IGNORABLE:
+ rc= tailoring_append(st, "[last primary ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_FIRST_SECONDARY_IGNORABLE:
+ rc= tailoring_append(st, "[first secondary ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_LAST_SECONDARY_IGNORABLE:
+ rc= tailoring_append(st, "[last secondary ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_FIRST_TERTIARY_IGNORABLE:
+ rc= tailoring_append(st, "[first tertiary ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_LAST_TERTIARY_IGNORABLE:
+ rc= tailoring_append(st, "[last tertiary ignorable]", 0, NULL);
+ break;
+
+ case _CS_RESET_FIRST_TRAILING:
+ rc= tailoring_append(st, "[first trailing]", 0, NULL);
+ break;
+
+ case _CS_RESET_LAST_TRAILING:
+ rc= tailoring_append(st, "[last trailing]", 0, NULL);
+ break;
+
+ case _CS_RESET_FIRST_VARIABLE:
+ rc= tailoring_append(st, "[first variable]", 0, NULL);
+ break;
+
+ case _CS_RESET_LAST_VARIABLE:
+ rc= tailoring_append(st, "[last variable]", 0, NULL);
+ break;
+
default:
rc=MY_XML_OK;
}
@@ -215,14 +557,40 @@ static int cs_leave(MY_XML_PARSER *st,const char *attr, size_t len)
}
+static const char *diff_fmt[5]= /* sprintf formats for rule operators; indexed by (state - first DIFF1 state of the group); each takes one (int length, string) pair */
+{
+ "<%.*s", /* DIFF1 */
+ "<<%.*s", /* DIFF2 */
+ "<<<%.*s", /* DIFF3 */
+ "<<<<%.*s", /* DIFF4 */
+ "=%.*s" /* IDENTICAL */
+};
+
+
+static const char *context_diff_fmt[5]= /* like diff_fmt, but with two (int length, string) pairs: the stored context first, then the value; indexed by (state - _CS_EXP_DIFF1) */
+{
+ "<%.*s|%.*s", /* DIFF1 */
+ "<<%.*s|%.*s", /* DIFF2 */
+ "<<<%.*s|%.*s", /* DIFF3 */
+ "<<<<%.*s|%.*s", /* DIFF4 */
+ "=%.*s|%.*s" /* IDENTICAL */
+};
+
+
static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
{
struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
const struct my_cs_file_section_st *s;
- int state= (int)((s= cs_file_sec(st->attr, strlen(st->attr))) ? s->state :
- 0);
-
+ int state= (int)((s= cs_file_sec(st->attr.start,
+ st->attr.end - st->attr.start)) ?
+ s->state : 0);
+ int rc= MY_XML_OK;
+
switch (state) {
+ case _CS_MISC:
+ case _CS_FAMILY:
+ case _CS_ORDER:
+ break;
case _CS_ID:
i->cs.number= strtol(attr,(char**)NULL,10);
break;
@@ -269,47 +637,185 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
i->cs.ctype=i->ctype;
break;
+
+ /* Special purpose commands */
+ case _CS_UCA_VERSION:
+ rc= tailoring_append(st, "[version %.*s]", len, attr);
+ break;
+
+ case _CS_CL_SUPPRESS_CONTRACTIONS:
+ rc= tailoring_append(st, "[suppress contractions %.*s]", len, attr);
+ break;
+
+ case _CS_CL_OPTIMIZE:
+ rc= tailoring_append(st, "[optimize %.*s]", len, attr);
+ break;
+
+ case _CS_CL_SHIFT_AFTER_METHOD:
+ rc= tailoring_append(st, "[shift-after-method %.*s]", len, attr);
+ break;
+
+ /* Collation Settings */
+ case _CS_ST_STRENGTH:
+ /* 1, 2, 3, 4, 5, or primary, secondary, tertiary, quaternary, identical */
+ rc= tailoring_append(st, "[strength %.*s]", len, attr);
+ break;
+
+ case _CS_ST_ALTERNATE:
+ /* non-ignorable, shifted */
+ rc= tailoring_append(st, "[alternate %.*s]", len, attr);
+ break;
+
+ case _CS_ST_BACKWARDS:
+ /* on, off, 2 */
+ rc= tailoring_append(st, "[backwards %.*s]", len, attr);
+ break;
+
+ case _CS_ST_NORMALIZATION:
+ /*
+ TODO for WL#896: check collations for normalization: vi.xml
+ We want precomposed characters work well at this point.
+ */
+ /* on, off */
+ rc= tailoring_append(st, "[normalization %.*s]", len, attr);
+ break;
+
+ case _CS_ST_CASE_LEVEL:
+ /* on, off */
+ rc= tailoring_append(st, "[caseLevel %.*s]", len, attr);
+ break;
+
+ case _CS_ST_CASE_FIRST:
+ /* upper, lower, off */
+ rc= tailoring_append(st, "[caseFirst %.*s]", len, attr);
+ break;
+
+ case _CS_ST_HIRAGANA_QUATERNARY:
+ /* on, off */
+ rc= tailoring_append(st, "[hiraganaQ %.*s]", len, attr);
+ break;
+
+ case _CS_ST_NUMERIC:
+ /* on, off */
+ rc= tailoring_append(st, "[numeric %.*s]", len, attr);
+ break;
+
+ case _CS_ST_VARIABLE_TOP:
+ /* TODO for WL#896: check value format */
+ rc= tailoring_append(st, "[variableTop %.*s]", len, attr);
+ break;
+
+ case _CS_ST_MATCH_BOUNDARIES:
+ /* none, whole-character, whole-word */
+ rc= tailoring_append(st, "[match-boundaries %.*s]", len, attr);
+ break;
+
+ case _CS_ST_MATCH_STYLE:
+ /* minimal, medial, maximal */
+ rc= tailoring_append(st, "[match-style %.*s]", len, attr);
+ break;
+
+
+ /* Rules */
case _CS_RESET:
+ rc= tailoring_append(st, "%.*s", len, attr);
+ break;
+
case _CS_DIFF1:
case _CS_DIFF2:
case _CS_DIFF3:
+ case _CS_DIFF4:
case _CS_IDENTICAL:
+ rc= tailoring_append(st, diff_fmt[state - _CS_DIFF1], len, attr);
+ break;
+
+
+ /* Rules: Expansion */
+ case _CS_EXP_EXTEND:
+ rc= tailoring_append(st, " / %.*s", len, attr);
+ break;
+
+ case _CS_EXP_DIFF1:
+ case _CS_EXP_DIFF2:
+ case _CS_EXP_DIFF3:
+ case _CS_EXP_DIFF4:
+ case _CS_EXP_IDENTICAL:
+ if (i->context[0])
{
- /*
- Convert collation description from
- Locale Data Markup Language (LDML)
- into ICU Collation Customization expression.
- */
- char arg[16];
- const char *cmd[]= {"&","<","<<","<<<","="};
- i->cs.tailoring= i->tailoring;
- mstr(arg,attr,len,sizeof(arg)-1);
- if (i->tailoring_length + 20 < sizeof(i->tailoring))
- {
- char *dst= i->tailoring_length + i->tailoring;
- i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
- }
+ rc= tailoring_append2(st, context_diff_fmt[state - _CS_EXP_DIFF1],
+ strlen(i->context), i->context, len, attr);
+ i->context[0]= 0;
}
+ else
+ rc= tailoring_append(st, diff_fmt[state - _CS_EXP_DIFF1], len, attr);
+ break;
+
+ /* Rules: Context */
+ case _CS_CONTEXT:
+ /*
+ Remember the context only if it fits into i->context together with
+ the terminating '\0'; a longer value is ignored.
+ */
+ if (len < sizeof(i->context))
+ {
+ memcpy(i->context, attr, len);
+ i->context[len]= '\0';
+ }
+ break;
+
+ /* Rules: Abbreviating Ordering Specifications */
+ case _CS_A_DIFF1:
+ case _CS_A_DIFF2:
+ case _CS_A_DIFF3:
+ case _CS_A_DIFF4:
+ case _CS_A_IDENTICAL:
+ rc= tailoring_append_abbreviation(st, diff_fmt[state - _CS_A_DIFF1], len, attr);
+ break;
+
+ /* Rules: Placing Characters Before Others */
+ case _CS_RESET_BEFORE:
+ /*
+ TODO for WL#896: Add this check into text customization parser:
+ It is an error if the strength of the before relation is not identical
+ to the relation after the reset. We'll need this for WL#896.
+ */
+ rc= tailoring_append(st, "[before %.*s]", len, attr);
+ break;
+
+
+ default:
+ break;
}
- return MY_XML_OK;
+
+ return rc;
}
-my_bool my_parse_charset_xml(const char *buf, size_t len,
- int (*add_collation)(struct charset_info_st *cs))
+/*
+ Parse the XML charset definition in buf[0..len) with the cs_enter,
+ cs_value and cs_leave handlers; "loader" supplies the callbacks they use.
+ Returns FALSE on success and TRUE if my_xml_parse() failed. On failure
+ "at line N pos M: message" is written into loader->error, provided the
+ message fits.
+*/
+my_bool
+my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t len)
{
 MY_XML_PARSER p;
- struct my_cs_file_info i;
+ struct my_cs_file_info info;
 my_bool rc;
+ my_charset_file_init(&info);
 my_xml_parser_create(&p);
 my_xml_set_enter_handler(&p,cs_enter);
 my_xml_set_value_handler(&p,cs_value);
 my_xml_set_leave_handler(&p,cs_leave);
- i.add_collation= add_collation;
- my_xml_set_user_data(&p,(void*)&i);
+ info.loader= loader;
+ my_xml_set_user_data(&p, (void *) &info);
 rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? FALSE : TRUE;
+ if (rc)
+ {
+ /* Collect the error details before the parser is released */
+ const char *errstr= my_xml_error_string(&p);
+ /* 40 covers "at line ", " pos ", ": ", two full-width ints and the '\0' */
+ if (sizeof(loader->error) > 40 + strlen(errstr))
+ {
+ /* my_snprintf() is deliberately not available in this file */
+ sprintf(loader->error, "at line %d pos %d: %s",
+ (int) my_xml_error_lineno(&p)+1,
+ (int) my_xml_error_pos(&p),
+ errstr);
+ }
+ }
 my_xml_parser_free(&p);
+ my_charset_file_free(&info);
 return rc;
}
diff --git a/strings/str_alloc.c b/strings/str_alloc.c
index 17139e7b773..91246603f2e 100644
--- a/strings/str_alloc.c
+++ b/strings/str_alloc.c
@@ -31,5 +31,11 @@ static void my_str_free_default(void *ptr)
free(ptr);
}
+void *my_str_realloc_default(void *ptr, size_t size) /* initial target of the my_str_realloc hook: forwards to realloc() */
+{
+ return realloc(ptr, size);
+}
+
void *(*my_str_malloc)(size_t)= &my_str_malloc_default;
void (*my_str_free)(void *)= &my_str_free_default;
+void *(*my_str_realloc)(void *, size_t)= &my_str_realloc_default;
diff --git a/strings/xml.c b/strings/xml.c
index 3b2c278f553..8073b881a47 100644
--- a/strings/xml.c
+++ b/strings/xml.c
@@ -15,6 +15,7 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
#include "strings_def.h"
+#include "m_string.h"
#include "my_xml.h"
@@ -207,25 +208,71 @@ static int my_xml_value(MY_XML_PARSER *st, const char *str, size_t len)
}
-static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len)
+/**
+  Ensure the attr buffer is wide enough to hold the new value
+
+  Expand and/or allocate dynamic buffer as needed to hold the concatenated
+  path and the terminating zero. The first growth copies the current path
+  out of the static buffer into a freshly allocated one; later growths
+  reallocate the dynamic buffer.
+
+  The parser state (buffer, buffer_size, start, end) is updated only
+  after the allocation succeeded. On failure the previous buffer stays
+  in place, so it is neither leaked nor left dangling.
+
+  @param st   the parser instance
+  @param len  the length of the attribute to be added
+  @return state
+  @retval MY_XML_ERROR  allocation failed
+  @retval MY_XML_OK     success
+*/
+static int my_xml_attr_ensure_space(MY_XML_PARSER *st, size_t len)
{
- if ((size_t) (st->attrend-st->attr+len+1) > sizeof(st->attr))
+  size_t ofs= st->attr.end - st->attr.start;
+  len++; /* Add terminating zero. */
+  if (ofs + len > st->attr.buffer_size)
{
- sprintf(st->errstr,"To deep XML");
- return MY_XML_ERROR;
+    /* Double the size plus the requested length, saturating at SIZE_T_MAX */
+    size_t new_size= (SIZE_T_MAX - len) / 2 > st->attr.buffer_size ?
+                     st->attr.buffer_size * 2 + len : SIZE_T_MAX;
+    char *new_buffer;
+
+    if (!st->attr.buffer)
+    {
+      new_buffer= (char *) my_str_malloc(new_size);
+      if (new_buffer)
+        memcpy(new_buffer, st->attr.static_buffer, ofs + 1 /*term. zero */);
+    }
+    else
+      new_buffer= (char *) my_str_realloc(st->attr.buffer, new_size);
+
+    if (!new_buffer)
+      return MY_XML_ERROR;
+
+    st->attr.buffer= new_buffer;
+    st->attr.buffer_size= new_size;
+    st->attr.start= st->attr.buffer;
+    st->attr.end= st->attr.start + ofs;
 }
- if (st->attrend > st->attr)
 return MY_XML_OK;
+}
+
+
+/**
+ Rewind the attr buffer to its initial state.
+ Only the end pointer is moved back to the start of the buffer;
+ nothing is freed or reallocated.
+*/
+static void my_xml_attr_rewind(MY_XML_PARSER *p)
+{
+ /* keep the buffer already allocated */
+ p->attr.end= p->attr.start;
+}
+
+
+static int my_xml_enter(MY_XML_PARSER *st, const char *str, size_t len) /* appends tag "str" to the '/'-separated path in st->attr, then calls the user's enter handler (if any) */
+{
+ if (my_xml_attr_ensure_space(st, len + 1 /* the separator char */))
+ return MY_XML_ERROR;
+
+ if (st->attr.end > st->attr.start) /* path is not empty: put a separator before the new tag */
{
- st->attrend[0]= '/';
- st->attrend++;
+ st->attr.end[0]= '/';
+ st->attr.end++;
}
- memcpy(st->attrend,str,len);
- st->attrend+=len;
- st->attrend[0]='\0';
+ memcpy(st->attr.end, str, len);
+ st->attr.end+= len;
+ st->attr.end[0]= '\0'; /* keep the path zero-terminated */
 if (st->flags & MY_XML_FLAG_RELATIVE_NAMES)
 return st->enter ? st->enter(st, str, len) : MY_XML_OK;
 else
- return st->enter ? st->enter(st,st->attr,st->attrend-st->attr) : MY_XML_OK;
+ return st->enter ? /* without RELATIVE_NAMES the handler gets the whole path, not only the tag */
+ st->enter(st, st->attr.start, st->attr.end - st->attr.start) : MY_XML_OK;
}
@@ -246,8 +293,8 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
int rc;
/* Find previous '/' or beginning */
- for (e=p->attrend; (e>p->attr) && (e[0] != '/') ; e--);
- glen = (size_t) ((e[0] == '/') ? (p->attrend-e-1) : p->attrend-e);
+ for (e= p->attr.end; (e > p->attr.start) && (e[0] != '/') ; e--);
+ glen= (size_t) ((e[0] == '/') ? (p->attr.end - e - 1) : p->attr.end - e);
if (str && (slen != glen))
{
@@ -265,11 +312,12 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
if (p->flags & MY_XML_FLAG_RELATIVE_NAMES)
rc= p->leave_xml ? p->leave_xml(p, str, slen) : MY_XML_OK;
else
- rc= (p->leave_xml ? p->leave_xml(p,p->attr,p->attrend-p->attr) :
+ rc= (p->leave_xml ?
+ p->leave_xml(p, p->attr.start, p->attr.end - p->attr.start) :
MY_XML_OK);
*e='\0';
- p->attrend=e;
+ p->attr.end= e;
return rc;
}
@@ -277,7 +325,9 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, size_t slen)
int my_xml_parse(MY_XML_PARSER *p,const char *str, size_t len)
{
- p->attrend=p->attr;
+
+ my_xml_attr_rewind(p);
+
p->beg=str;
p->cur=str;
p->end=str+len;
@@ -432,7 +482,7 @@ gt:
}
}
- if (p->attr[0])
+ if (p->attr.start[0])
{
sprintf(p->errstr,"unexpected END-OF-INPUT");
return MY_XML_ERROR;
@@ -443,12 +493,22 @@ gt:
void my_xml_parser_create(MY_XML_PARSER *p)
{
- bzero((void*)p,sizeof(p[0]));
+ memset(p, 0, sizeof(p[0])); /* zero-fill the whole parser structure first */
+ /*
+ Use static buffer while it's sufficient.
+ start and end both point at its beginning (empty path) and
+ buffer_size is the capacity of the static buffer.
+ */
+ p->attr.start= p->attr.end= p->attr.static_buffer;
+ p->attr.buffer_size= sizeof(p->attr.static_buffer);
}
-void my_xml_parser_free(MY_XML_PARSER *p __attribute__((unused)))
+/*
+ Release the dynamically allocated attr buffer, if there is one.
+ The attr pointers and size are then switched back to the (emptied)
+ static buffer, so that start/end do not keep pointing into freed memory.
+*/
+void my_xml_parser_free(MY_XML_PARSER *p)
{
+ if (p->attr.buffer)
+ {
+ my_str_free(p->attr.buffer);
+ p->attr.buffer= NULL;
+ p->attr.start= p->attr.end= p->attr.static_buffer;
+ p->attr.buffer_size= sizeof(p->attr.static_buffer);
+ p->attr.start[0]= '\0';
+ }
}