diff options
author | Alexander Barkov <bar@mysql.com> | 2009-10-15 15:17:32 +0500 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2009-10-15 15:17:32 +0500 |
commit | 3929dddcd76130529cabfbd68cfff024e6b5c80d (patch) | |
tree | 7f2c2690a6c4b4439750de17315dbc10dec56617 | |
parent | 4def52165d3724080793a752f244e56aea3aecc0 (diff) | |
download | mariadb-git-3929dddcd76130529cabfbd68cfff024e6b5c80d.tar.gz |
Backporting WL#4164 Two-byte collation IDs
-rw-r--r-- | include/my_handler.h | 2 | ||||
-rw-r--r-- | include/my_sys.h | 3 | ||||
-rw-r--r-- | include/myisam.h | 4 | ||||
-rw-r--r-- | mysql-test/r/ctype_ldml.result | bin | 6572 -> 7649 bytes | |||
-rw-r--r-- | mysql-test/std_data/Index.xml | 12 | ||||
-rw-r--r-- | mysql-test/t/ctype_ldml.test | 27 | ||||
-rw-r--r-- | mysys/charset.c | 10 | ||||
-rw-r--r-- | sql/sql_show.cc | 23 | ||||
-rw-r--r-- | sql/table.cc | 20 | ||||
-rw-r--r-- | sql/unireg.cc | 11 | ||||
-rw-r--r-- | storage/myisam/ft_static.c | 4 | ||||
-rw-r--r-- | storage/myisam/mi_open.c | 7 | ||||
-rw-r--r-- | strings/conf_to_src.c | 18 |
13 files changed, 102 insertions, 39 deletions
diff --git a/include/my_handler.h b/include/my_handler.h index a3376cb74a2..ae56331df86 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -53,8 +53,8 @@ typedef struct st_HA_KEYSEG /* Key-portion */ uint16 bit_pos; /* Position to bit part */ uint16 flag; uint16 length; /* Keylength */ + uint16 language; uint8 type; /* Type of key (for sort) */ - uint8 language; uint8 null_bit; /* bitmask to test for NULL */ uint8 bit_start,bit_end; /* if bit field */ uint8 bit_length; /* Length of bit part */ diff --git a/include/my_sys.h b/include/my_sys.h index 4b93dc0e364..34965ae9c67 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -231,8 +231,9 @@ extern uint my_large_page_size; #endif /* charsets */ +#define MY_ALL_CHARSETS_SIZE 2048 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *default_charset_info; -extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *all_charsets[256]; +extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]; extern CHARSET_INFO compiled_charsets[]; /* statistics */ diff --git a/include/myisam.h b/include/myisam.h index 02251eeacb4..70c6f33f118 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -162,7 +162,7 @@ typedef struct st_mi_create_info ulonglong data_file_length; ulonglong key_file_length; uint old_options; - uint8 language; + uint16 language; my_bool with_auto_increment; } MI_CREATE_INFO; @@ -410,7 +410,7 @@ typedef struct st_mi_check_param uint out_flag,warning_printed,error_printed,verbose; uint opt_sort_key,total_files,max_level; uint testflag, key_cache_block_size; - uint8 language; + uint16 language; my_bool using_global_keycache, opt_lock_memory, opt_follow_links; my_bool retry_repair, force_sort; char temp_filename[FN_REFLEN],*isam_file_name; diff --git a/mysql-test/r/ctype_ldml.result b/mysql-test/r/ctype_ldml.result Binary files differ index 711921eb526..5d42b64c54c 100644 --- a/mysql-test/r/ctype_ldml.result +++ b/mysql-test/r/ctype_ldml.result diff --git a/mysql-test/std_data/Index.xml 
b/mysql-test/std_data/Index.xml index 988dddcc68a..c77d8996229 100644 --- a/mysql-test/std_data/Index.xml +++ b/mysql-test/std_data/Index.xml @@ -1,23 +1,29 @@ <charsets> <charset name="utf8"> - <collation name="utf8_test_ci" id="240"> + <collation name="utf8_test_ci" id="353"> <rules> <reset>a</reset> <s>b</s> </rules> </collation> + <collation name="utf8_maxuserid_ci" id="2047"> + <rules> + <reset>a</reset> + <s>b</s> + </rules> + </collation> </charset> <charset name="ucs2"> - <collation name="ucs2_test_ci" id="241"> + <collation name="ucs2_test_ci" id="358"> <rules> <reset>a</reset> <s>b</s> </rules> </collation> - <collation name="ucs2_vn_ci" id="242"> + <collation name="ucs2_vn_ci" id="359"> <!-- Vietnamese experimental collation --> <rules> <reset>A</reset> diff --git a/mysql-test/t/ctype_ldml.test b/mysql-test/t/ctype_ldml.test index db9461bfbf7..6ebef1d7f16 100644 --- a/mysql-test/t/ctype_ldml.test +++ b/mysql-test/t/ctype_ldml.test @@ -49,6 +49,7 @@ DROP TABLE t1; # # Vietnamese experimental collation # +--echo Vietnamese experimental collation show collation like 'ucs2_vn_ci'; create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); @@ -86,3 +87,29 @@ select hex(c1) as h, c1 from t1 order by c1, h; select group_concat(hex(c1) order by hex(c1)) from t1 group by c1; select group_concat(c1 order by hex(c1) SEPARATOR '') from t1 group by c1; drop table t1; + + +-- echo The following tests check that two-byte collation IDs work +# The file ../std_data/Index.xml has a number of collations with high IDs. + +# Test that the "ID" column in I_S and SHOW queries can handle two bytes +select * from information_schema.collations where id>256 order by id; +show collation like '%test%'; + +# Test that two-byte collation ID is correctly transferred to the client side. 
+show collation like 'ucs2_vn_ci'; +create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); +insert into t1 values (0x0061); +--enable_metadata +set @@character_set_results=NULL; +select * from t1; +--disable_metadata +drop table t1; + +# +# Check maximum collation ID (2047 as of MySQL-6.0.9) +# +CREATE TABLE t1 (s1 char(10) character set utf8 collate utf8_maxuserid_ci); +INSERT INTO t1 VALUES ('a'),('b'); +SELECT * FROM t1 WHERE s1='a' ORDER BY BINARY s1; +DROP TABLE t1; diff --git a/mysys/charset.c b/mysys/charset.c index 214ca170757..280b2ad6091 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -42,7 +42,7 @@ get_collation_number_internal(const char *name) { CHARSET_INFO **cs; for (cs= all_charsets; - cs < all_charsets+array_elements(all_charsets)-1 ; + cs < all_charsets + array_elements(all_charsets); cs++) { if ( cs[0] && cs[0]->name && @@ -387,7 +387,7 @@ char *get_charsets_dir(char *buf) DBUG_RETURN(res); } -CHARSET_INFO *all_charsets[256]={NULL}; +CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]={NULL}; CHARSET_INFO *default_charset_info = &my_charset_latin1; void add_compiled_collation(CHARSET_INFO *cs) @@ -429,7 +429,7 @@ static my_bool init_available_charsets(myf myflags) /* Copy compiled charsets */ for (cs=all_charsets; - cs < all_charsets+array_elements(all_charsets)-1 ; + cs < all_charsets + array_elements(all_charsets); cs++) { if (*cs) @@ -469,7 +469,7 @@ uint get_charset_number(const char *charset_name, uint cs_flags) init_available_charsets(MYF(0)); for (cs= all_charsets; - cs < all_charsets+array_elements(all_charsets)-1 ; + cs < all_charsets + array_elements(all_charsets); cs++) { if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) && @@ -543,7 +543,7 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags) (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ - if (!cs_number || cs_number >= array_elements(all_charsets)-1) + if (!cs_number || cs_number > array_elements(all_charsets)) return NULL; 
cs=get_internal_charset(cs_number, flags); diff --git a/sql/sql_show.cc b/sql/sql_show.cc index b16f050dea6..074106dcc44 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -3899,7 +3899,9 @@ int fill_schema_charsets(THD *thd, TABLE_LIST *tables, COND *cond) TABLE *table= tables->table; CHARSET_INFO *scs= system_charset_info; - for (cs= all_charsets ; cs < all_charsets+255 ; cs++) + for (cs= all_charsets ; + cs < all_charsets + array_elements(all_charsets) ; + cs++) { CHARSET_INFO *tmp_cs= cs[0]; if (tmp_cs && (tmp_cs->state & MY_CS_PRIMARY) && @@ -4004,7 +4006,9 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond) const char *wild= thd->lex->wild ? thd->lex->wild->ptr() : NullS; TABLE *table= tables->table; CHARSET_INFO *scs= system_charset_info; - for (cs= all_charsets ; cs < all_charsets+255 ; cs++ ) + for (cs= all_charsets ; + cs < all_charsets + array_elements(all_charsets) ; + cs++ ) { CHARSET_INFO **cl; CHARSET_INFO *tmp_cs= cs[0]; @@ -4012,7 +4016,9 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond) (tmp_cs->state & MY_CS_HIDDEN) || !(tmp_cs->state & MY_CS_PRIMARY)) continue; - for (cl= all_charsets; cl < all_charsets+255 ;cl ++) + for (cl= all_charsets; + cl < all_charsets + array_elements(all_charsets) ; + cl ++) { CHARSET_INFO *tmp_cl= cl[0]; if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) || @@ -4045,17 +4051,22 @@ int fill_schema_coll_charset_app(THD *thd, TABLE_LIST *tables, COND *cond) CHARSET_INFO **cs; TABLE *table= tables->table; CHARSET_INFO *scs= system_charset_info; - for (cs= all_charsets ; cs < all_charsets+255 ; cs++ ) + for (cs= all_charsets ; + cs < all_charsets + array_elements(all_charsets) ; + cs++ ) { CHARSET_INFO **cl; CHARSET_INFO *tmp_cs= cs[0]; if (!tmp_cs || !(tmp_cs->state & MY_CS_AVAILABLE) || !(tmp_cs->state & MY_CS_PRIMARY)) continue; - for (cl= all_charsets; cl < all_charsets+255 ;cl ++) + for (cl= all_charsets; + cl < all_charsets + array_elements(all_charsets) ; + cl ++) { 
CHARSET_INFO *tmp_cl= cl[0]; - if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) || + if (!tmp_cl || !(tmp_cl->state & MY_CS_AVAILABLE) || + (tmp_cl->state & MY_CS_HIDDEN) || !my_charset_same(tmp_cs,tmp_cl)) continue; restore_record(table, s->default_values); diff --git a/sql/table.cc b/sql/table.cc index d71a3ecd9bb..65d46eade8c 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -725,7 +725,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, { share->avg_row_length= uint4korr(head+34); share->row_type= (row_type) head[40]; - share->table_charset= get_charset((uint) head[38],MYF(0)); + share->table_charset= get_charset((((uint) head[41]) << 8) + + (uint) head[38],MYF(0)); share->null_field_first= 1; } if (!share->table_charset) @@ -1184,12 +1185,13 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head, } else { - if (!strpos[14]) + uint csid= strpos[14] + (((uint) strpos[11]) << 8); + if (!csid) charset= &my_charset_bin; - else if (!(charset=get_charset((uint) strpos[14], MYF(0)))) + else if (!(charset= get_charset(csid, MYF(0)))) { error= 5; // Unknown or unavailable charset - errarg= (int) strpos[14]; + errarg= (int) csid; goto err; } } @@ -2457,8 +2459,7 @@ File create_frm(THD *thd, const char *name, const char *db, if ((file= my_create(name, CREATE_MODE, create_flags, MYF(0))) >= 0) { - uint key_length, tmp_key_length; - uint tmp; + uint key_length, tmp_key_length, tmp, csid; bzero((char*) fileinfo,64); /* header */ fileinfo[0]=(uchar) 254; @@ -2498,8 +2499,9 @@ File create_frm(THD *thd, const char *name, const char *db, fileinfo[32]=0; // No filename anymore fileinfo[33]=5; // Mark for 5.0 frm file int4store(fileinfo+34,create_info->avg_row_length); - fileinfo[38]= (create_info->default_table_charset ? - create_info->default_table_charset->number : 0); + csid= (create_info->default_table_charset ? 
+ create_info->default_table_charset->number : 0); + fileinfo[38]= (uchar) csid; /* In future versions, we will store in fileinfo[39] the values of the TRANSACTIONAL and PAGE_CHECKSUM clauses of CREATE TABLE. @@ -2507,7 +2509,7 @@ File create_frm(THD *thd, const char *name, const char *db, fileinfo[39]= 0; fileinfo[40]= (uchar) create_info->row_type; /* Next few bytes where for RAID support */ - fileinfo[41]= 0; + fileinfo[41]= (uchar) (csid >> 8); fileinfo[42]= 0; fileinfo[43]= 0; fileinfo[44]= 0; diff --git a/sql/unireg.cc b/sql/unireg.cc index 18b0786190a..f08c64a3182 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -801,20 +801,27 @@ static bool pack_fields(File file, List<Create_field> &create_fields, recpos= field->offset+1 + (uint) data_offset; int3store(buff+5,recpos); int2store(buff+8,field->pack_flag); - int2store(buff+10,field->unireg_check); + DBUG_ASSERT(field->unireg_check < 256); + buff[10]= (uchar) field->unireg_check; buff[12]= (uchar) field->interval_id; buff[13]= (uchar) field->sql_type; if (field->sql_type == MYSQL_TYPE_GEOMETRY) { + buff[11]= 0; buff[14]= (uchar) field->geom_type; #ifndef HAVE_SPATIAL DBUG_ASSERT(0); // Should newer happen #endif } else if (field->charset) + { + buff[11]= (uchar) (field->charset->number >> 8); buff[14]= (uchar) field->charset->number; + } else - buff[14]= 0; // Numerical + { + buff[11]= buff[14]= 0; // Numerical + } int2store(buff+15, field->comment.length); comment_length+= field->comment.length; set_if_bigger(int_count,field->interval_id); diff --git a/storage/myisam/ft_static.c b/storage/myisam/ft_static.c index 1b6c7458fd7..16cbfa1dada 100644 --- a/storage/myisam/ft_static.c +++ b/storage/myisam/ft_static.c @@ -30,8 +30,8 @@ const HA_KEYSEG ft_keysegs[FT_SEGS]={ 0, /* Bit pos */ HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */ HA_FT_MAXBYTELEN, /* length */ - HA_KEYTYPE_VARTEXT2, /* type */ 63, /* language (will be overwritten) */ + HA_KEYTYPE_VARTEXT2, /* type */ 0, /* null_bit */ 2, 0, 0 /* bit_start, 
bit_end, bit_length */ }, @@ -41,7 +41,7 @@ const HA_KEYSEG ft_keysegs[FT_SEGS]={ be packed in any way, otherwise w_search() won't be able to update key entry 'in vivo' */ - 0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, HA_FT_WTYPE, 63, 0, 0, 0, 0 + 0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 63, HA_FT_WTYPE, 0, 0, 0, 0 } }; diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index e18146f2357..9117d76de24 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -1130,10 +1130,10 @@ int mi_keyseg_write(File file, const HA_KEYSEG *keyseg) ulong pos; *ptr++= keyseg->type; - *ptr++= keyseg->language; + *ptr++= keyseg->language & 0xFF; /* Collation ID, low byte */ *ptr++= keyseg->null_bit; *ptr++= keyseg->bit_start; - *ptr++= keyseg->bit_end; + *ptr++= keyseg->language >> 8; /* Collation ID, high byte */ *ptr++= keyseg->bit_length; mi_int2store(ptr,keyseg->flag); ptr+=2; mi_int2store(ptr,keyseg->length); ptr+=2; @@ -1152,12 +1152,13 @@ uchar *mi_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg) keyseg->language = *ptr++; keyseg->null_bit = *ptr++; keyseg->bit_start = *ptr++; - keyseg->bit_end = *ptr++; + keyseg->language += ((uint16) (*ptr++)) << 8; keyseg->bit_length = *ptr++; keyseg->flag = mi_uint2korr(ptr); ptr +=2; keyseg->length = mi_uint2korr(ptr); ptr +=2; keyseg->start = mi_uint4korr(ptr); ptr +=4; keyseg->null_pos = mi_uint4korr(ptr); ptr +=4; + keyseg->bit_end= 0; keyseg->charset=0; /* Will be filled in later */ if (keyseg->null_bit) /* We adjust bit_pos if null_bit is last in the byte */ diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c index 9f1ed9b2441..73ade93f6e1 100644 --- a/strings/conf_to_src.c +++ b/strings/conf_to_src.c @@ -23,7 +23,7 @@ #define ROW16_LEN 8 #define MAX_BUF 64*1024 -static CHARSET_INFO all_charsets[256]; +static CHARSET_INFO all_charsets[512]; void @@ -63,7 +63,9 @@ print_array16(FILE *f, const char *set, const char *name, uint16 *a, int n) static int get_charset_number(const char *charset_name) { CHARSET_INFO *cs; - for 
(cs= all_charsets; cs < all_charsets+255; ++cs) + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) { if ( cs->name && !strcmp(cs->name, charset_name)) return cs->number; @@ -288,7 +290,9 @@ main(int argc, char **argv __attribute__((unused))) sprintf(filename,"%s/%s",argv[1],"Index.xml"); my_read_charset_file(filename); - for (cs=all_charsets; cs < all_charsets+256; cs++) + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) { if (cs->number && !(cs->state & MY_CS_COMPILED)) { @@ -313,7 +317,9 @@ main(int argc, char **argv __attribute__((unused))) fprintf(f,"#include <m_ctype.h>\n\n"); - for (cs=all_charsets; cs < all_charsets+256; cs++) + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) { if (simple_cs_is_full(cs)) { @@ -330,7 +336,9 @@ main(int argc, char **argv __attribute__((unused))) } fprintf(f,"CHARSET_INFO compiled_charsets[] = {\n"); - for (cs=all_charsets; cs < all_charsets+256; cs++) + for (cs= all_charsets; + cs < all_charsets + array_elements(all_charsets); + cs++) { if (simple_cs_is_full(cs)) { |