diff options
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- | mysys/charset.c | 450 |
1 files changed, 246 insertions, 204 deletions
diff --git a/mysys/charset.c b/mysys/charset.c index 1001b76f417..691a4233269 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -20,15 +20,10 @@ #include <m_string.h> #include <my_dir.h> -typedef struct cs_id_st { - char *name; - uint number; -} CS_ID; const char *charsets_dir = NULL; -static DYNAMIC_ARRAY cs_info_table; -static CS_ID **available_charsets; static int charset_initialized=0; +CHARSET_INFO all_charsets[256]; #define MAX_LINE 1024 @@ -36,6 +31,7 @@ static int charset_initialized=0; #define TO_LOWER_TABLE_SIZE 256 #define TO_UPPER_TABLE_SIZE 256 #define SORT_ORDER_TABLE_SIZE 256 +#define TO_UNI_TABLE_SIZE 256 struct simpleconfig_buf_st { FILE *f; @@ -43,24 +39,6 @@ struct simpleconfig_buf_st { char *p; }; -static uint num_from_csname(CS_ID **cs, const char *name) -{ - CS_ID **c; - for (c = cs; *c; ++c) - if (!strcmp((*c)->name, name)) - return (*c)->number; - return 0; /* this mimics find_type() */ -} - -static char *name_from_csnum(CS_ID **cs, uint number) -{ - CS_ID **c; - if(cs) - for (c = cs; *c; ++c) - if ((*c)->number == number) - return (*c)->name; - return (char*) "?"; /* this mimics find_type() */ -} static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf) { @@ -68,7 +46,7 @@ static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf) for (;;) { - while (isspace(*endptr)) + while (my_isspace(system_charset_info, *endptr)) ++endptr; if (*endptr && *endptr != '#') /* Not comment */ break; /* Found something */ @@ -77,7 +55,7 @@ static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf) endptr = fb->buf; } - while (!isspace(*endptr)) + while (!my_isspace(system_charset_info, *endptr)) *buf++= *endptr++; *buf=0; fb->p = endptr; @@ -108,13 +86,11 @@ char *get_charsets_dir(char *buf) } -static my_bool read_charset_index(CS_ID ***charsets, myf myflags) +static my_bool read_charset_index(myf myflags) { struct simpleconfig_buf_st fb; char buf[MAX_LINE], num_buf[MAX_LINE]; - DYNAMIC_ARRAY cs; - CS_ID *csid; - + strmov(get_charsets_dir(buf), "Index"); if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) @@ -122,13 +98,12 @@ static my_bool read_charset_index(CS_ID ***charsets, myf myflags) fb.buf[0] = '\0'; fb.p = fb.buf; - if (my_init_dynamic_array(&cs, sizeof(CS_ID *), 32, 32)) - return TRUE; - + while (!get_word(&fb, buf) && !get_word(&fb, num_buf)) { uint csnum; uint length; + CHARSET_INFO *cs; if (!(csnum = atoi(num_buf))) { @@ -137,64 +112,76 @@ static my_bool read_charset_index(CS_ID ***charsets, myf myflags) return TRUE; } - if (!(csid = (CS_ID*) my_once_alloc(sizeof(CS_ID), myflags)) || - !(csid->name= + cs=&all_charsets[csnum]; + + if (!(cs->name= (char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags))) { my_fclose(fb.f,myflags); return TRUE; } - memcpy(csid->name,buf,length); - csid->number = csnum; - - insert_dynamic(&cs, (gptr) &csid); + memcpy((char*)cs->name,buf,length); + cs->number=csnum; } my_fclose(fb.f,myflags); - - if (!(*charsets = - (CS_ID **) my_once_alloc((cs.elements + 1) * sizeof(CS_ID *), myflags))) - return TRUE; - /* unwarranted chumminess with dynamic_array implementation? */ - memcpy((byte *) *charsets, cs.buffer, cs.elements * sizeof(CS_ID *)); - (*charsets)[cs.elements] = NULL; - delete_dynamic(&cs); - return FALSE; } +static void set_max_sort_char(CHARSET_INFO *cs) +{ + uchar max_char; + uint i; + + if (!cs->sort_order) + return; + + max_char=cs->sort_order[(uchar) cs->max_sort_char]; + for (i = 0; i < 256; i++) + { + if ((uchar) cs->sort_order[i] > max_char) + { + max_char=(uchar) cs->sort_order[i]; + cs->max_sort_char= (char) i; + } + } +} static my_bool init_available_charsets(myf myflags) { - my_bool error=0; + my_bool error=FALSE; /* We have to use charset_initialized to not lock on THR_LOCK_charset inside get_internal_charset... */ if (!charset_initialized) { + CHARSET_INFO *cs; /* To make things thread safe we are not allowing other threads to interfere while we may changing the cs_info_table */ pthread_mutex_lock(&THR_LOCK_charset); - if (!cs_info_table.buffer) /* If not initialized */ + + bzero(&all_charsets,sizeof(all_charsets)); + + /* Copy compiled charsets */ + + for (cs=compiled_charsets; cs->name; cs++) { - my_init_dynamic_array(&cs_info_table, sizeof(CHARSET_INFO*), 16, 8); - error = read_charset_index(&available_charsets, myflags); + all_charsets[cs->number]=cs[0]; + set_max_sort_char(&all_charsets[cs->number]); } + error = read_charset_index(myflags); charset_initialized=1; pthread_mutex_unlock(&THR_LOCK_charset); } - if(!available_charsets || !available_charsets[0]) - error = TRUE; return error; } void free_charsets(void) { - delete_dynamic(&cs_info_table); charset_initialized=0; } @@ -214,27 +201,123 @@ static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb) return 0; } +static my_bool fill_uint16_array(uint16 *array, int sz, struct simpleconfig_buf_st *fb) +{ + char buf[MAX_LINE]; + while (sz--) + { + if (get_word(fb, buf)) + { + DBUG_PRINT("error",("get_word failed, expecting %d more words", sz + 1)); + return 1; + } + *array++ = (uint16) strtol(buf, NULL, 16); + } + return 0; +} + -static void get_charset_conf_name(uint cs_number, char *buf) +static void get_charset_conf_name(const char *cs_name, char *buf) { - strxmov(get_charsets_dir(buf), - name_from_csnum(available_charsets, cs_number), ".conf", NullS); + strxmov(get_charsets_dir(buf), cs_name, ".conf", NullS); } +typedef struct { + int nchars; + MY_UNI_IDX uidx; +} uni_idx; + +#define PLANE_SIZE 0x100 +#define PLANE_NUM 0x100 +#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM) + +static int pcmp(const void * f, const void * s) +{ + const uni_idx *F=(const uni_idx*)f; + const uni_idx *S=(const uni_idx*)s; + int res; + + if(!(res=((S->nchars)-(F->nchars)))) + res=((F->uidx.from)-(S->uidx.to)); + return res; +} -static my_bool read_charset_file(uint cs_number, CHARSET_INFO *set, +static my_bool create_fromuni(CHARSET_INFO *cs){ + uni_idx idx[PLANE_NUM]; + int i,n; + + /* Clear plane statistics */ + bzero(idx,sizeof(idx)); + + /* Count number of characters in each plane */ + for(i=0;i<0x100;i++) + { + uint16 wc=cs->tab_to_uni[i]; + int pl= PLANE_NUMBER(wc); + + if(wc || !i) + { + if(!idx[pl].nchars) + { + idx[pl].uidx.from=wc; + idx[pl].uidx.to=wc; + }else + { + idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from; + idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to; + } + idx[pl].nchars++; + } + } + + /* Sort planes in descending order */ + qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp); + + for(i=0;i<PLANE_NUM;i++) + { + int ch,numchars; + + /* Skip empty plane */ + if(!idx[i].nchars) + break; + + numchars=idx[i].uidx.to-idx[i].uidx.from+1; + idx[i].uidx.tab=(unsigned char*)my_once_alloc(numchars*sizeof(*idx[i].uidx.tab),MYF(MY_WME)); + bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab)); + + for(ch=1;ch<PLANE_SIZE;ch++) + { + uint16 wc=cs->tab_to_uni[ch]; + if(wc>=idx[i].uidx.from && wc<=idx[i].uidx.to && wc) + { + int ofs=wc-idx[i].uidx.from; + idx[i].uidx.tab[ofs]=ch; + } + } + } + + /* Allocate and fill reverse table for each plane */ + n=i; + cs->tab_from_uni=(MY_UNI_IDX*)my_once_alloc(sizeof(MY_UNI_IDX)*(n+1),MYF(MY_WME)); + for(i=0;i<n;i++) + cs->tab_from_uni[i]=idx[i].uidx; + + /* Set end-of-list marker */ + bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX)); + return FALSE; +} + + +static my_bool read_charset_file(const char *cs_name, CHARSET_INFO *set, myf myflags) { struct simpleconfig_buf_st fb; char buf[FN_REFLEN]; my_bool result; DBUG_ENTER("read_charset_file"); - DBUG_PRINT("enter",("cs_number: %d", cs_number)); - - if (cs_number <= 0) - DBUG_RETURN(TRUE); + DBUG_PRINT("enter",("cs_name: %s", cs_name)); - get_charset_conf_name(cs_number, buf); + get_charset_conf_name(cs_name, buf); DBUG_PRINT("info",("file name: %s", buf)); if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) @@ -247,7 +330,8 @@ static my_bool read_charset_file(uint cs_number, CHARSET_INFO *set, if (fill_array(set->ctype, CTYPE_TABLE_SIZE, &fb) || fill_array(set->to_lower, TO_LOWER_TABLE_SIZE, &fb) || fill_array(set->to_upper, TO_UPPER_TABLE_SIZE, &fb) || - fill_array(set->sort_order, SORT_ORDER_TABLE_SIZE, &fb)) + fill_array(set->sort_order, SORT_ORDER_TABLE_SIZE, &fb) || + fill_uint16_array(set->tab_to_uni,TO_UNI_TABLE_SIZE,&fb)) result=TRUE; my_fclose(fb.f, MYF(0)); @@ -255,84 +339,84 @@ static my_bool read_charset_file(uint cs_number, CHARSET_INFO *set, } -uint get_charset_number(const char *charset_name) -{ - uint number=compiled_charset_number(charset_name); - if (number) - return number; - if (init_available_charsets(MYF(0))) /* If it isn't initialized */ - return 0; - return num_from_csname(available_charsets, charset_name); -} - -const char *get_charset_name(uint charset_number) -{ - const char *name=compiled_charset_name(charset_number); - if (*name != '?') - return name; - if (init_available_charsets(MYF(0))) /* If it isn't initialized */ - return "?"; - return name_from_csnum(available_charsets, charset_number); -} - - -static CHARSET_INFO *find_charset(CHARSET_INFO **table, uint cs_number, - size_t tablesz) +static CHARSET_INFO *add_charset(uint cs_number, myf flags) { - uint i; - for (i = 0; i < tablesz; ++i) - if (table[i]->number == cs_number) - return table[i]; - return NULL; -} + CHARSET_INFO *cs; + uchar tmp_ctype[CTYPE_TABLE_SIZE]; + uchar tmp_to_lower[TO_LOWER_TABLE_SIZE]; + uchar tmp_to_upper[TO_UPPER_TABLE_SIZE]; + uchar tmp_sort_order[SORT_ORDER_TABLE_SIZE]; + uint16 tmp_to_uni[TO_UNI_TABLE_SIZE]; -static CHARSET_INFO *find_charset_by_name(CHARSET_INFO **table, - const char *name, size_t tablesz) -{ - uint i; - for (i = 0; i < tablesz; ++i) - if (!strcmp(table[i]->name,name)) - return table[i]; - return NULL; -} + /* Note: cs->name is already initialized */ + + cs=&all_charsets[cs_number]; -static CHARSET_INFO *add_charset(uint cs_number, const char *cs_name, myf flags) -{ - CHARSET_INFO tmp_cs,*cs; - uchar tmp_ctype[CTYPE_TABLE_SIZE]; - uchar tmp_to_lower[TO_LOWER_TABLE_SIZE]; - uchar tmp_to_upper[TO_UPPER_TABLE_SIZE]; - uchar tmp_sort_order[SORT_ORDER_TABLE_SIZE]; - - /* Don't allocate memory if we are not sure we can find the char set */ - cs= &tmp_cs; - bzero((char*) cs, sizeof(*cs)); cs->ctype=tmp_ctype; cs->to_lower=tmp_to_lower; cs->to_upper=tmp_to_upper; cs->sort_order=tmp_sort_order; - if (read_charset_file(cs_number, cs, flags)) + cs->tab_to_uni=tmp_to_uni; + if (read_charset_file(cs->name, cs, flags)) return NULL; - cs = (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO), - MYF(MY_WME)); - *cs=tmp_cs; - cs->name = (char *) my_once_alloc((uint) strlen(cs_name)+1, MYF(MY_WME)); cs->ctype = (uchar*) my_once_alloc(CTYPE_TABLE_SIZE, MYF(MY_WME)); cs->to_lower = (uchar*) my_once_alloc(TO_LOWER_TABLE_SIZE, MYF(MY_WME)); cs->to_upper = (uchar*) my_once_alloc(TO_UPPER_TABLE_SIZE, MYF(MY_WME)); cs->sort_order=(uchar*) my_once_alloc(SORT_ORDER_TABLE_SIZE, MYF(MY_WME)); + cs->tab_to_uni=(uint16*)my_once_alloc(TO_UNI_TABLE_SIZE*sizeof(uint16), MYF(MY_WME)); cs->number = cs_number; - memcpy((char*) cs->name, (char*) cs_name, strlen(cs_name) + 1); memcpy((char*) cs->ctype, (char*) tmp_ctype, sizeof(tmp_ctype)); memcpy((char*) cs->to_lower, (char*) tmp_to_lower, sizeof(tmp_to_lower)); memcpy((char*) cs->to_upper, (char*) tmp_to_upper, sizeof(tmp_to_upper)); memcpy((char*) cs->sort_order, (char*) tmp_sort_order, sizeof(tmp_sort_order)); - insert_dynamic(&cs_info_table, (gptr) &cs); + memcpy((char*) cs->tab_to_uni, (char*) tmp_to_uni, sizeof(tmp_to_uni)); + + cs->caseup_str = my_caseup_str_8bit; + cs->casedn_str = my_casedn_str_8bit; + cs->caseup = my_caseup_8bit; + cs->casedn = my_casedn_8bit; + cs->strcasecmp = my_strcasecmp_8bit; + cs->strncasecmp = my_strncasecmp_8bit; + cs->mb_wc = my_mb_wc_8bit; + cs->wc_mb = my_wc_mb_8bit; + + set_max_sort_char(cs); + create_fromuni(cs); + return cs; } + +uint get_charset_number(const char *charset_name) +{ + CHARSET_INFO *cs; + if (init_available_charsets(MYF(0))) /* If it isn't initialized */ + return 0; + + for (cs = all_charsets; cs < all_charsets+255; ++cs) + if ( cs->name && !strcmp(cs->name, charset_name)) + return cs->number; + + return 0; /* this mimics find_type() */ +} + + +const char *get_charset_name(uint charset_number) +{ + CHARSET_INFO *cs; + if (init_available_charsets(MYF(0))) /* If it isn't initialized */ + return "?"; + + cs=&all_charsets[charset_number]; + if ( (cs->number==charset_number) && cs->name ) + return (char*) cs->name; + + return (char*) "?"; /* this mimics find_type() */ +} + + static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags) { CHARSET_INFO *cs; @@ -341,10 +425,11 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags) while we may changing the cs_info_table */ pthread_mutex_lock(&THR_LOCK_charset); - if (!(cs = find_charset((CHARSET_INFO**) cs_info_table.buffer, cs_number, - cs_info_table.elements))) - if (!(cs = find_compiled_charset(cs_number))) - cs=add_charset(cs_number, get_charset_name(cs_number), flags); + + cs = &all_charsets[cs_number]; + if (!(cs->state & (MY_CS_COMPILED | MY_CS_LOADED))) + cs=add_charset(cs_number, flags); + pthread_mutex_unlock(&THR_LOCK_charset); return cs; } @@ -352,25 +437,20 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags) static CHARSET_INFO *get_internal_charset_by_name(const char *name, myf flags) { - CHARSET_INFO *cs; - /* - To make things thread safe we are not allowing other threads to interfere - while we may changing the cs_info_table - */ - pthread_mutex_lock(&THR_LOCK_charset); - if (!(cs = find_charset_by_name((CHARSET_INFO**) cs_info_table.buffer, name, - cs_info_table.elements))) - if (!(cs = find_compiled_charset_by_name(name))) - cs=add_charset(get_charset_number(name), name, flags); - pthread_mutex_unlock(&THR_LOCK_charset); - return cs; + uint cs_number=get_charset_number(name); + return cs_number ? get_internal_charset(cs_number,flags) : NULL; } + CHARSET_INFO *get_charset(uint cs_number, myf flags) { CHARSET_INFO *cs; (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ + + if (!cs_number) + return NULL; + cs=get_internal_charset(cs_number, flags); if (!cs && (flags & MY_WME)) @@ -396,6 +476,7 @@ my_bool set_default_charset(uint cs, myf flags) DBUG_RETURN(TRUE); /* error */ } default_charset_info = new_charset; + system_charset_info = new_charset; DBUG_RETURN(FALSE); } @@ -428,6 +509,7 @@ my_bool set_default_charset_by_name(const char *cs_name, myf flags) } default_charset_info = new_charset; + system_charset_info = new_charset; DBUG_RETURN(FALSE); } @@ -466,7 +548,7 @@ char * list_charsets(myf want_flags) (void)init_available_charsets(MYF(0)); init_dynamic_string(&s, NullS, 256, 1024); - if (want_flags & MY_COMPILED_SETS) + if (want_flags & MY_CS_COMPILED) { CHARSET_INFO *cs; for (cs = compiled_charsets; cs->number > 0; cs++) @@ -476,82 +558,42 @@ char * list_charsets(myf want_flags) } } - if (want_flags & MY_CONFIG_SETS) + if (want_flags & MY_CS_CONFIG) { - CS_ID **c; + CHARSET_INFO *cs; char buf[FN_REFLEN]; MY_STAT status; - if((c=available_charsets)) - for (; *c; ++c) - { - if (charset_in_string((*c)->name, &s)) - continue; - get_charset_conf_name((*c)->number, buf); - if (!my_stat(buf, &status, MYF(0))) - continue; /* conf file doesn't exist */ - dynstr_append(&s, (*c)->name); - dynstr_append(&s, " "); - } + for (cs=all_charsets; cs < all_charsets+255; cs++) + { + if (!cs->name || charset_in_string(cs->name, &s)) + continue; + get_charset_conf_name(cs->name, buf); + if (!my_stat(buf, &status, MYF(0))) + continue; /* conf file doesn't exist */ + dynstr_append(&s, cs->name); + dynstr_append(&s, " "); + } } - if (want_flags & MY_INDEX_SETS) + if (want_flags & (MY_CS_INDEX|MY_CS_LOADED)) { - CS_ID **c; - for (c = available_charsets; *c; ++c) - charset_append(&s, (*c)->name); + CHARSET_INFO *cs; + for (cs = all_charsets; cs < all_charsets + 255; cs++) + if (cs->name && (cs->state & want_flags) ) + charset_append(&s, cs->name); } - - if (want_flags & MY_LOADED_SETS) + + if (s.length) { - uint i; - for (i = 0; i < cs_info_table.elements; i++) - charset_append(&s, - dynamic_element(&cs_info_table, i, CHARSET_INFO *)->name); + s.str[s.length - 1] = '\0'; /* chop trailing space */ + p = my_strdup(s.str, MYF(MY_WME)); } - s.str[s.length - 1] = '\0'; /* chop trailing space */ - p = my_strdup(s.str, MYF(MY_WME)); - dynstr_free(&s); - - return p; -} - -/**************************************************************************** -* Code for debugging. -****************************************************************************/ - - -static void _print_array(uint8 *data, uint size) -{ - uint i; - for (i = 0; i < size; ++i) + else { - if (i == 0 || i % 16 == size % 16) printf(" "); - printf(" %02x", data[i]); - if ((i+1) % 16 == size % 16) printf("\n"); + p = my_strdup("", MYF(MY_WME)); } -} - -/* _print_csinfo is called from test_charset.c */ -void _print_csinfo(CHARSET_INFO *cs) -{ - printf("%s #%d\n", cs->name, cs->number); - printf("ctype:\n"); _print_array(cs->ctype, 257); - printf("to_lower:\n"); _print_array(cs->to_lower, 256); - printf("to_upper:\n"); _print_array(cs->to_upper, 256); - printf("sort_order:\n"); _print_array(cs->sort_order, 256); - printf("collate: %3s (%d, %p, %p, %p, %p, %p)\n", - cs->strxfrm_multiply ? "yes" : "no", - cs->strxfrm_multiply, - cs->strcoll, - cs->strxfrm, - cs->strnncoll, - cs->strnxfrm, - cs->like_range); - printf("multi-byte: %3s (%d, %p, %p, %p)\n", - cs->mbmaxlen ? "yes" : "no", - cs->mbmaxlen, - cs->ismbchar, - cs->ismbhead, - cs->mbcharlen); + dynstr_free(&s); + + return p; } |