diff options
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- | mysys/charset.c | 532 |
1 files changed, 532 insertions, 0 deletions
diff --git a/mysys/charset.c b/mysys/charset.c new file mode 100644 index 00000000000..88b0972431e --- /dev/null +++ b/mysys/charset.c @@ -0,0 +1,532 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + MA 02111-1307, USA */ + +#include "mysys_priv.h" +#include "mysys_err.h" +#include <m_ctype.h> +#include <m_string.h> +#include <my_dir.h> + +const char *charsets_dir = NULL; +static DYNAMIC_ARRAY cs_info_table; +static TYPELIB available_charsets; +static int charset_initialized=0; + +#define MAX_LINE 1024 + +#define CTYPE_TABLE_SIZE 257 +#define TO_LOWER_TABLE_SIZE 256 +#define TO_UPPER_TABLE_SIZE 256 +#define SORT_ORDER_TABLE_SIZE 256 + +struct simpleconfig_buf_st { + FILE *f; + char buf[MAX_LINE]; + char *p; +}; + +/* Defined in strings/ctype.c */ + +CHARSET_INFO *find_compiled_charset(uint cs_number); +uint compiled_charset_number(const char *name); +const char *compiled_charset_name(uint charset_number); + + +static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf) +{ + char *endptr=fb->p; + + for (;;) + { + while (isspace(*endptr)) + ++endptr; + if (*endptr && *endptr != '#') /* Not comment */ + break; /* Found something */ + if ((fgets(fb->buf, sizeof(fb->buf), fb->f)) == NULL) + return TRUE; /* end of file */ + endptr = fb->buf; + } + + while (!isspace(*endptr)) + *buf++= *endptr++; + *buf=0; + fb->p = endptr; + + return FALSE; +} + + +static char *get_charsets_dir(char *buf) +{ + const char *sharedir = SHAREDIR; + DBUG_ENTER("get_charsets_dir"); + + if (charsets_dir != NULL) + strnmov(buf, charsets_dir, FN_REFLEN); + else + { + if (test_if_hard_path(sharedir) || + is_prefix(sharedir, DEFAULT_CHARSET_HOME)) + strxmov(buf, sharedir, "/", CHARSET_DIR, NullS); + else + strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, + NullS); + } + convert_dirname(buf); + DBUG_PRINT("info",("charsets dir='%s'", buf)); + DBUG_RETURN(strend(buf)); +} + + +static my_bool read_charset_index(TYPELIB *charsets, myf myflags) +{ + struct simpleconfig_buf_st fb; + char buf[MAX_LINE]; + DYNAMIC_ARRAY cs; + my_string s; + + strmov(get_charsets_dir(buf), "Index"); + + if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) + return TRUE; + fb.buf[0] = '\0'; + fb.p = fb.buf; + + if (init_dynamic_array(&cs, sizeof(my_string), 32, 32)) + return TRUE; + + while (!get_word(&fb, buf)) + { + uint length; + if (!(s= (char*) my_once_alloc(length=strlen(buf)+1, myflags))) + { + my_fclose(fb.f,myflags); + return TRUE; + } + memcpy(s,buf,length); + insert_dynamic(&cs, (gptr) &s); + } + my_fclose(fb.f,myflags); + + /* I seriously doubt this is the best way to initialize this + * TYPELIB from the Index file. But it's the best way I could + * come up with right now. */ + + charsets->count = cs.elements; + charsets->name = ""; + if (!(charsets->type_names = + (const char **) my_once_alloc((cs.elements + 1) * sizeof(const char *), + myflags))) + return TRUE; + /* unwarranted chumminess with dynamic_array implementation? */ + memcpy((char*) charsets->type_names, cs.buffer, + cs.elements * sizeof(my_string *)); + charsets->type_names[cs.elements] = NullS; + delete_dynamic(&cs); + + return FALSE; +} + + +static my_bool init_available_charsets(myf myflags) +{ + my_bool error=0; + /* + We have to use charset_initialized to not lock on THR_LOCK_charset + inside get_internal_charset... + */ + if (!charset_initialized) + { + /* + To make things thread safe we are not allowing other threads to interfere + while we may changing the cs_info_table + */ + pthread_mutex_lock(&THR_LOCK_charset); + if (!cs_info_table.buffer) /* If not initialized */ + { + init_dynamic_array(&cs_info_table, sizeof(CHARSET_INFO*), 16, 8); + error = read_charset_index(&available_charsets, myflags); + } + charset_initialized=1; + pthread_mutex_unlock(&THR_LOCK_charset); + } + return error || available_charsets.count == 0; +} + + +void free_charsets(void) +{ + delete_dynamic(&cs_info_table); +} + + +static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb) +{ + char buf[MAX_LINE]; + while (sz--) + { + if (get_word(fb, buf)) + { + DBUG_PRINT("error",("get_word failed, expecting %d more words", sz + 1)); + return 1; + } + *array++ = (uchar) strtol(buf, NULL, 16); + } + return 0; +} + + +static void get_charset_conf_name(uint cs_number, char *buf) +{ + strxmov(get_charsets_dir(buf), + get_type(&available_charsets, cs_number - 1), ".conf", NullS); +} + + +static my_bool read_charset_file(uint cs_number, CHARSET_INFO *set, + myf myflags) +{ + struct simpleconfig_buf_st fb; + char buf[FN_REFLEN]; + my_bool result; + DBUG_ENTER("read_charset_file"); + DBUG_PRINT("enter",("cs_number: %d", cs_number)); + + if (cs_number <= 0) + DBUG_RETURN(TRUE); + + get_charset_conf_name(cs_number, buf); + DBUG_PRINT("info",("file name: %s", buf)); + + if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) + DBUG_RETURN(TRUE); + + fb.buf[0] = '\0'; /* Init for get_word */ + fb.p = fb.buf; + + result=FALSE; + if (fill_array(set->ctype, CTYPE_TABLE_SIZE, &fb) || + fill_array(set->to_lower, TO_LOWER_TABLE_SIZE, &fb) || + fill_array(set->to_upper, TO_UPPER_TABLE_SIZE, &fb) || + fill_array(set->sort_order, SORT_ORDER_TABLE_SIZE, &fb)) + result=TRUE; + + my_fclose(fb.f, MYF(0)); + DBUG_RETURN(result); +} + + +uint get_charset_number(const char *charset_name) +{ + my_bool error; + error = init_available_charsets(MYF(0)); /* If it isn't initialized */ + if (error) + return compiled_charset_number(charset_name); + else + return find_type((char*)charset_name, &available_charsets, 1); +} + +const char *get_charset_name(uint charset_number) +{ + my_bool error; + error = init_available_charsets(MYF(0)); /* If it isn't initialized */ + if (error) + return compiled_charset_name(charset_number); + else + return get_type(&available_charsets, charset_number - 1); +} + + +static CHARSET_INFO *find_charset(CHARSET_INFO **table, uint cs_number, + size_t tablesz) +{ + uint i; + for (i = 0; i < tablesz; ++i) + if (table[i]->number == cs_number) + return table[i]; + return NULL; +} + +static CHARSET_INFO *find_charset_by_name(CHARSET_INFO **table, const char *name, + size_t tablesz) +{ + uint i; + for (i = 0; i < tablesz; ++i) + if (!strcmp(table[i]->name,name)) + return table[i]; + return NULL; +} + +static CHARSET_INFO *add_charset(uint cs_number, const char *cs_name) +{ + CHARSET_INFO tmp_cs,*cs; + uchar tmp_ctype[CTYPE_TABLE_SIZE]; + uchar tmp_to_lower[TO_LOWER_TABLE_SIZE]; + uchar tmp_to_upper[TO_UPPER_TABLE_SIZE]; + uchar tmp_sort_order[SORT_ORDER_TABLE_SIZE]; + + /* Don't allocate memory if we are not sure we can find the char set */ + cs= &tmp_cs; + bzero((char*) cs, sizeof(*cs)); + cs->ctype=tmp_ctype; + cs->to_lower=tmp_to_lower; + cs->to_upper=tmp_to_upper; + cs->sort_order=tmp_sort_order; + if (read_charset_file(cs_number, cs, MYF(MY_WME))) + return NULL; + + cs = (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO), + MYF(MY_WME)); + *cs=tmp_cs; + cs->name = (char *) my_once_alloc(strlen(cs_name) + 1, MYF(MY_WME)); + cs->ctype = (uchar*) my_once_alloc(CTYPE_TABLE_SIZE, MYF(MY_WME)); + cs->to_lower = (uchar*) my_once_alloc(TO_LOWER_TABLE_SIZE, MYF(MY_WME)); + cs->to_upper = (uchar*) my_once_alloc(TO_UPPER_TABLE_SIZE, MYF(MY_WME)); + cs->sort_order=(uchar*) my_once_alloc(SORT_ORDER_TABLE_SIZE, MYF(MY_WME)); + cs->number = cs_number; + memcpy((char*) cs->name, (char*) cs_name, strlen(cs_name) + 1); + memcpy((char*) cs->ctype, (char*) tmp_ctype, sizeof(tmp_ctype)); + memcpy((char*) cs->to_lower, (char*) tmp_to_lower, sizeof(tmp_to_lower)); + memcpy((char*) cs->to_upper, (char*) tmp_to_upper, sizeof(tmp_to_upper)); + memcpy((char*) cs->sort_order, (char*) tmp_sort_order, + sizeof(tmp_sort_order)); + insert_dynamic(&cs_info_table, (gptr) &cs); + return cs; +} + +static CHARSET_INFO *get_internal_charset(uint cs_number) +{ + CHARSET_INFO *cs; + /* + To make things thread safe we are not allowing other threads to interfere + while we may changing the cs_info_table + */ + pthread_mutex_lock(&THR_LOCK_charset); + if (!(cs = find_charset((CHARSET_INFO**) cs_info_table.buffer, cs_number, + cs_info_table.elements))) + if (!(cs = find_compiled_charset(cs_number))) + cs=add_charset(cs_number, get_charset_name(cs_number)); + pthread_mutex_unlock(&THR_LOCK_charset); + return cs; +} + + +static CHARSET_INFO *get_internal_charset_by_name(const char *name) +{ + CHARSET_INFO *cs; + /* + To make things thread safe we are not allowing other threads to interfere + while we may changing the cs_info_table + */ + pthread_mutex_lock(&THR_LOCK_charset); + if (!(cs = find_charset_by_name((CHARSET_INFO**) cs_info_table.buffer, name, + cs_info_table.elements))) + if (!(cs = find_compiled_charset_by_name(name))) + cs=add_charset(get_charset_number(name), name); + pthread_mutex_unlock(&THR_LOCK_charset); + return cs; +} + + +CHARSET_INFO *get_charset(uint cs_number, myf flags) +{ + CHARSET_INFO *cs; + (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ + cs=get_internal_charset(cs_number); + + if (!cs && flags & MY_WME) + { + char index_file[FN_REFLEN], cs_string[23]; + strmov(get_charsets_dir(index_file), "Index"); + cs_string[0]='#'; + int10_to_str(cs_number, cs_string+1, 10); + my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); + } + return cs; +} + +my_bool set_default_charset(uint cs, myf flags) +{ + CHARSET_INFO *new; + DBUG_ENTER("set_default_charset"); + DBUG_PRINT("enter",("character set: %d",(int) cs)); + new = get_charset(cs, flags); + if (!new) + { + DBUG_PRINT("error",("Couldn't set default character set")); + DBUG_RETURN(TRUE); /* error */ + } + default_charset_info = new; + DBUG_RETURN(FALSE); +} + +CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) +{ + CHARSET_INFO *cs; + (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ + cs=get_internal_charset_by_name(cs_name); + + if (!cs && (flags & MY_WME)) + { + char index_file[FN_REFLEN]; + strmov(get_charsets_dir(index_file), "Index"); + my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); + } + + return cs; +} + +my_bool set_default_charset_by_name(const char *cs_name, myf flags) +{ + CHARSET_INFO *new; + DBUG_ENTER("set_default_charset_by_name"); + DBUG_PRINT("enter",("character set: %s", cs_name)); + new = get_charset_by_name(cs_name, flags); + if (!new) + { + DBUG_PRINT("error",("Couldn't set default character set")); + DBUG_RETURN(TRUE); /* error */ + } + + default_charset_info = new; + DBUG_RETURN(FALSE); +} + +/* Only append name if it doesn't exist from before */ + +static my_bool charset_in_string(const char *name, DYNAMIC_STRING *s) +{ + uint length=strlen(name); + const char *pos; + for (pos=s->str ; (pos=strstr(pos,name)) ; pos++) + { + if (! pos[length] || pos[length] == ' ') + return TRUE; /* Already existed */ + } + + return FALSE; +} + +static void charset_append(DYNAMIC_STRING *s, const char *name) +{ + if (!charset_in_string(name, s)) { + dynstr_append(s, name); + dynstr_append(s, " "); + } +} + + +/* Returns a dynamically-allocated string listing the character sets + requested. The caller is responsible for freeing the memory. */ + +char * list_charsets(myf want_flags) +{ + DYNAMIC_STRING s; + char *p; + + init_dynamic_string(&s, NullS, 256, 1024); + + if (want_flags & MY_COMPILED_SETS) + { + CHARSET_INFO *cs; + for (cs = compiled_charsets; cs->number > 0; cs++) + { + dynstr_append(&s, cs->name); + dynstr_append(&s, " "); + } + } + + if (want_flags & MY_CONFIG_SETS) + { + uint i; + const char *cs_name; + char buf[FN_REFLEN]; + MY_STAT stat; + + for (i = 0; i < available_charsets.count; i++) + { + cs_name = get_type(&available_charsets, i); + if (charset_in_string(cs_name, &s)) + continue; + get_charset_conf_name(i + 1, buf); + if (!my_stat(buf, &stat, MYF(0))) + continue; /* conf file doesn't exist */ + dynstr_append(&s, cs_name); + dynstr_append(&s, " "); + } + } + + if (want_flags & MY_INDEX_SETS) + { + uint i; + for (i = 0; i < available_charsets.count; i++) + charset_append(&s, get_type(&available_charsets, i)); + } + + if (want_flags & MY_LOADED_SETS) + { + uint i; + for (i = 0; i < cs_info_table.elements; i++) + charset_append(&s, + dynamic_element(&cs_info_table, i, CHARSET_INFO *)->name); + } + s.str[s.length - 1] = '\0'; /* chop trailing space */ + p = my_strdup(s.str, MYF(MY_WME)); + dynstr_free(&s); + + return p; +} + +/**************************************************************************** +* Code for debugging. +****************************************************************************/ + + +static void _print_array(uint8 *data, uint size) +{ + uint i; + for (i = 0; i < size; ++i) + { + if (i == 0 || i % 16 == size % 16) printf(" "); + printf(" %02x", data[i]); + if ((i+1) % 16 == size % 16) printf("\n"); + } +} + +/* _print_csinfo is called from test_charset.c */ +void _print_csinfo(CHARSET_INFO *cs) +{ + printf("%s #%d\n", cs->name, cs->number); + printf("ctype:\n"); _print_array(cs->ctype, 257); + printf("to_lower:\n"); _print_array(cs->to_lower, 256); + printf("to_upper:\n"); _print_array(cs->to_upper, 256); + printf("sort_order:\n"); _print_array(cs->sort_order, 256); + printf("collate: %3s (%d, %p, %p, %p, %p, %p)\n", + cs->strxfrm_multiply ? "yes" : "no", + cs->strxfrm_multiply, + cs->strcoll, + cs->strxfrm, + cs->strnncoll, + cs->strnxfrm, + cs->like_range); + printf("multi-byte: %3s (%d, %p, %p, %p)\n", + cs->mbmaxlen ? "yes" : "no", + cs->mbmaxlen, + cs->ismbchar, + cs->ismbhead, + cs->mbcharlen); +} |