summaryrefslogtreecommitdiff
path: root/mysys/charset.c
diff options
context:
space:
mode:
Diffstat (limited to 'mysys/charset.c')
-rw-r--r--mysys/charset.c220
1 files changed, 184 insertions, 36 deletions
diff --git a/mysys/charset.c b/mysys/charset.c
index cc1d0a0111e..ad3eb78ae0e 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -214,6 +214,8 @@ copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from)
to->max_sort_char= from->max_sort_char;
to->mbminlen= from->mbminlen;
to->mbmaxlen= from->mbmaxlen;
+ to->caseup_multiply= from->caseup_multiply;
+ to->casedn_multiply= from->casedn_multiply;
to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
MY_CS_STRNXFRM | MY_CS_UNICODE;
}
@@ -249,7 +251,8 @@ static int add_collation(struct charset_info_st *cs)
return MY_XML_ERROR;
newcs->caseup_multiply= newcs->casedn_multiply= 1;
-
+ newcs->levels_for_order= 1;
+
if (!strcmp(cs->csname,"ucs2") )
{
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
@@ -294,6 +297,7 @@ static int add_collation(struct charset_info_st *cs)
simple_cs_init_functions(newcs);
newcs->mbminlen= 1;
newcs->mbmaxlen= 1;
+ newcs->strxfrm_multiply= 1;
if (simple_cs_is_full(newcs))
{
newcs->state |= MY_CS_LOADED;
@@ -350,13 +354,65 @@ static int add_collation(struct charset_info_st *cs)
}
+/**
+ Report character set initialization errors and warnings.
+ Be silent by default: no warnings on the client side.
+*/
+static void
+default_reporter(enum loglevel level __attribute__ ((unused)),
+ const char *format __attribute__ ((unused)),
+ ...)
+{
+}
+my_error_reporter my_charset_error_reporter= default_reporter;
+
+
+/**
+ Wrappers for memory functions my_malloc (and friends)
+ with C-compatbile API without extra "myf" argument.
+*/
+static void *
+my_once_alloc_c(size_t size)
+{ return my_once_alloc(size, MYF(MY_WME)); }
+
+
+static void *
+my_malloc_c(size_t size)
+{ return my_malloc(size, MYF(MY_WME)); }
+
+
+static void *
+my_realloc_c(void *old, size_t size)
+{ return my_realloc(old, size, MYF(MY_WME|MY_ALLOW_ZERO_PTR)); }
+
+
+/**
+ Initialize character set loader to use mysys memory management functions.
+ @param loader Loader to initialize
+*/
+void
+my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader)
+{
+ loader->error[0]= '\0';
+ loader->once_alloc= my_once_alloc_c;
+ loader->malloc= my_malloc_c;
+ loader->realloc= my_realloc_c;
+ loader->free= my_free;
+ loader->reporter= my_charset_error_reporter;
+ loader->add_collation= add_collation;
+}
+
+
#define MY_MAX_ALLOWED_BUF 1024*1024
#define MY_CHARSET_INDEX "Index.xml"
const char *charsets_dir= NULL;
-static my_bool my_read_charset_file(const char *filename, myf myflags)
+static my_bool
+my_read_charset_file(MY_CHARSET_LOADER *loader,
+ const char *filename,
+ myf myflags)
{
uchar *buf;
int fd;
@@ -375,14 +431,11 @@ static my_bool my_read_charset_file(const char *filename, myf myflags)
if (tmp_len != len)
goto error;
- if (my_parse_charset_xml((char*) buf,len,add_collation))
+ if (my_parse_charset_xml(loader, (char *) buf, len))
{
-#ifdef NOT_YET
- printf("ERROR at line %d pos %d '%s'\n",
- my_xml_error_lineno(&p)+1,
- my_xml_error_pos(&p),
- my_xml_error_string(&p));
-#endif
+ my_printf_error(EE_UNKNOWN_CHARSET, "Error while parsing '%s': %s\n",
+ MYF(0), filename, loader->error);
+ goto error;
}
my_free(buf);
@@ -426,21 +479,62 @@ void add_compiled_collation(struct charset_info_st *cs)
cs->state|= MY_CS_AVAILABLE;
}
-static void *cs_alloc(size_t size)
+
+static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
+static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
+
+typedef struct
{
- return my_once_alloc(size, MYF(MY_WME));
+ ulonglong use_count;
+} MY_COLLATION_STATISTICS;
+
+
+static MY_COLLATION_STATISTICS my_collation_statistics[MY_ALL_CHARSETS_SIZE];
+
+
+my_bool my_collation_is_known_id(uint id)
+{
+ return id > 0 && id < array_elements(all_charsets) && all_charsets[id] ?
+ TRUE : FALSE;
}
-static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
-static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
+/*
+ Collation use statistics functions do not lock
+ counters to avoid mutex contention. This can lose
+ some counter increments with high thread concurrency.
+ But this should be Ok, as we don't need exact numbers.
+*/
+static inline void my_collation_statistics_inc_use_count(uint id)
+{
+ DBUG_ASSERT(my_collation_is_known_id(id));
+ my_collation_statistics[id].use_count++;
+}
+
+
+ulonglong my_collation_statistics_get_use_count(uint id)
+{
+ DBUG_ASSERT(my_collation_is_known_id(id));
+ return my_collation_statistics[id].use_count;
+}
+
+
+const char *my_collation_get_tailoring(uint id)
+{
+ /* all_charsets[id]->tailoring is never changed after server startup. */
+ DBUG_ASSERT(my_collation_is_known_id(id));
+ return all_charsets[id]->tailoring;
+}
+
static void init_available_charsets(void)
{
char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
struct charset_info_st **cs;
+ MY_CHARSET_LOADER loader;
bzero((char*) &all_charsets,sizeof(all_charsets));
+ bzero((char*) &my_collation_statistics, sizeof(my_collation_statistics));
init_compiled_charsets(MYF(0));
/* Copy compiled charsets */
@@ -457,8 +551,9 @@ static void init_available_charsets(void)
}
}
+ my_charset_loader_init_mysys(&loader);
strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
- my_read_charset_file(fname, MYF(0));
+ my_read_charset_file(&loader, fname, MYF(0));
}
@@ -547,7 +642,8 @@ const char *get_charset_name(uint charset_number)
}
-static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
+static CHARSET_INFO *
+get_internal_charset(MY_CHARSET_LOADER *loader, uint cs_number, myf flags)
{
char buf[FN_REFLEN];
struct charset_info_st *cs;
@@ -557,7 +653,10 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
if ((cs= (struct charset_info_st*) all_charsets[cs_number]))
{
if (cs->state & MY_CS_READY) /* if CS is already initialized */
- return cs;
+ {
+ my_collation_statistics_inc_use_count(cs_number);
+ return cs;
+ }
/*
To make things thread safe we are not allowing other threads to interfere
@@ -567,20 +666,25 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */
{
+ MY_CHARSET_LOADER loader;
strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
- my_read_charset_file(buf,flags);
+ my_charset_loader_init_mysys(&loader);
+ my_read_charset_file(&loader, buf, flags);
}
if (cs->state & MY_CS_AVAILABLE)
{
if (!(cs->state & MY_CS_READY))
{
- if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
- (cs->coll->init && cs->coll->init(cs, cs_alloc)))
+ if ((cs->cset->init && cs->cset->init(cs, loader)) ||
+ (cs->coll->init && cs->coll->init(cs, loader)))
+ {
cs= NULL;
+ }
else
cs->state|= MY_CS_READY;
}
+ my_collation_statistics_inc_use_count(cs_number);
}
else
cs= NULL;
@@ -593,16 +697,19 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
- CHARSET_INFO *cs;
+ CHARSET_INFO *cs= NULL;
+
if (cs_number == default_charset_info->number)
return default_charset_info;
my_pthread_once(&charsets_initialized, init_available_charsets);
-
- if (cs_number >= array_elements(all_charsets))
- return NULL;
-
- cs=get_internal_charset(cs_number, flags);
+
+ if (cs_number < array_elements(all_charsets))
+ {
+ MY_CHARSET_LOADER loader;
+ my_charset_loader_init_mysys(&loader);
+ cs= get_internal_charset(&loader, cs_number, flags);
+ }
if (!cs && (flags & MY_WME))
{
@@ -615,29 +722,58 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags)
return cs;
}
-CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
+
+/**
+ Find collation by name: extended version of get_charset_by_name()
+ to return error messages to the caller.
+ @param loader Character set loader
+ @param name Collation name
+ @param flags Flags
+ @return NULL on error, pointer to collation on success
+*/
+
+CHARSET_INFO *
+my_collation_get_by_name(MY_CHARSET_LOADER *loader,
+ const char *name, myf flags)
{
uint cs_number;
CHARSET_INFO *cs;
my_pthread_once(&charsets_initialized, init_available_charsets);
- cs_number=get_collation_number(cs_name);
- cs= cs_number ? get_internal_charset(cs_number,flags) : NULL;
+ cs_number= get_collation_number(name);
+ my_charset_loader_init_mysys(loader);
+ cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
if (!cs && (flags & MY_WME))
{
char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
- my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
+ my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), name, index_file);
}
-
return cs;
}
-CHARSET_INFO *get_charset_by_csname(const char *cs_name,
- uint cs_flags,
- myf flags)
+CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
+{
+ MY_CHARSET_LOADER loader;
+ my_charset_loader_init_mysys(&loader);
+ return my_collation_get_by_name(&loader, cs_name, flags);
+}
+
+
+/**
+ Find character set by name: extended version of get_charset_by_csname()
+ to return error messages to the caller.
+ @param loader Character set loader
+ @param name Collation name
+ @param cs_flags Character set flags (e.g. default or binary collation)
+ @param flags Flags
+ @return NULL on error, pointer to collation on success
+*/
+CHARSET_INFO *
+my_charset_get_by_name(MY_CHARSET_LOADER *loader,
+ const char *cs_name, uint cs_flags, myf flags)
{
uint cs_number;
CHARSET_INFO *cs;
@@ -647,7 +783,7 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name,
my_pthread_once(&charsets_initialized, init_available_charsets);
cs_number= get_charset_number(cs_name, cs_flags);
- cs= cs_number ? get_internal_charset(cs_number, flags) : NULL;
+ cs= cs_number ? get_internal_charset(loader, cs_number, flags) : NULL;
if (!cs && (flags & MY_WME))
{
@@ -660,6 +796,15 @@ CHARSET_INFO *get_charset_by_csname(const char *cs_name,
}
+CHARSET_INFO *
+get_charset_by_csname(const char *cs_name, uint cs_flags, myf flags)
+{
+ MY_CHARSET_LOADER loader;
+ my_charset_loader_init_mysys(&loader);
+ return my_charset_get_by_name(&loader, cs_name, cs_flags, flags);
+}
+
+
/**
Resolve character set by the character set name (utf8, latin1, ...).
@@ -857,8 +1002,11 @@ CHARSET_INFO *fs_character_set()
As we're now interested in cp932 only,
let's just detect it using strcmp().
*/
- fs_cset_cache= !strcmp(buf, "cp932") ?
- &my_charset_cp932_japanese_ci : &my_charset_bin;
+ fs_cset_cache=
+ #ifdef HAVE_CHARSET_cp932
+ !strcmp(buf, "cp932") ? &my_charset_cp932_japanese_ci :
+ #endif
+ &my_charset_bin;
}
return fs_cset_cache;
}