summary | refs | log | tree | commit | diff
path: root/mysys/charset.c
diff options
context:
space:
mode:
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- mysys/charset.c 196
1 file changed, 61 insertions, 135 deletions
diff --git a/mysys/charset.c b/mysys/charset.c
index d801fcdbd76..1388fc40c6d 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -21,16 +21,6 @@
#include <my_dir.h>
#include <my_xml.h>
-typedef struct
-{
- int nchars;
- MY_UNI_IDX uidx;
-} uni_idx;
-
-#define PLANE_SIZE 0x100
-#define PLANE_NUM 0x100
-#define PLANE_NUMBER(x) (((x)>>8) % PLANE_NUM)
-
/*
The code below implements this functionality:
@@ -48,32 +38,21 @@ my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
}
-static void set_max_sort_char(CHARSET_INFO *cs)
-{
- uchar max_char;
- uint i;
-
- if (!cs->sort_order)
- return;
-
- max_char=cs->sort_order[(uchar) cs->max_sort_char];
- for (i= 0; i < 256; i++)
- {
- if ((uchar) cs->sort_order[i] > max_char)
- {
- max_char=(uchar) cs->sort_order[i];
- cs->max_sort_char= i;
- }
- }
-}
-
-
-static void init_state_maps(CHARSET_INFO *cs)
+static my_bool init_state_maps(CHARSET_INFO *cs)
{
uint i;
- uchar *state_map= cs->state_map;
- uchar *ident_map= cs->ident_map;
+ uchar *state_map;
+ uchar *ident_map;
+ if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
+ return 1;
+
+ if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
+ return 1;
+
+ state_map= cs->state_map;
+ ident_map= cs->ident_map;
+
/* Fill state_map with states to get a faster parser */
for (i=0; i < 256 ; i++)
{
@@ -120,6 +99,7 @@ static void init_state_maps(CHARSET_INFO *cs)
state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
- state_map[(uchar)'b']= state_map[(uchar)'b']= (uchar) MY_LEX_IDENT_OR_BIN;
+ state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
+ return 0;
}
@@ -131,98 +111,11 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
cs->coll= &my_collation_8bit_simple_ci_handler;
cs->cset= &my_charset_8bit_handler;
- cs->mbminlen= 1;
- cs->mbmaxlen= 1;
}
-static int pcmp(const void * f, const void * s)
-{
- const uni_idx *F= (const uni_idx*) f;
- const uni_idx *S= (const uni_idx*) s;
- int res;
-
- if (!(res=((S->nchars)-(F->nchars))))
- res=((F->uidx.from)-(S->uidx.to));
- return res;
-}
-
-static my_bool create_fromuni(CHARSET_INFO *cs)
-{
- uni_idx idx[PLANE_NUM];
- int i,n;
-
- /* Clear plane statistics */
- bzero(idx,sizeof(idx));
-
- /* Count number of characters in each plane */
- for (i=0; i< 0x100; i++)
- {
- uint16 wc=cs->tab_to_uni[i];
- int pl= PLANE_NUMBER(wc);
-
- if (wc || !i)
- {
- if (!idx[pl].nchars)
- {
- idx[pl].uidx.from=wc;
- idx[pl].uidx.to=wc;
- }else
- {
- idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
- idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
- }
- idx[pl].nchars++;
- }
- }
-
- /* Sort planes in descending order */
- qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);
-
- for (i=0; i < PLANE_NUM; i++)
- {
- int ch,numchars;
-
- /* Skip empty plane */
- if (!idx[i].nchars)
- break;
-
- numchars=idx[i].uidx.to-idx[i].uidx.from+1;
- if (!(idx[i].uidx.tab=(uchar*) my_once_alloc(numchars *
- sizeof(*idx[i].uidx.tab),
- MYF(MY_WME))))
- return TRUE;
-
- bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));
-
- for (ch=1; ch < PLANE_SIZE; ch++)
- {
- uint16 wc=cs->tab_to_uni[ch];
- if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
- {
- int ofs= wc - idx[i].uidx.from;
- idx[i].uidx.tab[ofs]= ch;
- }
- }
- }
-
- /* Allocate and fill reverse table for each plane */
- n=i;
- if (!(cs->tab_from_uni= (MY_UNI_IDX*) my_once_alloc(sizeof(MY_UNI_IDX)*(n+1),
- MYF(MY_WME))))
- return TRUE;
-
- for (i=0; i< n; i++)
- cs->tab_from_uni[i]= idx[i].uidx;
-
- /* Set end-of-list marker */
- bzero(&cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
- return FALSE;
-}
-
-
-static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
+static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
to->number= from->number ? from->number : to->number;
@@ -244,7 +137,8 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
MY_CS_CTYPE_TABLE_SIZE,
MYF(MY_WME))))
goto err;
- init_state_maps(to);
+ if (init_state_maps(to))
+ goto err;
}
if (from->to_lower)
if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
@@ -263,7 +157,7 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
MY_CS_SORT_ORDER_TABLE_SIZE,
MYF(MY_WME))))
goto err;
- set_max_sort_char(to);
+
}
if (from->tab_to_uni)
{
@@ -271,11 +165,10 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
if (!(to->tab_to_uni= (uint16*) my_once_memdup((char*)from->tab_to_uni,
sz, MYF(MY_WME))))
goto err;
- if (create_fromuni(to))
- goto err;
}
- to->mbminlen= 1;
- to->mbmaxlen= 1;
+ if (from->tailoring)
+ if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
+ goto err;
return 0;
@@ -284,6 +177,7 @@ err:
}
+
static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
@@ -315,14 +209,34 @@ static int add_collation(CHARSET_INFO *cs)
if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
{
- simple_cs_init_functions(all_charsets[cs->number]);
- if (simple_cs_copy_data(all_charsets[cs->number],cs))
- return MY_XML_ERROR;
- if (simple_cs_is_full(all_charsets[cs->number]))
+ CHARSET_INFO *newcs= all_charsets[cs->number];
+ if (cs_copy_data(newcs, cs))
+ return MY_XML_ERROR;
+
+ if (!strcmp(cs->csname,"ucs2") )
+ {
+#ifdef HAVE_CHARSET_ucs2
+ newcs->cset= my_charset_ucs2_general_uca.cset;
+ newcs->coll= my_charset_ucs2_general_uca.coll;
+ newcs->strxfrm_multiply= my_charset_ucs2_general_uca.strxfrm_multiply;
+ newcs->min_sort_char= my_charset_ucs2_general_uca.min_sort_char;
+ newcs->max_sort_char= my_charset_ucs2_general_uca.max_sort_char;
+ newcs->mbminlen= 2;
+ newcs->mbmaxlen= 2;
+ newcs->state |= MY_CS_AVAILABLE | MY_CS_LOADED;
+#endif
+ }
+ else
{
- all_charsets[cs->number]->state |= MY_CS_LOADED;
+ simple_cs_init_functions(newcs);
+ newcs->mbminlen= 1;
+ newcs->mbmaxlen= 1;
+ if (simple_cs_is_full(newcs))
+ {
+ newcs->state |= MY_CS_LOADED;
+ }
+ newcs->state|= MY_CS_AVAILABLE;
}
- all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
}
else
{
@@ -432,6 +346,10 @@ void add_compiled_collation(CHARSET_INFO *cs)
cs->state|= MY_CS_AVAILABLE;
}
+static void *cs_alloc(uint size)
+{
+ return my_once_alloc(size, MYF(MY_WME));
+}
#ifdef __NETWARE__
@@ -465,9 +383,9 @@ static my_bool init_available_charsets(myf myflags)
{
if (*cs)
{
- set_max_sort_char(*cs);
if (cs[0]->ctype)
- init_state_maps(*cs);
+ if (init_state_maps(*cs))
+ *cs= NULL;
}
}
@@ -551,6 +469,14 @@ static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
}
cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
}
+ if (cs && !(cs->state & MY_CS_READY))
+ {
+ if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
+ (cs->coll->init && cs->coll->init(cs, cs_alloc)))
+ cs= NULL;
+ else
+ cs->state|= MY_CS_READY;
+ }
pthread_mutex_unlock(&THR_LOCK_charset);
return cs;
}