diff options
author | Alexander Barkov <bar@mnogosearch.org> | 2013-10-02 15:04:07 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mnogosearch.org> | 2013-10-02 15:04:07 +0400 |
commit | 0b6c4bb34f99b8f4023fd0bef25a1b714f96b699 (patch) | |
tree | 87e5f83097f30c9fb7e30928800bcc92690f6bbd /include | |
parent | 9538bbfce9055f99529adb461d101b7b236eb5a3 (diff) | |
download | mariadb-git-0b6c4bb34f99b8f4023fd0bef25a1b714f96b699.tar.gz |
MDEV-4928 Merge collation customization improvements
Merging the following MySQL-5.6 changes:
- WL#5624: Collation customization improvements
http://dev.mysql.com/worklog/task/?id=5624
- WL#4013: Unicode german2 collation
http://dev.mysql.com/worklog/task/?id=4013
- Bug#62429 XML: ExtractValue, UpdateXML max arg length 127 chars
http://bugs.mysql.com/bug.php?id=62429
(required by WL#5624)
Diffstat (limited to 'include')
-rw-r--r-- | include/m_ctype.h | 112 | ||||
-rw-r--r-- | include/m_string.h | 6 | ||||
-rw-r--r-- | include/my_sys.h | 13 | ||||
-rw-r--r-- | include/my_xml.h | 11 |
4 files changed, 114 insertions, 28 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index e3fb2dbc66e..4e05d65ca5f 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -23,6 +23,12 @@ #include <my_attribute.h> #include "my_global.h" /* uint16, uchar */ +enum loglevel { + ERROR_LEVEL= 0, + WARNING_LEVEL= 1, + INFORMATION_LEVEL= 2 +}; + #ifdef __cplusplus extern "C" { #endif @@ -61,24 +67,35 @@ typedef const struct unicase_info_st MY_UNICASE_INFO; typedef const struct uni_ctype_st MY_UNI_CTYPE; typedef const struct my_uni_idx_st MY_UNI_IDX; -struct unicase_info_st +typedef struct unicase_info_char_st { uint32 toupper; uint32 tolower; uint32 sort; +} MY_UNICASE_CHARACTER; + + +struct unicase_info_st +{ + my_wc_t maxchar; + MY_UNICASE_CHARACTER **page; }; -extern MY_UNICASE_INFO *const my_unicase_default[256]; -extern MY_UNICASE_INFO *const my_unicase_turkish[256]; -extern MY_UNICASE_INFO *const my_unicase_mysql500[256]; -#define MY_UCA_MAX_CONTRACTION 4 +extern MY_UNICASE_INFO my_unicase_default; +extern MY_UNICASE_INFO my_unicase_turkish; +extern MY_UNICASE_INFO my_unicase_mysql500; +extern MY_UNICASE_INFO my_unicase_unicode520; + +#define MY_UCA_MAX_CONTRACTION 6 #define MY_UCA_MAX_WEIGHT_SIZE 8 +#define MY_UCA_WEIGHT_LEVELS 1 typedef struct my_contraction_t { my_wc_t ch[MY_UCA_MAX_CONTRACTION]; /* Character sequence */ uint16 weight[MY_UCA_MAX_WEIGHT_SIZE];/* Its weight string, 0-terminated */ + my_bool with_context; } MY_CONTRACTION; @@ -89,6 +106,46 @@ typedef struct my_contraction_list_t char *flags; /* Character flags, e.g. "is contraction head") */ } MY_CONTRACTIONS; +my_bool my_uca_can_be_contraction_head(const MY_CONTRACTIONS *c, my_wc_t wc); +my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc); +uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c, + my_wc_t wc1, my_wc_t wc2); + + +/* Collation weights on a single level (e.g. 
primary, secondary, tertiary) */ +typedef struct my_uca_level_info_st +{ + my_wc_t maxchar; + uchar *lengths; + uint16 **weights; + MY_CONTRACTIONS contractions; +} MY_UCA_WEIGHT_LEVEL; + + +typedef struct uca_info_st +{ + MY_UCA_WEIGHT_LEVEL level[MY_UCA_WEIGHT_LEVELS]; + + /* Logical positions */ + my_wc_t first_non_ignorable; + my_wc_t last_non_ignorable; + my_wc_t first_primary_ignorable; + my_wc_t last_primary_ignorable; + my_wc_t first_secondary_ignorable; + my_wc_t last_secondary_ignorable; + my_wc_t first_tertiary_ignorable; + my_wc_t last_tertiary_ignorable; + my_wc_t first_trailing; + my_wc_t last_trailing; + my_wc_t first_variable; + my_wc_t last_variable; + +} MY_UCA_INFO; + + + +extern MY_UCA_INFO my_uca_v400; + struct uni_ctype_st { @@ -122,7 +179,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_CS_BINSORT 16 /* if binary sort order */ #define MY_CS_PRIMARY 32 /* if primary collation */ #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */ -#define MY_CS_UNICODE 128 /* is a charset is full unicode */ +#define MY_CS_UNICODE 128 /* if a charset is BMP Unicode */ #define MY_CS_READY 256 /* if a charset is initialized */ #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ #define MY_CS_CSSORT 1024 /* if case sensitive sort order */ @@ -130,6 +187,7 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; #define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */ #define MY_CS_NONASCII 8192 /* if not ASCII-compatible */ #define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */ +#define MY_CS_LOWER_SORT 32768 /* If use lower case as weight */ #define MY_CHARSET_UNDEFINED 0 /* Character repertoire flags */ @@ -202,13 +260,24 @@ enum my_lex_states struct charset_info_st; +typedef struct my_charset_loader_st +{ + char error[128]; + void *(*once_alloc)(size_t); + void *(*malloc)(size_t); + void *(*realloc)(void *, size_t); + void (*free)(void *); + void (*reporter)(enum loglevel, const char *format, ...); + int 
(*add_collation)(struct charset_info_st *cs); +} MY_CHARSET_LOADER; + extern int (*my_string_stack_guard)(int); /* See strings/CHARSET_INFO.txt for information about this structure */ struct my_collation_handler_st { - my_bool (*init)(struct charset_info_st *, void *(*alloc)(size_t)); + my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *); /* Collation routines */ int (*strnncoll)(CHARSET_INFO *, const uchar *, size_t, const uchar *, size_t, my_bool); @@ -259,7 +328,7 @@ typedef size_t (*my_charset_conv_case)(CHARSET_INFO *, /* See strings/CHARSET_INFO.txt about information on this structure */ struct my_charset_handler_st { - my_bool (*init)(struct charset_info_st *, void *(*alloc)(size_t)); + my_bool (*init)(struct charset_info_st *, MY_CHARSET_LOADER *loader); /* Multibyte routines */ uint (*ismbchar)(CHARSET_INFO *, const char *, const char *); uint (*mbcharlen)(CHARSET_INFO *, uint c); @@ -322,6 +391,13 @@ struct my_charset_handler_st extern MY_CHARSET_HANDLER my_charset_8bit_handler; extern MY_CHARSET_HANDLER my_charset_ucs2_handler; + +/* + We define this CHARSET_INFO_DEFINED here to prevent a repeat of the + typedef in hash.c, which will cause a compiler error. 
+*/ +#define CHARSET_INFO_DEFINED + /* See strings/CHARSET_INFO.txt about information on this structure */ struct charset_info_st { @@ -337,11 +413,10 @@ struct charset_info_st const uchar *to_lower; const uchar *to_upper; const uchar *sort_order; - const MY_CONTRACTIONS *contractions; - const uint16 *const *sort_order_big; + MY_UCA_INFO *uca; const uint16 *tab_to_uni; - MY_UNI_IDX *tab_from_uni; - MY_UNICASE_INFO *const *caseinfo; + MY_UNI_IDX *tab_from_uni; + MY_UNICASE_INFO *caseinfo; const uchar *state_map; const uchar *ident_map; uint strxfrm_multiply; @@ -349,8 +424,8 @@ struct charset_info_st uchar casedn_multiply; uint mbminlen; uint mbmaxlen; - uint16 min_sort_char; - uint16 max_sort_char; /* For LIKE optimization */ + my_wc_t min_sort_char; + my_wc_t max_sort_char; /* For LIKE optimization */ uchar pad_char; my_bool escape_with_backslash_is_dangerous; @@ -600,10 +675,10 @@ int my_wildcmp_unicode(CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, - MY_UNICASE_INFO *const *weights); + MY_UNICASE_INFO *weights); -extern my_bool my_parse_charset_xml(const char *bug, size_t len, - int (*add)(struct charset_info_st *cs)); +extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader, + const char *buf, size_t buflen); extern char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end, pchar c); extern size_t my_strcspn(CHARSET_INFO *cs, const char *str, const char *end, @@ -620,6 +695,9 @@ uint my_charset_repertoire(CHARSET_INFO *cs); my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs); +const MY_CONTRACTIONS *my_charset_get_contractions(const CHARSET_INFO *cs, + int level); + extern size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n, const char* fmt, va_list ap); diff --git a/include/m_string.h b/include/m_string.h index 95b28d6d69a..395fd2ddda6 100644 --- a/include/m_string.h +++ b/include/m_string.h @@ -73,10 +73,12 @@ extern "C" { #endif /* - 
my_str_malloc() and my_str_free() are assigned to implementations in - strings/alloc.c, but can be overridden in the calling program. + my_str_malloc(), my_str_realloc() and my_str_free() are assigned to + implementations in strings/alloc.c, but can be overridden in + the calling program. */ extern void *(*my_str_malloc)(size_t); +extern void *(*my_str_realloc)(void *, size_t); extern void (*my_str_free)(void *); #if defined(HAVE_STPCPY) && MY_GNUC_PREREQ(3, 4) && !defined(__INTEL_COMPILER) diff --git a/include/my_sys.h b/include/my_sys.h index 3065de17892..e3eb52d6018 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -271,12 +271,6 @@ extern char wild_many,wild_one,wild_prefix; extern const char *charsets_dir; extern my_bool timed_mutexes; -enum loglevel { - ERROR_LEVEL, - WARNING_LEVEL, - INFORMATION_LEVEL -}; - enum cache_type { TYPE_NOT_SET= 0, READ_CACHE, WRITE_CACHE, @@ -947,15 +941,20 @@ void my_uuid2str(const uchar *guid, char *s); void my_uuid_end(); /* character sets */ +extern void my_charset_loader_init_mysys(MY_CHARSET_LOADER *loader); extern uint get_charset_number(const char *cs_name, uint cs_flags); extern uint get_collation_number(const char *name); extern const char *get_charset_name(uint cs_number); extern CHARSET_INFO *get_charset(uint cs_number, myf flags); extern CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags); +extern CHARSET_INFO *my_collation_get_by_name(MY_CHARSET_LOADER *loader, + const char *name, myf flags); extern CHARSET_INFO *get_charset_by_csname(const char *cs_name, uint cs_flags, myf my_flags); - +extern CHARSET_INFO *my_charset_get_by_name(MY_CHARSET_LOADER *loader, + const char *name, + uint cs_flags, myf my_flags); extern my_bool resolve_charset(const char *cs_name, CHARSET_INFO *default_cs, CHARSET_INFO **cs); diff --git a/include/my_xml.h b/include/my_xml.h index aee301167ff..cd7665df9da 100644 --- a/include/my_xml.h +++ b/include/my_xml.h @@ -52,8 +52,15 @@ typedef struct xml_stack_st int flags; enum 
my_xml_node_type current_node_type; char errstr[128]; - char attr[128]; - char *attrend; + + struct { + char static_buffer[128]; + char *buffer; + size_t buffer_size; + char *start; + char *end; + } attr; + const char *beg; const char *cur; const char *end; |