diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/m_ctype.h | 144 | ||||
-rw-r--r-- | include/my_rnd.h | 1 | ||||
-rw-r--r-- | include/my_sys.h | 133 | ||||
-rw-r--r-- | include/sslopt-longopts.h | 2 | ||||
-rw-r--r-- | include/sslopt-vars.h | 2 |
5 files changed, 275 insertions, 7 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h index 83b12c3c4f6..811b3b71a17 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -34,7 +34,9 @@ enum loglevel { extern "C" { #endif -#define MY_CS_NAME_SIZE 32 +#define MY_CS_CHARACTER_SET_NAME_SIZE 32 +#define MY_CS_COLLATION_NAME_SIZE 64 + #define MY_CS_CTYPE_TABLE_SIZE 257 #define MY_CS_TO_LOWER_TABLE_SIZE 256 #define MY_CS_TO_UPPER_TABLE_SIZE 256 @@ -116,7 +118,7 @@ extern MY_UNICASE_INFO my_unicase_unicode520; */ #define MY_UCA_MAX_WEIGHT_SIZE (8+1) /* Including 0 terminator */ #define MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE (2*8+1) /* Including 0 terminator */ -#define MY_UCA_WEIGHT_LEVELS 2 +#define MY_UCA_WEIGHT_LEVELS 3 typedef struct my_contraction_t { @@ -139,6 +141,65 @@ const uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c, my_wc_t wc1, my_wc_t wc2); +typedef struct my_uca_weight2_t +{ + uint16 weight[2]; +} MY_UCA_WEIGHT2; + + +/* + In DUCET as of Unicode-14.0.0: + - All characters in the range U+0000..U+007F (i.e. using one byte in utf8) + have no more than two weights on all weight levels. + - All characters in the range U+0080..U+07FF (i.e. using two bytes in utf8) + have no more than four weights on all weight levels. + Therefore the limit of 4 weights should cover all byte pairs + (i.e. two ASCII characters or one 2-byte character) + that are subject to the "process 2 bytes at a time" optimization. + If some collation reorders any character from the mentioned ranges + in a way that produces more weights, such a character will not + be optimized, but will be correctly processed by the slower mb_wc-based + method (1 character at a time). +*/ +#define MY_UCA_2BYTES_MAX_WEIGHT_SIZE (4+1) /* Including 0 terminator */ + +typedef struct my_uca_2bytes_item_t +{ + uint16 weight[MY_UCA_2BYTES_MAX_WEIGHT_SIZE]; +} MY_UCA_2BYTES_ITEM; + + +typedef struct my_uca_level_booster_t +{ + /* + A helper array to process 2 bytes at a time during string comparison. 
+ It maps all 2-bytes sequences that make: + - two ASCII characters or + - one 2-byte character + to their weights. The weight length is limited to + MY_UCA_2BYTES_MAX_WEIGHT_SIZE-1 weights. + This array is used in the main loop optimization. + */ + MY_UCA_2BYTES_ITEM weight_strings_2bytes[0x10000]; + /* + A helper array to process 2 bytes at a time during string comparison, + in an even more efficient way than the one above. + The weight size is limited to 2 weights, so it's used for the cases + when 2 input bytes produce 1 or 2 weights. + This limit makes the code using this array even simpler and faster. + This array is used for prefix optimization. + */ + MY_UCA_WEIGHT2 weight_strings_2bytes_to_1_or_2_weights[0x10000]; +} MY_UCA_LEVEL_BOOSTER; + + +typedef struct my_uca_contraction_hash_t +{ + size_t nitems_alloced; + MY_CONTRACTION *item; +} MY_UCA_CONTRACTION_HASH; + + /* Collation weights on a single level (e.g. primary, secondary, tertiarty) */ typedef struct my_uca_level_info_st { @@ -147,6 +208,8 @@ typedef struct my_uca_level_info_st uint16 **weights; MY_CONTRACTIONS contractions; uint levelno; + MY_UCA_CONTRACTION_HASH contraction_hash; + MY_UCA_LEVEL_BOOSTER *booster; } MY_UCA_WEIGHT_LEVEL; @@ -168,6 +231,9 @@ typedef struct uca_info_st my_wc_t first_variable; my_wc_t last_variable; + /* Unicode version */ + uint version; + } MY_UCA_INFO; @@ -237,6 +303,46 @@ typedef enum enum_repertoire_t } my_repertoire_t; +/* ID compatibility */ +typedef enum enum_collation_id_type +{ + MY_COLLATION_ID_TYPE_PRECISE= 0, + MY_COLLATION_ID_TYPE_COMPAT_100800= 1 +} my_collation_id_type_t; + + +/* Collation name display modes */ +typedef enum enum_collation_name_mode +{ + MY_COLLATION_NAME_MODE_FULL= 0, + MY_COLLATION_NAME_MODE_CONTEXT= 1 +} my_collation_name_mode_t; + + +/* Level flags */ +#define MY_CS_LEVEL_BIT_PRIMARY 0x00 +#define MY_CS_LEVEL_BIT_SECONDARY 0x01 +#define MY_CS_LEVEL_BIT_TERTIARY 0x02 +#define MY_CS_LEVEL_BIT_QUATERNARY 0x03 + +#define 
MY_CS_COLL_LEVELS_S1 (1<<MY_CS_LEVEL_BIT_PRIMARY) /* primary */ + +#define MY_CS_COLL_LEVELS_AI_CS ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \ + (1<<MY_CS_LEVEL_BIT_TERTIARY)) /* primary + tertiary */ + +#define MY_CS_COLL_LEVELS_S2 ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \ + (1<<MY_CS_LEVEL_BIT_SECONDARY)) /* primary + secondary */ + +#define MY_CS_COLL_LEVELS_S3 ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \ + (1<<MY_CS_LEVEL_BIT_SECONDARY) | \ + (1<<MY_CS_LEVEL_BIT_TERTIARY)) /* primary + secondary + tertiary */ + +#define MY_CS_COLL_LEVELS_S4 ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \ + (1<<MY_CS_LEVEL_BIT_SECONDARY) | \ + (1<<MY_CS_LEVEL_BIT_TERTIARY) | \ + (1<<MY_CS_LEVEL_BIT_QUATERNARY)) /* all four levels; outer parentheses keep each mask a single operand */ + + /* Flags for strxfrm */ #define MY_STRXFRM_LEVEL1 0x00000001 /* for primary weights */ #define MY_STRXFRM_LEVEL2 0x00000002 /* for secondary weights */ @@ -437,8 +543,13 @@ struct my_collation_handler_st */ size_t (*min_str)(CHARSET_INFO *cs, uchar *dst, size_t dstlen, size_t nchars); size_t (*max_str)(CHARSET_INFO *cs, uchar *dst, size_t dstlen, size_t nchars); + + uint (*get_id)(CHARSET_INFO *cs, my_collation_id_type_t type); + LEX_CSTRING (*get_collation_name)(CHARSET_INFO *cs, + my_collation_name_mode_t mode); }; + extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler; extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; extern MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler; @@ -840,6 +951,21 @@ struct charset_info_st } /* Collation routines */ + uint default_flag() const + { + return state & MY_CS_PRIMARY; + } + + uint binsort_flag() const + { + return state & MY_CS_BINSORT; + } + + uint compiled_flag() const + { + return state & MY_CS_COMPILED; + } + int strnncoll(const uchar *a, size_t alen, const uchar *b, size_t blen, my_bool b_is_prefix= FALSE) const { @@ -937,6 +1063,15 @@ struct charset_info_st return (coll->max_str)(this, dst, dstlen, nchars); } + uint get_id(my_collation_id_type_t type) const + { + return (coll->get_id)(this, type); + } + + LEX_CSTRING get_collation_name(my_collation_name_mode_t mode) const + { + return (coll->get_collation_name)(this, mode); + } #endif /* 
__cplusplus */ }; @@ -1517,6 +1652,9 @@ extern size_t my_strcspn(CHARSET_INFO *cs, const char *str, const char *end, my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len); my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len); +uint my_ci_get_id_generic(CHARSET_INFO *cs, my_collation_id_type_t type); +LEX_CSTRING my_ci_get_collation_name_generic(CHARSET_INFO *cs, + my_collation_name_mode_t mode); typedef struct { @@ -1531,7 +1669,7 @@ my_repertoire_t my_string_repertoire(CHARSET_INFO *cs, my_bool my_charset_is_ascii_based(CHARSET_INFO *cs); my_repertoire_t my_charset_repertoire(CHARSET_INFO *cs); -uint my_strxfrm_flag_normalize(uint flags, uint nlevels); +uint my_strxfrm_flag_normalize(CHARSET_INFO *cs, uint flags); void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend, uint flags, uint level); size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs, diff --git a/include/my_rnd.h b/include/my_rnd.h index a3e3788085d..dc8efbd276e 100644 --- a/include/my_rnd.h +++ b/include/my_rnd.h @@ -25,7 +25,6 @@ struct my_rnd_struct { void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2); double my_rnd(struct my_rnd_struct *rand_st); -double my_rnd_ssl(struct my_rnd_struct *rand_st); C_MODE_END diff --git a/include/my_sys.h b/include/my_sys.h index 00a901a313b..a292200d2c4 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -243,7 +243,7 @@ extern void (*proc_info_hook)(void *, const PSI_stage_info *, PSI_stage_info *, const char *, const char *, const unsigned int); /* charsets */ -#define MY_ALL_CHARSETS_SIZE 2048 +#define MY_ALL_CHARSETS_SIZE 4096 extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *default_charset_info; extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE]; extern struct charset_info_st compiled_charsets[]; @@ -1123,4 +1123,135 @@ void my_init_mysys_psi_keys(void); struct st_mysql_file; extern struct st_mysql_file *mysql_stdin; C_MODE_END + + +#ifdef __cplusplus + +class 
Charset_loader_mysys: public MY_CHARSET_LOADER +{ +public: + Charset_loader_mysys() + { + my_charset_loader_init_mysys(this); + } + + /** + Get a CHARSET_INFO by a character set name. + + @param cs_name Character set name + @param cs_flags e.g. MY_CS_PRIMARY, MY_CS_BINARY + @param my_flags mysys flags (MY_WME, MY_UTF8_IS_UTF8MB3) + @return + @retval NULL on error (e.g. not found) + @retval A CHARSET_INFO pointer on success + */ + CHARSET_INFO *get_charset(const char *cs_name, uint cs_flags, myf my_flags) + { + error[0]= '\0'; // Need to clear in case of the second call + return my_charset_get_by_name(this, cs_name, cs_flags, my_flags); + } + + /** + Get a CHARSET_INFO by an exact collation name. + + @param name Collation name + @param my_flags e.g. the utf8 translation flag + @return + @retval NULL on error (e.g. not found) + @retval A CHARSET_INFO pointer on success + */ + CHARSET_INFO *get_exact_collation(const char *name, myf my_flags) + { + error[0]= '\0'; // Need to clear in case of the second call + return my_collation_get_by_name(this, name, my_flags); + } + + /** + Get a CHARSET_INFO by a context collation name. + The returned pointer must be further resolved to a character set. + + @param name Collation name + @param my_flags The utf8 translation flag + @return + @retval NULL on error (e.g. not found) + @retval A CHARSET_INFO pointer on success + */ + CHARSET_INFO *get_context_collation(const char *name, myf my_flags) + { + return get_exact_collation_by_context_name(&my_charset_utf8mb4_general_ci, + name, my_flags); + } + + /** + Get an exact CHARSET_INFO by a contextually typed collation name. + + @param name Collation name + @param my_flags The utf8 translation flag + @return + @retval NULL on error (e.g. 
not found) + @retval A CHARSET_INFO pointer on success + */ + CHARSET_INFO *get_exact_collation_by_context_name(CHARSET_INFO *cs, + const char *name, + myf my_flags) + { + char tmp[MY_CS_COLLATION_NAME_SIZE]; + my_snprintf(tmp, sizeof(tmp), "%s_%s", cs->cs_name.str, name); + return get_exact_collation(tmp, my_flags); + } + + /* + Find a collation with binary comparison rules + */ + CHARSET_INFO *get_bin_collation(CHARSET_INFO *cs, myf my_flags) + { + /* + We don't need to handle old_mode=UTF8_IS_UTF8MB3 here. + This method assumes that "cs" points to a real character set name. + It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8". + No thd->get_utf8_flag() flag passed to get_charset_by_csname(). + */ + DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4)); + /* + CREATE TABLE t1 (a CHAR(10) BINARY) + CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + Nothing to do, we have the binary collation already. + */ + if (cs->state & MY_CS_BINSORT) + return cs; + + // CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4; + error[0]= '\0'; // Need to clear in case of the second execution + return get_charset(cs->cs_name.str, MY_CS_BINSORT, my_flags); + } + + /* + Find the default collation in the given character set + */ + CHARSET_INFO *get_default_collation(CHARSET_INFO *cs, myf my_flags) + { + // See comments in get_bin_collation() + DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4)); + /* + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4; + Nothing to do, we have the default collation already. + */ + if (cs->state & MY_CS_PRIMARY) + return cs; + /* + CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) + CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; + + Don't need to handle old_mode=UTF8_IS_UTF8MB3 here. + See comments in get_bin_collation(). 
+ */ + cs= get_charset(cs->cs_name.str, MY_CS_PRIMARY, my_flags); + DBUG_ASSERT(cs); + return cs; + } +}; + +#endif /*__cplusplus */ + + #endif /* _my_sys_h */ diff --git a/include/sslopt-longopts.h b/include/sslopt-longopts.h index d0278a1645d..b6983b2e718 100644 --- a/include/sslopt-longopts.h +++ b/include/sslopt-longopts.h @@ -21,7 +21,7 @@ {"ssl", OPT_SSL_SSL, "Enable SSL for connection (automatically enabled with other flags).", - &opt_use_ssl, &opt_use_ssl, 0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + &opt_use_ssl, &opt_use_ssl, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, {"ssl-ca", OPT_SSL_CA, "CA file in PEM format (check OpenSSL docs, implies --ssl).", &opt_ssl_ca, &opt_ssl_ca, 0, GET_STR, REQUIRED_ARG, diff --git a/include/sslopt-vars.h b/include/sslopt-vars.h index e28f19b919d..d263e5dbd90 100644 --- a/include/sslopt-vars.h +++ b/include/sslopt-vars.h @@ -22,7 +22,7 @@ #else #define SSL_STATIC static #endif -SSL_STATIC my_bool opt_use_ssl = 0; +SSL_STATIC my_bool opt_use_ssl = 1; SSL_STATIC char *opt_ssl_ca = 0; SSL_STATIC char *opt_ssl_capath = 0; SSL_STATIC char *opt_ssl_cert = 0; |