summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/m_ctype.h144
-rw-r--r--include/my_rnd.h1
-rw-r--r--include/my_sys.h133
-rw-r--r--include/sslopt-longopts.h2
-rw-r--r--include/sslopt-vars.h2
5 files changed, 275 insertions, 7 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 83b12c3c4f6..811b3b71a17 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -34,7 +34,9 @@ enum loglevel {
extern "C" {
#endif
-#define MY_CS_NAME_SIZE 32
+#define MY_CS_CHARACTER_SET_NAME_SIZE 32
+#define MY_CS_COLLATION_NAME_SIZE 64
+
#define MY_CS_CTYPE_TABLE_SIZE 257
#define MY_CS_TO_LOWER_TABLE_SIZE 256
#define MY_CS_TO_UPPER_TABLE_SIZE 256
@@ -116,7 +118,7 @@ extern MY_UNICASE_INFO my_unicase_unicode520;
*/
#define MY_UCA_MAX_WEIGHT_SIZE (8+1) /* Including 0 terminator */
#define MY_UCA_CONTRACTION_MAX_WEIGHT_SIZE (2*8+1) /* Including 0 terminator */
-#define MY_UCA_WEIGHT_LEVELS 2
+#define MY_UCA_WEIGHT_LEVELS 3
typedef struct my_contraction_t
{
@@ -139,6 +141,65 @@ const uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
my_wc_t wc1, my_wc_t wc2);
+/* Holds the 1 or 2 weights that a 2-byte input sequence produces */
+typedef struct my_uca_weight2_t
+{
+ uint16 weight[2];
+} MY_UCA_WEIGHT2;
+
+
+/*
+ In DUCET as of Unicode-14.0.0:
+ - All characters in the range U+0000..U+007F (i.e. using one byte in utf8)
+ have no more than two weights on all weight levels.
+ - All characters in the range U+0080..U+07FF (i.e. using two bytes in utf8)
+ have no more than four weights on all weight levels.
+ Therefore the limit of 4 weights should cover all byte pairs
+ (i.e. two ASCII characters or one 2-byte character)
+ that are subject to the "process 2 bytes at a time" optimization.
+ If some collation reorders any character from the mentioned ranges
+ in a way that makes it produce more weights, such a character will not
+ be optimized, but will be correctly processed by the slower mb_wc-based
+ method (1 character at a time).
+*/
+#define MY_UCA_2BYTES_MAX_WEIGHT_SIZE (4+1) /* Including 0 terminator */
+
+/* Up to MY_UCA_2BYTES_MAX_WEIGHT_SIZE-1 weights plus the 0 terminator */
+typedef struct my_uca_2bytes_item_t
+{
+ uint16 weight[MY_UCA_2BYTES_MAX_WEIGHT_SIZE];
+} MY_UCA_2BYTES_ITEM;
+
+
+/*
+ Lookup tables that let string comparison consume 2 bytes at a time.
+ Both arrays have 0x10000 elements, i.e. one per possible 2-byte value.
+ NOTE(review): presumably indexed by the two input bytes combined into
+ a 16-bit number - confirm against the code that fills these arrays.
+*/
+typedef struct my_uca_level_booster_t
+{
+ /*
+ A helper array to process 2 bytes at a time during string comparison.
+ It maps all 2-byte sequences that make:
+ - two ASCII characters or
+ - one 2-byte character
+ to their weights. The weight length is limited to
+ MY_UCA_2BYTES_MAX_WEIGHT_SIZE-1 weights.
+ This array is used in the main loop optimization.
+ */
+ MY_UCA_2BYTES_ITEM weight_strings_2bytes[0x10000];
+ /*
+ A helper array to process 2 bytes at a time during string comparison,
+ in an even more efficient way than the above one.
+ The weight size is limited to 2 weights, so it's used for the cases
+ when 2 input bytes produce 1 or 2 weights.
+ This limit makes the code using this array even simpler and faster.
+ This array is used for prefix optimization.
+ */
+ MY_UCA_WEIGHT2 weight_strings_2bytes_to_1_or_2_weights[0x10000];
+} MY_UCA_LEVEL_BOOSTER;
+
+
+/*
+ Contraction storage embedded into MY_UCA_WEIGHT_LEVEL
+ (as its contraction_hash member).
+*/
+typedef struct my_uca_contraction_hash_t
+{
+ size_t nitems_alloced; /* presumably the number of elements in item[] - confirm */
+ MY_CONTRACTION *item;
+} MY_UCA_CONTRACTION_HASH;
+
+
/* Collation weights on a single level (e.g. primary, secondary, tertiary) */
typedef struct my_uca_level_info_st
{
@@ -147,6 +208,8 @@ typedef struct my_uca_level_info_st
uint16 **weights;
MY_CONTRACTIONS contractions;
uint levelno;
+ MY_UCA_CONTRACTION_HASH contraction_hash;
+ MY_UCA_LEVEL_BOOSTER *booster;
} MY_UCA_WEIGHT_LEVEL;
@@ -168,6 +231,9 @@ typedef struct uca_info_st
my_wc_t first_variable;
my_wc_t last_variable;
+ /* Unicode version */
+ uint version;
+
} MY_UCA_INFO;
@@ -237,6 +303,46 @@ typedef enum enum_repertoire_t
} my_repertoire_t;
+/*
+ ID compatibility.
+ Selects which kind of collation ID the get_id() collation handler
+ method is asked for.
+ NOTE(review): COMPAT_100800 presumably means "compatible with
+ version 10.8.0" - confirm.
+*/
+typedef enum enum_collation_id_type
+{
+ MY_COLLATION_ID_TYPE_PRECISE= 0,
+ MY_COLLATION_ID_TYPE_COMPAT_100800= 1
+} my_collation_id_type_t;
+
+
+/*
+ Collation name display modes.
+ Selects which form of the name the get_collation_name() collation
+ handler method is asked for.
+ NOTE(review): CONTEXT presumably means the name without the
+ character set prefix - confirm.
+*/
+typedef enum enum_collation_name_mode
+{
+ MY_COLLATION_NAME_MODE_FULL= 0,
+ MY_COLLATION_NAME_MODE_CONTEXT= 1
+} my_collation_name_mode_t;
+
+
+/*
+ Level flags.
+ MY_CS_LEVEL_BIT_xxx is a bit position, MY_CS_COLL_LEVELS_xxx is a bit mask
+ built from these positions. Every mask is wrapped in parentheses as a whole,
+ so that it can be safely combined with operators that bind tighter than '|',
+ e.g. (flags & MY_CS_COLL_LEVELS_S3) or (levels == MY_CS_COLL_LEVELS_S2).
+*/
+#define MY_CS_LEVEL_BIT_PRIMARY 0x00
+#define MY_CS_LEVEL_BIT_SECONDARY 0x01
+#define MY_CS_LEVEL_BIT_TERTIARY 0x02
+#define MY_CS_LEVEL_BIT_QUATERNARY 0x03
+
+#define MY_CS_COLL_LEVELS_S1 (1<<MY_CS_LEVEL_BIT_PRIMARY)
+
+#define MY_CS_COLL_LEVELS_AI_CS ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \
+ (1<<MY_CS_LEVEL_BIT_TERTIARY))
+
+#define MY_CS_COLL_LEVELS_S2 ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \
+ (1<<MY_CS_LEVEL_BIT_SECONDARY))
+
+#define MY_CS_COLL_LEVELS_S3 ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \
+ (1<<MY_CS_LEVEL_BIT_SECONDARY) | \
+ (1<<MY_CS_LEVEL_BIT_TERTIARY))
+
+#define MY_CS_COLL_LEVELS_S4 ((1<<MY_CS_LEVEL_BIT_PRIMARY)| \
+ (1<<MY_CS_LEVEL_BIT_SECONDARY) | \
+ (1<<MY_CS_LEVEL_BIT_TERTIARY) | \
+ (1<<MY_CS_LEVEL_BIT_QUATERNARY))
+
+
/* Flags for strxfrm */
#define MY_STRXFRM_LEVEL1 0x00000001 /* for primary weights */
#define MY_STRXFRM_LEVEL2 0x00000002 /* for secondary weights */
@@ -437,8 +543,13 @@ struct my_collation_handler_st
*/
size_t (*min_str)(CHARSET_INFO *cs, uchar *dst, size_t dstlen, size_t nchars);
size_t (*max_str)(CHARSET_INFO *cs, uchar *dst, size_t dstlen, size_t nchars);
+
+ uint (*get_id)(CHARSET_INFO *cs, my_collation_id_type_t type);
+ LEX_CSTRING (*get_collation_name)(CHARSET_INFO *cs,
+ my_collation_name_mode_t mode);
};
+
extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler;
@@ -840,6 +951,21 @@ struct charset_info_st
}
/* Collation routines */
+ /* Return the MY_CS_PRIMARY bit of "state": the raw masked value, 0 if unset */
+ uint default_flag() const
+ {
+ return state & MY_CS_PRIMARY;
+ }
+
+ /* Return the MY_CS_BINSORT bit of "state": the raw masked value, 0 if unset */
+ uint binsort_flag() const
+ {
+ return state & MY_CS_BINSORT;
+ }
+
+ /* Return the MY_CS_COMPILED bit of "state": the raw masked value, 0 if unset */
+ uint compiled_flag() const
+ {
+ return state & MY_CS_COMPILED;
+ }
+
int strnncoll(const uchar *a, size_t alen,
const uchar *b, size_t blen, my_bool b_is_prefix= FALSE) const
{
@@ -937,6 +1063,15 @@ struct charset_info_st
return (coll->max_str)(this, dst, dstlen, nchars);
}
+ /* Forward to the get_id method of the collation handler "coll" */
+ uint get_id(my_collation_id_type_t type) const
+ {
+ return (coll->get_id)(this, type);
+ }
+
+ /* Forward to the get_collation_name method of the collation handler "coll" */
+ LEX_CSTRING get_collation_name(my_collation_name_mode_t mode) const
+ {
+ return (coll->get_collation_name)(this, mode);
+ }
#endif /* __cplusplus */
};
@@ -1517,6 +1652,9 @@ extern size_t my_strcspn(CHARSET_INFO *cs, const char *str, const char *end,
my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, size_t len);
my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
+uint my_ci_get_id_generic(CHARSET_INFO *cs, my_collation_id_type_t type);
+LEX_CSTRING my_ci_get_collation_name_generic(CHARSET_INFO *cs,
+ my_collation_name_mode_t mode);
typedef struct
{
@@ -1531,7 +1669,7 @@ my_repertoire_t my_string_repertoire(CHARSET_INFO *cs,
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
my_repertoire_t my_charset_repertoire(CHARSET_INFO *cs);
-uint my_strxfrm_flag_normalize(uint flags, uint nlevels);
+uint my_strxfrm_flag_normalize(CHARSET_INFO *cs, uint flags);
void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
uint flags, uint level);
size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
diff --git a/include/my_rnd.h b/include/my_rnd.h
index a3e3788085d..dc8efbd276e 100644
--- a/include/my_rnd.h
+++ b/include/my_rnd.h
@@ -25,7 +25,6 @@ struct my_rnd_struct {
void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2);
double my_rnd(struct my_rnd_struct *rand_st);
-double my_rnd_ssl(struct my_rnd_struct *rand_st);
C_MODE_END
diff --git a/include/my_sys.h b/include/my_sys.h
index 00a901a313b..a292200d2c4 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -243,7 +243,7 @@ extern void (*proc_info_hook)(void *, const PSI_stage_info *, PSI_stage_info *,
const char *, const char *, const unsigned int);
/* charsets */
-#define MY_ALL_CHARSETS_SIZE 2048
+#define MY_ALL_CHARSETS_SIZE 4096
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *default_charset_info;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *all_charsets[MY_ALL_CHARSETS_SIZE];
extern struct charset_info_st compiled_charsets[];
@@ -1123,4 +1123,135 @@ void my_init_mysys_psi_keys(void);
struct st_mysql_file;
extern struct st_mysql_file *mysql_stdin;
C_MODE_END
+
+
+#ifdef __cplusplus
+
+/*
+ A MY_CHARSET_LOADER that initializes itself with
+ my_charset_loader_init_mysys() and adds helpers to find
+ character sets and collations by name.
+ The lookup helpers reset error[0] before every lookup, so the same
+ loader instance can be used for more than one call.
+*/
+class Charset_loader_mysys: public MY_CHARSET_LOADER
+{
+public:
+ Charset_loader_mysys()
+ {
+ my_charset_loader_init_mysys(this);
+ }
+
+ /**
+ Get a CHARSET_INFO by a character set name.
+
+ @param cs_name Character set name
+ @param cs_flags e.g. MY_CS_PRIMARY, MY_CS_BINSORT
+ @param my_flags mysys flags (MY_WME, MY_UTF8_IS_UTF8MB3)
+ @return
+ @retval NULL on error (e.g. not found)
+ @retval A CHARSET_INFO pointer on success
+ */
+ CHARSET_INFO *get_charset(const char *cs_name, uint cs_flags, myf my_flags)
+ {
+ error[0]= '\0'; // Need to clear in case of the second call
+ return my_charset_get_by_name(this, cs_name, cs_flags, my_flags);
+ }
+
+ /**
+ Get a CHARSET_INFO by an exact collation name.
+
+ @param name Collation name
+ @param my_flags e.g. the utf8 translation flag
+ @return
+ @retval NULL on error (e.g. not found)
+ @retval A CHARSET_INFO pointer on success
+ */
+ CHARSET_INFO *get_exact_collation(const char *name, myf my_flags)
+ {
+ error[0]= '\0'; // Need to clear in case of the second call
+ return my_collation_get_by_name(this, name, my_flags);
+ }
+
+ /**
+ Get a CHARSET_INFO by a context collation name.
+ The returned pointer must be further resolved to a character set.
+ The lookup is done with my_charset_utf8mb4_general_ci as the
+ character set that supplies the name prefix.
+
+ @param name Collation name
+ @param my_flags e.g. the utf8 translation flag
+ @return
+ @retval NULL on error (e.g. not found)
+ @retval A CHARSET_INFO pointer on success
+ */
+ CHARSET_INFO *get_context_collation(const char *name, myf my_flags)
+ {
+ return get_exact_collation_by_context_name(&my_charset_utf8mb4_general_ci,
+ name, my_flags);
+ }
+
+ /**
+ Get an exact CHARSET_INFO by a contextually typed collation name.
+ The exact name is built as "<cs->cs_name>_<name>".
+
+ @param cs Character set whose cs_name is used as the prefix
+ @param name Collation name
+ @param my_flags e.g. the utf8 translation flag
+ @return
+ @retval NULL on error (e.g. not found)
+ @retval A CHARSET_INFO pointer on success
+ */
+ CHARSET_INFO *get_exact_collation_by_context_name(CHARSET_INFO *cs,
+ const char *name,
+ myf my_flags)
+ {
+ // The formatted name is bounded by sizeof(tmp)
+ char tmp[MY_CS_COLLATION_NAME_SIZE];
+ my_snprintf(tmp, sizeof(tmp), "%s_%s", cs->cs_name.str, name);
+ return get_exact_collation(tmp, my_flags);
+ }
+
+ /*
+ Find a collation with binary comparison rules.
+ Returns "cs" itself if it already has MY_CS_BINSORT set in its state,
+ otherwise looks up cs->cs_name with the MY_CS_BINSORT flag.
+ */
+ CHARSET_INFO *get_bin_collation(CHARSET_INFO *cs, myf my_flags)
+ {
+ /*
+ We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
+ This method assumes that "cs" points to a real character set name.
+ It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8".
+ No thd->get_utf8_flag() flag passed to get_charset_by_csname().
+ */
+ DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4));
+ /*
+ CREATE TABLE t1 (a CHAR(10) BINARY)
+ CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+ Nothing to do, we have the binary collation already.
+ */
+ if (cs->state & MY_CS_BINSORT)
+ return cs;
+
+ // CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4;
+ error[0]= '\0'; // Need in case of the second execution
+ return get_charset(cs->cs_name.str, MY_CS_BINSORT, my_flags);
+ }
+
+ /*
+ Find the default collation in the given character set.
+ Returns "cs" itself if it already has MY_CS_PRIMARY set in its state,
+ otherwise looks up cs->cs_name with the MY_CS_PRIMARY flag.
+ The lookup is expected to succeed (see the DBUG_ASSERT on the result).
+ */
+ CHARSET_INFO *get_default_collation(CHARSET_INFO *cs, myf my_flags)
+ {
+ // See the comment on the same assertion in get_bin_collation()
+ DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4));
+ /*
+ CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4;
+ Nothing to do, we have the default collation already.
+ */
+ if (cs->state & MY_CS_PRIMARY)
+ return cs;
+ /*
+ CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
+ CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+
+ Don't need to handle old_mode=UTF8_IS_UTF8MB3 here.
+ See the comment in get_bin_collation().
+ */
+ cs= get_charset(cs->cs_name.str, MY_CS_PRIMARY, my_flags);
+ DBUG_ASSERT(cs);
+ return cs;
+ }
+};
+
+#endif /*__cplusplus */
+
+
#endif /* _my_sys_h */
diff --git a/include/sslopt-longopts.h b/include/sslopt-longopts.h
index d0278a1645d..b6983b2e718 100644
--- a/include/sslopt-longopts.h
+++ b/include/sslopt-longopts.h
@@ -21,7 +21,7 @@
{"ssl", OPT_SSL_SSL,
"Enable SSL for connection (automatically enabled with other flags).",
- &opt_use_ssl, &opt_use_ssl, 0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
+ &opt_use_ssl, &opt_use_ssl, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
{"ssl-ca", OPT_SSL_CA,
"CA file in PEM format (check OpenSSL docs, implies --ssl).",
&opt_ssl_ca, &opt_ssl_ca, 0, GET_STR, REQUIRED_ARG,
diff --git a/include/sslopt-vars.h b/include/sslopt-vars.h
index e28f19b919d..d263e5dbd90 100644
--- a/include/sslopt-vars.h
+++ b/include/sslopt-vars.h
@@ -22,7 +22,7 @@
#else
#define SSL_STATIC static
#endif
-SSL_STATIC my_bool opt_use_ssl = 0;
+SSL_STATIC my_bool opt_use_ssl = 1;
SSL_STATIC char *opt_ssl_ca = 0;
SSL_STATIC char *opt_ssl_capath = 0;
SSL_STATIC char *opt_ssl_cert = 0;