summaryrefslogtreecommitdiff
path: root/strings
diff options
context:
space:
mode:
Diffstat (limited to 'strings')
-rw-r--r--strings/ctype-uca.c1444
-rw-r--r--strings/ctype-uca.ic839
-rw-r--r--strings/ctype-ucs2.c145
-rw-r--r--strings/ctype-ucs2.h32
-rw-r--r--strings/ctype-unidata.h31
-rw-r--r--strings/ctype-utf16.h80
-rw-r--r--strings/ctype-utf32.h33
-rw-r--r--strings/ctype-utf8.c364
-rw-r--r--strings/ctype-utf8.h190
-rw-r--r--strings/json_lib.c249
-rw-r--r--strings/strcoll.ic267
11 files changed, 2260 insertions, 1414 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 9efd7242118..8368e33cc1d 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -31158,17 +31158,6 @@ typedef struct my_uca_scanner_st
CHARSET_INFO *cs;
} my_uca_scanner;
-/*
- Charset dependent scanner part, to optimize
- some character sets.
-*/
-typedef struct my_uca_scanner_handler_st
-{
- void (*init)(my_uca_scanner *scanner, CHARSET_INFO *cs,
- const MY_UCA_WEIGHT_LEVEL *level,
- const uchar *str, size_t length);
- int (*next)(my_uca_scanner *scanner);
-} my_uca_scanner_handler;
static const uint16 nochar[]= {0,0};
@@ -31421,6 +31410,28 @@ my_uca_can_be_previous_context_tail(const MY_CONTRACTIONS *list, my_wc_t wc)
/**
+ Check if a character needs previous/next context handling:
+ - can be a previois context tail
+ - can be a contraction start
+
+ @param level Pointer to an UCA weight level data
+ @param wc Code point
+
+ @return
+ @retval FALSE - does not need context handling
+ @retval TRUE - needs context handing
+*/
+
+static inline my_bool
+my_uca_needs_context_handling(const MY_UCA_WEIGHT_LEVEL *level, my_wc_t wc)
+{
+ return level->contractions.nitems > 0 &&
+ level->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] &
+ (MY_UCA_PREVIOUS_CONTEXT_TAIL | MY_UCA_CNT_HEAD);
+}
+
+
+/**
Compare two wide character strings, wide analog to strncmp().
@param a Pointer to the first string
@@ -31554,6 +31565,60 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
return NULL;
}
+
+/*
+ Find a context dependent weight of a character.
+ @param scanner - UCA weight scanner. The caller should set
+ its members "page" and "code" to the previous character
+ (or to zeros if there is no a previous character).
+ @param wc - an array of wide characters which has at least
+ MY_UCA_MAX_CONTRACTION elements, where wc[0] is set
+ to the current character (whose weight is being resolved).
+ The values of wc[i>0] is not important, but if wc[0]
+ appears to be a known contraction head, the function
+ will collect further contraction parts into wc[i>0].
+ If wc[0] and the previous character make a previous context
+ pair, then wc[1] is set to the previous character.
+
+ @retval NULL if could not find any contextual weights for wc[0]
+ @retval non null pointer to a zero-terminated weight string otherwise
+*/
+static inline uint16 *
+my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
+{
+ uint16 *cweight;
+ DBUG_ASSERT(scanner->level->contractions.nitems);
+ /*
+ If we have scanned a character which can have previous context,
+ and there were some more characters already before,
+ then reconstruct codepoint of the previous character
+ from "page" and "code" into w[1], and verify that {wc[1], wc[0]}
+ together form a real previous context pair.
+ Note, we support only 2-character long sequences with previous
+ context at the moment. CLDR does not have longer sequences.
+ */
+ if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
+ wc[0]) &&
+ scanner->wbeg != nochar && /* if not the very first character */
+ my_uca_can_be_previous_context_head(&scanner->level->contractions,
+ (wc[1]= ((scanner->page << 8) +
+ scanner->code))) &&
+ (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
+ {
+ scanner->page= scanner->code= 0; /* Clear for the next character */
+ return cweight;
+ }
+ else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
+ wc[0]))
+ {
+ /* Check if w[0] starts a contraction */
+ if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
+ return cweight;
+ }
+ return NULL;
+}
+
+
/****************************************************************/
/**
@@ -31675,223 +31740,6 @@ my_uca_scanner_init_any(my_uca_scanner *scanner,
scanner->cs= cs;
}
-static int my_uca_scanner_next_any(my_uca_scanner *scanner)
-{
- /*
- Check if the weights for the previous character have been
- already fully scanned. If yes, then get the next character and
- initialize wbeg and wlength to its weight string.
- */
-
- if (scanner->wbeg[0]) /* More weights left from the previous step: */
- return *scanner->wbeg++; /* return the next weight from expansion */
-
- do
- {
- const uint16 *wpage;
- my_wc_t wc[MY_UCA_MAX_CONTRACTION];
- int mblen;
-
- /* Get next character */
- if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, wc,
- scanner->sbeg,
- scanner->send)) <= 0))
- {
- if (scanner->sbeg >= scanner->send)
- return -1; /* No more bytes, end of line reached */
- /*
- There are some more bytes left. Non-positive mb_len means that
- we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
- */
- if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
- {
- /* For safety purposes don't go beyond the string range. */
- scanner->sbeg= scanner->send;
- }
- /*
- Treat every complete or incomplete mbminlen unit as a weight which is
- greater than weight for any possible normal character.
- 0xFFFF is greater than any possible weight in the UCA weight table.
- */
- return 0xFFFF;
- }
-
- scanner->sbeg+= mblen;
- if (wc[0] > scanner->level->maxchar)
- {
- /* Return 0xFFFD as weight for all characters outside BMP */
- scanner->wbeg= nochar;
- return 0xFFFD;
- }
-
- if (my_uca_have_contractions_quick(scanner->level))
- {
- uint16 *cweight;
- /*
- If we have scanned a character which can have previous context,
- and there were some more characters already before,
- then reconstruct codepoint of the previous character
- from "page" and "code" into w[1], and verify that {wc[1], wc[0]}
- together form a real previous context pair.
- Note, we support only 2-character long sequences with previous
- context at the moment. CLDR does not have longer sequences.
- */
- if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
- wc[0]) &&
- scanner->wbeg != nochar && /* if not the very first character */
- my_uca_can_be_previous_context_head(&scanner->level->contractions,
- (wc[1]= ((scanner->page << 8) +
- scanner->code))) &&
- (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
- {
- scanner->page= scanner->code= 0; /* Clear for the next character */
- return *cweight;
- }
- else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
- wc[0]))
- {
- /* Check if w[0] starts a contraction */
- if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
- return *cweight;
- }
- }
-
- /* Process single character */
- scanner->page= wc[0] >> 8;
- scanner->code= wc[0] & 0xFF;
-
- /* If weight page for w[0] does not exist, then calculate algoritmically */
- if (!(wpage= scanner->level->weights[scanner->page]))
- return my_uca_scanner_next_implicit(scanner);
-
- /* Calculate pointer to w[0]'s weight, using page and offset */
- scanner->wbeg= wpage +
- scanner->code * scanner->level->lengths[scanner->page];
- } while (!scanner->wbeg[0]); /* Skip ignorable characters */
-
- return *scanner->wbeg++;
-}
-
-
-static my_uca_scanner_handler my_any_uca_scanner_handler=
-{
- my_uca_scanner_init_any,
- my_uca_scanner_next_any
-};
-
-/*
- Compares two strings according to the collation
-
- SYNOPSIS:
- my_strnncoll_uca()
- cs Character set information
- s First string
- slen First string length
- t Second string
- tlen Seconf string length
- level DUCETweight level
-
- NOTES:
- Initializes two weight scanners and gets weights
- corresponding to two strings in a loop. If weights are not
- the same at some step then returns their difference.
-
- In the while() comparison these situations are possible:
- 1. (s_res>0) and (t_res>0) and (s_res == t_res)
- Weights are the same so far, continue comparison
- 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
- A difference has been found, return.
- 3. (s_res>0) and (t_res<0)
- We have reached the end of the second string, or found
- an illegal multibyte sequence in the second string.
- Return a positive number, i.e. the first string is bigger.
- 4. (s_res<0) and (t_res>0)
- We have reached the end of the first string, or found
- an illegal multibyte sequence in the first string.
- Return a negative number, i.e. the second string is bigger.
- 5. (s_res<0) and (t_res<0)
- Both scanners returned -1. It means we have riched
- the end-of-string of illegal-sequence in both strings
- at the same time. Return 0, strings are equal.
-
- RETURN
- Difference between two strings, according to the collation:
- 0 - means strings are equal
- negative number - means the first string is smaller
- positive number - means the first string is bigger
-*/
-
-static int my_strnncoll_uca_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const MY_UCA_WEIGHT_LEVEL *level,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- my_uca_scanner sscanner;
- my_uca_scanner tscanner;
- int s_res;
- int t_res;
-
- scanner_handler->init(&sscanner, cs, level, s, slen);
- scanner_handler->init(&tscanner, cs, level, t, tlen);
-
- do
- {
- s_res= scanner_handler->next(&sscanner);
- t_res= scanner_handler->next(&tscanner);
- } while ( s_res == t_res && s_res >0);
-
- return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
-}
-
-static int my_strnncoll_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
- s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncoll_uca_multilevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- uint num_level= cs->levels_for_order;
- uint i;
- for (i= 0; i != num_level; i++)
- {
- int ret= my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[i],
- s, slen, t, tlen, t_is_prefix);
- if (ret)
- return ret;
- }
- return 0;
-}
-
-
-static int
-my_strnncollsp_generic_uca_nopad_multilevel(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- uint num_level= cs->levels_for_order;
- uint i;
- for (i= 0; i != num_level; i++)
- {
- int ret= my_strnncoll_uca_onelevel(cs, &my_any_uca_scanner_handler,
- &cs->uca->level[i],
- s, slen, t, tlen, FALSE);
- if (ret)
- return ret;
- }
- return 0;
-}
-
static inline int
my_space_weight(const MY_UCA_WEIGHT_LEVEL *level)
@@ -31924,258 +31772,6 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc)
}
-/*
- Compares two strings according to the collation,
- ignoring trailing spaces.
-
- SYNOPSIS:
- my_strnncollsp_uca()
- cs Character set information
- s First string
- slen First string length
- t Second string
- tlen Seconf string length
- level DUCETweight level
-
- NOTES:
- Works exactly the same with my_strnncoll_uca(),
- but ignores trailing spaces.
-
- In the while() comparison these situations are possible:
- 1. (s_res>0) and (t_res>0) and (s_res == t_res)
- Weights are the same so far, continue comparison
- 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
- A difference has been found, return.
- 3. (s_res>0) and (t_res<0)
- We have reached the end of the second string, or found
- an illegal multibyte sequence in the second string.
- Compare the first string to an infinite array of
- space characters until difference is found, or until
- the end of the first string.
- 4. (s_res<0) and (t_res>0)
- We have reached the end of the first string, or found
- an illegal multibyte sequence in the first string.
- Compare the second string to an infinite array of
- space characters until difference is found or until
- the end of the second steing.
- 5. (s_res<0) and (t_res<0)
- Both scanners returned -1. It means we have riched
- the end-of-string of illegal-sequence in both strings
- at the same time. Return 0, strings are equal.
-
- RETURN
- Difference between two strings, according to the collation:
- 0 - means strings are equal
- negative number - means the first string is smaller
- positive number - means the first string is bigger
-*/
-
-static int my_strnncollsp_uca_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const MY_UCA_WEIGHT_LEVEL *level,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- my_uca_scanner sscanner, tscanner;
- int s_res, t_res;
-
- scanner_handler->init(&sscanner, cs, level, s, slen);
- scanner_handler->init(&tscanner, cs, level, t, tlen);
-
- do
- {
- s_res= scanner_handler->next(&sscanner);
- t_res= scanner_handler->next(&tscanner);
- } while ( s_res == t_res && s_res >0);
-
- if (s_res > 0 && t_res < 0)
- {
- /* Calculate weight for SPACE character */
- t_res= my_space_weight(level);
-
- /* compare the first string to spaces */
- do
- {
- if (s_res != t_res)
- return (s_res - t_res);
- s_res= scanner_handler->next(&sscanner);
- } while (s_res > 0);
- return 0;
- }
-
- if (s_res < 0 && t_res > 0)
- {
- /* Calculate weight for SPACE character */
- s_res= my_space_weight(level);
-
- /* compare the second string to spaces */
- do
- {
- if (s_res != t_res)
- return (s_res - t_res);
- t_res= scanner_handler->next(&tscanner);
- } while (t_res > 0);
- return 0;
- }
-
- return ( s_res - t_res );
-}
-
-static int my_strnncollsp_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
- s, slen, t, tlen);
-}
-
-static int my_strnncollsp_uca_multilevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- uint num_level= cs->levels_for_order;
- uint i;
- for (i= 0; i != num_level; i++)
- {
- int ret= my_strnncollsp_uca_onelevel(cs, scanner_handler,
- &cs->uca->level[i], s, slen, t, tlen);
- if (ret)
- return ret;
- }
- return 0;
-}
-
-/*
- Calculates hash value for the given string,
- according to the collation, and ignoring trailing spaces.
-
- SYNOPSIS:
- my_hash_sort_uca()
- cs Character set information
- s String
- slen String's length
- n1 First hash parameter
- n2 Second hash parameter
-
- NOTES:
- Scans consequently weights and updates
- hash parameters n1 and n2. In a case insensitive collation,
- upper and lower case of the same letter will return the same
- weight sequence, and thus will produce the same hash values
- in n1 and n2.
-
- This functions is used for one-level and for multi-level collations.
- We intentionally use only primary level in multi-level collations.
- This helps to have PARTITION BY KEY put primarily equal records
- into the same partition. E.g. in utf8_thai_520_ci records that differ
- only in tone marks go into the same partition.
-
- RETURN
- N/A
-*/
-
-static void my_hash_sort_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- ulong *nr1, ulong *nr2)
-{
- int s_res;
- my_uca_scanner scanner;
- int space_weight= my_space_weight(&cs->uca->level[0]);
- register ulong m1= *nr1, m2= *nr2;
-
- scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
-
- while ((s_res= scanner_handler->next(&scanner)) >0)
- {
- if (s_res == space_weight)
- {
- /* Combine all spaces to be able to skip end spaces */
- uint count= 0;
- do
- {
- count++;
- if ((s_res= scanner_handler->next(&scanner)) <= 0)
- {
- /* Skip strings at end of string */
- goto end;
- }
- }
- while (s_res == space_weight);
-
- /* Add back that has for the space characters */
- do
- {
- /*
- We can't use MY_HASH_ADD_16() here as we, because of a misstake
- in the original code, where we added the 16 byte variable the
- opposite way. Changing this would cause old partitioned tables
- to fail.
- */
- MY_HASH_ADD(m1, m2, space_weight >> 8);
- MY_HASH_ADD(m1, m2, space_weight & 0xFF);
- }
- while (--count != 0);
-
- }
- /* See comment above why we can't use MY_HASH_ADD_16() */
- MY_HASH_ADD(m1, m2, s_res >> 8);
- MY_HASH_ADD(m1, m2, s_res & 0xFF);
- }
-end:
- *nr1= m1;
- *nr2= m2;
-}
-
-
-static void my_hash_sort_uca_nopad(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- ulong *nr1, ulong *nr2)
-{
- int s_res;
- my_uca_scanner scanner;
- register ulong m1= *nr1, m2= *nr2;
-
- scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
-
- while ((s_res= scanner_handler->next(&scanner)) >0)
- {
- /* See comment above why we can't use MY_HASH_ADD_16() */
- MY_HASH_ADD(m1, m2, s_res >> 8);
- MY_HASH_ADD(m1, m2, s_res & 0xFF);
- }
- *nr1= m1;
- *nr2= m2;
-}
-
-
-static uchar *
-my_strnxfrm_uca_onelevel_internal(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- MY_UCA_WEIGHT_LEVEL *level,
- uchar *dst, uchar *de, uint *nweights,
- const uchar *src, size_t srclen)
-{
- my_uca_scanner scanner;
- int s_res;
-
- DBUG_ASSERT(src || !srclen);
-
- scanner_handler->init(&scanner, cs, level, src, srclen);
- for (; dst < de && *nweights &&
- (s_res= scanner_handler->next(&scanner)) > 0 ; (*nweights)--)
- {
- *dst++= s_res >> 8;
- if (dst < de)
- *dst++= s_res & 0xFF;
- }
- return dst;
-}
-
-
static uchar *
my_strnxfrm_uca_padn(uchar *dst, uchar *de, uint nweights, int weight)
{
@@ -32202,27 +31798,6 @@ my_strnxfrm_uca_pad(uchar *dst, uchar *de, int weight)
}
-static uchar *
-my_strnxfrm_uca_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- MY_UCA_WEIGHT_LEVEL *level,
- uchar *dst, uchar *de, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
-
- dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
- dst, de, &nweights,
- src, srclen);
- DBUG_ASSERT(dst <= de);
- if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
- dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
- DBUG_ASSERT(dst <= de);
- my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
- return dst;
-}
-
-
/*
Return the minimum possible weight on a level.
*/
@@ -32233,136 +31808,6 @@ static uint min_weight_on_level(MY_UCA_WEIGHT_LEVEL *level)
}
-static uchar *
-my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- MY_UCA_WEIGHT_LEVEL *level,
- uchar *dst, uchar *de, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
-
- dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
- dst, de, &nweights,
- src, srclen);
- DBUG_ASSERT(dst <= de);
- /* Pad with the minimum possible weight on this level */
- if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
- dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
- DBUG_ASSERT(dst <= de);
- my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
- return dst;
-}
-
-
-/*
- For the given string creates its "binary image", suitable
- to be used in binary comparison, i.e. in memcmp().
-
- SYNOPSIS:
- my_strnxfrm_uca()
- cs Character set information
- dst Where to write the image
- dstlen Space available for the image, in bytes
- src The source string
- srclen Length of the source string, in bytes
-
- NOTES:
- In a loop, scans weights from the source string and writes
- them into the binary image. In a case insensitive collation,
- upper and lower cases of the same letter will produce the
- same image subsequences. When we have reached the end-of-string
- or found an illegal multibyte sequence, the loop stops.
-
- It is impossible to restore the original string using its
- binary image.
-
- Binary images are used for bulk comparison purposes,
- e.g. in ORDER BY, when it is more efficient to create
- a binary image and use it instead of weight scanner
- for the original strings for every comparison.
-
- RETURN
- Number of bytes that have been written into the binary image.
-*/
-
-
-static size_t
-my_strnxfrm_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
- uchar *de= dst + dstlen;
-
- dst= my_strnxfrm_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
- dst, de, nweights, src, srclen, flags);
- /*
- This can probably be changed to memset(dst, 0, de - dst),
- like my_strnxfrm_uca_multilevel() does.
- */
- if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
- dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0]));
- return dst - d0;
-}
-
-
-static size_t
-my_strnxfrm_uca_nopad(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
- uchar *de= dst + dstlen;
-
- dst= my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler, &cs->uca->level[0],
- dst, de, nweights, src, srclen, flags);
- if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
- {
- memset(dst, 0, de - dst);
- dst= de;
- }
- return dst - d0;
-}
-
-
-static size_t
-my_strnxfrm_uca_multilevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uint num_level= cs->levels_for_order;
- uchar *d0= dst;
- uchar *de= dst + dstlen;
- uint current_level;
-
- for (current_level= 0; current_level != num_level; current_level++)
- {
- if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
- (flags & (MY_STRXFRM_LEVEL1 << current_level)))
- dst= cs->state & MY_CS_NOPAD ?
- my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler,
- &cs->uca->level[current_level],
- dst, de, nweights,
- src, srclen, flags) :
- my_strnxfrm_uca_onelevel(cs, scanner_handler,
- &cs->uca->level[current_level],
- dst, de, nweights,
- src, srclen, flags);
- }
-
- if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
- {
- memset(dst, 0, de - dst);
- dst= de;
- }
-
- return dst - d0;
-}
-
/*
This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only
@@ -32568,6 +32013,23 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
/*
+ Tests if an optimized "no contraction" handler can be used for
+ the given collation.
+*/
+static my_bool
+my_uca_collation_can_optimize_no_contractions(CHARSET_INFO *cs)
+{
+ uint i;
+ for (i= 0; i < cs->levels_for_order ; i++)
+ {
+ if (my_uca_have_contractions_quick(&cs->uca->level[i]))
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
+/*
Collation language is implemented according to
subset of ICU Collation Customization (tailorings):
http://icu.sourceforge.net/userguide/Collate_Customization.html
@@ -34250,8 +33712,74 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
}
-MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel;
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel;
+static my_bool
+create_tailoring(struct charset_info_st *cs,
+ MY_CHARSET_LOADER *loader);
+
+static my_bool
+my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
+{
+ cs->pad_char= ' ';
+ cs->ctype= my_charset_utf8_unicode_ci.ctype;
+ if (!cs->caseinfo)
+ cs->caseinfo= &my_unicase_default;
+ return create_tailoring(cs, loader);
+}
+
+
+static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
+{
+ /* UCA uses 2 bytes per weight */
+ return (len + cs->mbmaxlen - 1) / cs->mbmaxlen * cs->strxfrm_multiply * 2;
+}
+
+static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
+{
+ return my_strnxfrmlen_any_uca(cs, len) * cs->levels_for_order;
+}
+
+
+/*
+ This structure is used at the collation initialization time, to switch
+ from a full-featured collation handler to a "no contraction" collation
+ handler if the collation is known not to have any contractions.
+*/
+typedef struct
+{
+ MY_COLLATION_HANDLER *pad;
+ MY_COLLATION_HANDLER *nopad;
+ MY_COLLATION_HANDLER *multilevel_pad;
+ MY_COLLATION_HANDLER *multilevel_nopad;
+} MY_COLLATION_HANDLER_PACKAGE;
+
+
+static void my_uca_handler_map(struct charset_info_st *cs,
+ const MY_COLLATION_HANDLER_PACKAGE *from,
+ const MY_COLLATION_HANDLER_PACKAGE *to)
+{
+ if (cs->coll == from->pad) cs->coll= to->pad;
+ else if (cs->coll == from->nopad) cs->coll= to->nopad;
+ else if (cs->coll == from->multilevel_pad) cs->coll= to->multilevel_pad;
+ else if (cs->coll == from->multilevel_nopad) cs->coll= to->multilevel_nopad;
+}
+
+
+/*
+ Define generic collation handlers for multi-level collations with tailoring:
+
+ my_uca_collation_handler_nopad_multilevel_generic
+ my_uca_collation_handler_multilevel_generic
+
+ TODO: Use faster character-set specific versions of MY_COLLATION_HANDLER
+ instead of generic.
+*/
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _generic
+#define MY_MB_WC(scanner, wc, beg, end) (scanner->cs->cset->mb_wc(scanner->cs, wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0
+#define MY_UCA_COMPILE_CONTRACTIONS 1
+#define MY_UCA_COLL_INIT my_coll_init_uca
+#include "ctype-uca.ic"
/*
@@ -34336,8 +33864,8 @@ create_tailoring(struct charset_info_st *cs,
cs->uca[0]= new_uca;
if (cs->levels_for_order > 1)
cs->coll= (cs->state & MY_CS_NOPAD) ?
- &my_collation_generic_uca_nopad_handler_multilevel :
- &my_collation_any_uca_handler_multilevel;
+ &my_uca_collation_handler_nopad_multilevel_generic :
+ &my_uca_collation_handler_multilevel_generic;
ex:
(loader->free)(rules.rule);
@@ -34346,235 +33874,17 @@ ex:
return rc;
}
-/*
- Universal CHARSET_INFO compatible wrappers
- for the above internal functions.
- Should work for any character set.
-*/
-
-static my_bool
-my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
-{
- cs->pad_char= ' ';
- cs->ctype= my_charset_utf8_unicode_ci.ctype;
- if (!cs->caseinfo)
- cs->caseinfo= &my_unicase_default;
- return create_tailoring(cs, loader);
-}
-
-
-static int my_strnncoll_any_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncoll_any_uca_multilevel(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca_multilevel(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncollsp_any_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
-}
-
-
-static int my_strnncollsp_generic_uca_nopad(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, FALSE);
-}
-
-
-static int my_strnncollsp_any_uca_multilevel(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca_multilevel(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen);
-}
-
-static void my_hash_sort_any_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- ulong *n1, ulong *n2)
-{
- my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static void my_hash_sort_generic_uca_nopad(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- ulong *n1, ulong *n2)
-{
- my_hash_sort_uca_nopad(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static size_t my_strnxfrm_any_uca(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen, flags);
-}
-
-static size_t my_strnxfrm_generic_uca_nopad(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen,
- uint nweights,
- const uchar *src, size_t srclen,
- uint flags)
-{
- return my_strnxfrm_uca_nopad(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen, flags);
-}
-
-static size_t my_strnxfrm_any_uca_multilevel(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen,
- uint nweights, const uchar *src,
- size_t srclen, uint flags)
-{
- return my_strnxfrm_uca_multilevel(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen,
- flags);
-}
-
-static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
-{
- /* UCA uses 2 bytes per weight */
- return (len + cs->mbmaxlen - 1) / cs->mbmaxlen * cs->strxfrm_multiply * 2;
-}
-
-static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
-{
- return my_strnxfrmlen_any_uca(cs, len) * cs->levels_for_order;
-}
-
-
-/* NO PAD handler for character sets with mbminlen==1 */
-MY_COLLATION_HANDLER my_collation_mb_uca_nopad_handler =
-{
- my_coll_init_uca,
- my_strnncoll_any_uca,
- my_strnncollsp_generic_uca_nopad,
- my_strnxfrm_generic_uca_nopad,
- my_strnxfrmlen_any_uca,
- my_like_range_mb,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_generic_uca_nopad,
- my_propagate_complex
-};
-
-
-/* NO PAD handler for character sets with mbminlen>=1 */
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler =
-{
- my_coll_init_uca,
- my_strnncoll_any_uca,
- my_strnncollsp_generic_uca_nopad,
- my_strnxfrm_generic_uca_nopad,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_generic_uca_nopad,
- my_propagate_complex
-};
-
-
-MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
-{
- my_coll_init_uca,
- my_strnncoll_any_uca_multilevel,
- my_strnncollsp_any_uca_multilevel,
- my_strnxfrm_any_uca_multilevel,
- my_strnxfrmlen_any_uca_multilevel,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
-
-
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel =
-{
- my_coll_init_uca,
- my_strnncoll_any_uca_multilevel,
- my_strnncollsp_generic_uca_nopad_multilevel,
- my_strnxfrm_any_uca_multilevel,
- my_strnxfrmlen_any_uca_multilevel,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_generic_uca_nopad,
- my_propagate_complex
-};
-
#ifdef HAVE_CHARSET_ucs2
-/*
- UCS2 optimized CHARSET_INFO compatible wrappers.
-*/
-static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, t_is_prefix);
-}
-static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
-}
-
-static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- ulong *n1, ulong *n2)
-{
- my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static size_t my_strnxfrm_ucs2_uca(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen, flags);
-}
-
-MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
-{
- my_coll_init_uca, /* init */
- my_strnncoll_ucs2_uca,
- my_strnncollsp_ucs2_uca,
- my_strnxfrm_ucs2_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_ucs2_uca,
- my_propagate_complex
-};
+#include "ctype-ucs2.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _ucs2
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_ucs2_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0
+#define MY_UCA_COMPILE_CONTRACTIONS 1
+#define MY_UCA_COLL_INIT my_coll_init_uca
+#include "ctype-uca.ic"
#define MY_CS_UCS2_UCA_FLAGS (MY_CS_COMMON_UCA_FLAGS|MY_CS_NONASCII)
@@ -34609,7 +33919,7 @@ struct charset_info_st my_charset_ucs2_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
@@ -34641,7 +33951,7 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_latvian_uca_ci=
@@ -34673,7 +33983,7 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_romanian_uca_ci=
@@ -34705,7 +34015,7 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
@@ -34737,7 +34047,7 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_polish_uca_ci=
@@ -34769,7 +34079,7 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_estonian_uca_ci=
@@ -34801,7 +34111,7 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_spanish_uca_ci=
@@ -34833,7 +34143,7 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_swedish_uca_ci=
@@ -34865,7 +34175,7 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_turkish_uca_ci=
@@ -34897,7 +34207,7 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_czech_uca_ci=
@@ -34929,7 +34239,7 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -34962,7 +34272,7 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
@@ -34994,7 +34304,7 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_slovak_uca_ci=
@@ -35026,7 +34336,7 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
@@ -35058,7 +34368,7 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35091,7 +34401,7 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35124,7 +34434,7 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35157,7 +34467,7 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35190,7 +34500,7 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
@@ -35222,7 +34532,7 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35256,7 +34566,7 @@ struct charset_info_st my_charset_ucs2_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
@@ -35288,7 +34598,7 @@ struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35321,7 +34631,7 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35354,7 +34664,7 @@ struct charset_info_st my_charset_ucs2_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35387,7 +34697,7 @@ struct charset_info_st my_charset_ucs2_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_ucs2
};
struct charset_info_st my_charset_ucs2_unicode_520_ci=
@@ -35419,7 +34729,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35452,7 +34762,7 @@ struct charset_info_st my_charset_ucs2_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35485,7 +34795,7 @@ struct charset_info_st my_charset_ucs2_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_ucs2
};
@@ -35518,7 +34828,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_ucs2
};
@@ -35526,20 +34836,38 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf8
-MY_COLLATION_HANDLER my_collation_any_uca_handler =
+
+static my_bool
+my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
+
+#include "ctype-utf8.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb3
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1
+#define MY_UCA_COMPILE_CONTRACTIONS 1
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb3
+#include "ctype-uca.ic"
+
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _no_contractions_utf8mb3
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1
+#define MY_UCA_COMPILE_CONTRACTIONS 0
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb3
+#include "ctype-uca.ic"
+
+
+static my_bool
+my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
- my_coll_init_uca, /* init */
- my_strnncoll_any_uca,
- my_strnncollsp_any_uca,
- my_strnxfrm_any_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_mb,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
+ if (my_coll_init_uca(cs, loader))
+ return TRUE;
+ if (my_uca_collation_can_optimize_no_contractions(cs))
+ my_uca_handler_map(cs, &my_uca_package_utf8mb3,
+ &my_uca_package_no_contractions_utf8mb3);
+ return FALSE;
+}
/*
@@ -35602,7 +34930,7 @@ struct charset_info_st my_charset_utf8_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -35635,7 +34963,7 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_latvian_uca_ci=
@@ -35667,7 +34995,7 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_romanian_uca_ci=
@@ -35699,7 +35027,7 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_slovenian_uca_ci=
@@ -35731,7 +35059,7 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_polish_uca_ci=
@@ -35763,7 +35091,7 @@ struct charset_info_st my_charset_utf8_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_estonian_uca_ci=
@@ -35795,7 +35123,7 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_spanish_uca_ci=
@@ -35827,7 +35155,7 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_swedish_uca_ci=
@@ -35859,7 +35187,7 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_turkish_uca_ci=
@@ -35891,7 +35219,7 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_czech_uca_ci=
@@ -35923,7 +35251,7 @@ struct charset_info_st my_charset_utf8_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -35956,7 +35284,7 @@ struct charset_info_st my_charset_utf8_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
@@ -35988,7 +35316,7 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_slovak_uca_ci=
@@ -36020,7 +35348,7 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_spanish2_uca_ci=
@@ -36052,7 +35380,7 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_roman_uca_ci=
@@ -36084,7 +35412,7 @@ struct charset_info_st my_charset_utf8_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_persian_uca_ci=
@@ -36116,7 +35444,7 @@ struct charset_info_st my_charset_utf8_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_esperanto_uca_ci=
@@ -36148,7 +35476,7 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_hungarian_uca_ci=
@@ -36180,7 +35508,7 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_sinhala_uca_ci=
@@ -36212,7 +35540,7 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36245,7 +35573,7 @@ struct charset_info_st my_charset_utf8_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
@@ -36277,7 +35605,7 @@ struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36310,7 +35638,7 @@ struct charset_info_st my_charset_utf8_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36343,7 +35671,7 @@ struct charset_info_st my_charset_utf8_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36376,7 +35704,7 @@ struct charset_info_st my_charset_utf8_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_thai_520_w2=
@@ -36408,7 +35736,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf8mb3
};
struct charset_info_st my_charset_utf8_vietnamese_ci=
@@ -36440,7 +35768,7 @@ struct charset_info_st my_charset_utf8_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36473,7 +35801,7 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb3
};
@@ -36506,7 +35834,7 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb3
};
#endif /* HAVE_CHARSET_utf8 */
@@ -36514,6 +35842,39 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf8mb4
+static my_bool
+my_uca_coll_init_utf8mb4(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
+
+
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb4
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1
+#define MY_UCA_COMPILE_CONTRACTIONS 1
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb4
+#include "ctype-uca.ic"
+
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _no_contractions_utf8mb4
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1
+#define MY_UCA_COMPILE_CONTRACTIONS 0
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb4
+#include "ctype-uca.ic"
+
+
+static my_bool
+my_uca_coll_init_utf8mb4(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
+{
+ if (my_coll_init_uca(cs, loader))
+ return TRUE;
+ if (my_uca_collation_can_optimize_no_contractions(cs))
+ my_uca_handler_map(cs, &my_uca_package_utf8mb4,
+ &my_uca_package_no_contractions_utf8mb4);
+ return FALSE;
+}
+
+
extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler;
#define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMMON_UCA_FLAGS|MY_CS_UNICODE_SUPPLEMENT)
@@ -36548,7 +35909,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -36581,7 +35942,7 @@ struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
@@ -36613,7 +35974,7 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
@@ -36645,7 +36006,7 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
@@ -36677,7 +36038,7 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
@@ -36709,7 +36070,7 @@ struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
@@ -36741,7 +36102,7 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
@@ -36773,7 +36134,7 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
@@ -36805,7 +36166,7 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
@@ -36837,7 +36198,7 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
@@ -36869,7 +36230,7 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -36902,7 +36263,7 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
@@ -36934,7 +36295,7 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
@@ -36966,7 +36327,7 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
@@ -36998,7 +36359,7 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
@@ -37030,7 +36391,7 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
@@ -37062,7 +36423,7 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
@@ -37094,7 +36455,7 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
@@ -37126,7 +36487,7 @@ struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
@@ -37158,7 +36519,7 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
@@ -37190,7 +36551,7 @@ struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
@@ -37222,7 +36583,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37255,7 +36616,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37288,7 +36649,7 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_thai_520_w2=
@@ -37320,7 +36681,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
@@ -37352,7 +36713,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37385,7 +36746,7 @@ struct charset_info_st my_charset_utf8mb4_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37418,7 +36779,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb4
};
@@ -37451,7 +36812,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb4
};
@@ -37460,20 +36821,14 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf32
-MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
-{
- my_coll_init_uca, /* init */
- my_strnncoll_any_uca,
- my_strnncollsp_any_uca,
- my_strnxfrm_any_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
+#include "ctype-utf32.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf32
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf32_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0
+#define MY_UCA_COMPILE_CONTRACTIONS 1
+#define MY_UCA_COLL_INIT my_coll_init_uca
+#include "ctype-uca.ic"
extern MY_CHARSET_HANDLER my_charset_utf32_handler;
@@ -37510,7 +36865,7 @@ struct charset_info_st my_charset_utf32_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -37543,7 +36898,7 @@ struct charset_info_st my_charset_utf32_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_latvian_uca_ci=
@@ -37575,7 +36930,7 @@ struct charset_info_st my_charset_utf32_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_romanian_uca_ci=
@@ -37607,7 +36962,7 @@ struct charset_info_st my_charset_utf32_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_slovenian_uca_ci=
@@ -37639,7 +36994,7 @@ struct charset_info_st my_charset_utf32_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_polish_uca_ci=
@@ -37671,7 +37026,7 @@ struct charset_info_st my_charset_utf32_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_estonian_uca_ci=
@@ -37703,7 +37058,7 @@ struct charset_info_st my_charset_utf32_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_spanish_uca_ci=
@@ -37735,7 +37090,7 @@ struct charset_info_st my_charset_utf32_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_swedish_uca_ci=
@@ -37767,7 +37122,7 @@ struct charset_info_st my_charset_utf32_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_turkish_uca_ci=
@@ -37799,7 +37154,7 @@ struct charset_info_st my_charset_utf32_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_czech_uca_ci=
@@ -37831,7 +37186,7 @@ struct charset_info_st my_charset_utf32_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -37864,7 +37219,7 @@ struct charset_info_st my_charset_utf32_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
@@ -37896,7 +37251,7 @@ struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_slovak_uca_ci=
@@ -37928,7 +37283,7 @@ struct charset_info_st my_charset_utf32_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_spanish2_uca_ci=
@@ -37960,7 +37315,7 @@ struct charset_info_st my_charset_utf32_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_roman_uca_ci=
@@ -37992,7 +37347,7 @@ struct charset_info_st my_charset_utf32_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_persian_uca_ci=
@@ -38024,7 +37379,7 @@ struct charset_info_st my_charset_utf32_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_esperanto_uca_ci=
@@ -38056,7 +37411,7 @@ struct charset_info_st my_charset_utf32_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_hungarian_uca_ci=
@@ -38088,7 +37443,7 @@ struct charset_info_st my_charset_utf32_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_sinhala_uca_ci=
@@ -38120,7 +37475,7 @@ struct charset_info_st my_charset_utf32_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_german2_uca_ci=
@@ -38152,7 +37507,7 @@ struct charset_info_st my_charset_utf32_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
@@ -38184,7 +37539,7 @@ struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_croatian_uca_ci=
@@ -38216,7 +37571,7 @@ struct charset_info_st my_charset_utf32_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38249,7 +37604,7 @@ struct charset_info_st my_charset_utf32_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38282,7 +37637,7 @@ struct charset_info_st my_charset_utf32_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf32
};
@@ -38315,7 +37670,7 @@ struct charset_info_st my_charset_utf32_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38348,7 +37703,7 @@ struct charset_info_st my_charset_utf32_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38381,7 +37736,7 @@ struct charset_info_st my_charset_utf32_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf32
};
@@ -38414,7 +37769,7 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf32
};
@@ -38424,21 +37779,14 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf16
-
-MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
-{
- my_coll_init_uca, /* init */
- my_strnncoll_any_uca,
- my_strnncollsp_any_uca,
- my_strnxfrm_any_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
+#include "ctype-utf16.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf16
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf16_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0
+#define MY_UCA_COMPILE_CONTRACTIONS 1
+#define MY_UCA_COLL_INIT my_coll_init_uca
+#include "ctype-uca.ic"
extern MY_CHARSET_HANDLER my_charset_utf16_handler;
@@ -38475,7 +37823,7 @@ struct charset_info_st my_charset_utf16_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -38508,7 +37856,7 @@ struct charset_info_st my_charset_utf16_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_latvian_uca_ci=
@@ -38540,7 +37888,7 @@ struct charset_info_st my_charset_utf16_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_romanian_uca_ci=
@@ -38572,7 +37920,7 @@ struct charset_info_st my_charset_utf16_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_slovenian_uca_ci=
@@ -38604,7 +37952,7 @@ struct charset_info_st my_charset_utf16_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_polish_uca_ci=
@@ -38636,7 +37984,7 @@ struct charset_info_st my_charset_utf16_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_estonian_uca_ci=
@@ -38668,7 +38016,7 @@ struct charset_info_st my_charset_utf16_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_spanish_uca_ci=
@@ -38700,7 +38048,7 @@ struct charset_info_st my_charset_utf16_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_swedish_uca_ci=
@@ -38732,7 +38080,7 @@ struct charset_info_st my_charset_utf16_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_turkish_uca_ci=
@@ -38764,7 +38112,7 @@ struct charset_info_st my_charset_utf16_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_czech_uca_ci=
@@ -38796,7 +38144,7 @@ struct charset_info_st my_charset_utf16_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -38829,7 +38177,7 @@ struct charset_info_st my_charset_utf16_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
@@ -38861,7 +38209,7 @@ struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_slovak_uca_ci=
@@ -38893,7 +38241,7 @@ struct charset_info_st my_charset_utf16_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_spanish2_uca_ci=
@@ -38925,7 +38273,7 @@ struct charset_info_st my_charset_utf16_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_roman_uca_ci=
@@ -38957,7 +38305,7 @@ struct charset_info_st my_charset_utf16_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_persian_uca_ci=
@@ -38989,7 +38337,7 @@ struct charset_info_st my_charset_utf16_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_esperanto_uca_ci=
@@ -39021,7 +38369,7 @@ struct charset_info_st my_charset_utf16_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_hungarian_uca_ci=
@@ -39053,7 +38401,7 @@ struct charset_info_st my_charset_utf16_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_sinhala_uca_ci=
@@ -39085,7 +38433,7 @@ struct charset_info_st my_charset_utf16_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_german2_uca_ci=
@@ -39117,7 +38465,7 @@ struct charset_info_st my_charset_utf16_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39150,7 +38498,7 @@ struct charset_info_st my_charset_utf16_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39183,7 +38531,7 @@ struct charset_info_st my_charset_utf16_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39216,7 +38564,7 @@ struct charset_info_st my_charset_utf16_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39249,7 +38597,7 @@ struct charset_info_st my_charset_utf16_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf16
};
@@ -39282,7 +38630,7 @@ struct charset_info_st my_charset_utf16_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39315,7 +38663,7 @@ struct charset_info_st my_charset_utf16_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39348,7 +38696,7 @@ struct charset_info_st my_charset_utf16_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf16
};
@@ -39381,7 +38729,7 @@ struct charset_info_st my_charset_utf16_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf16
};
diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic
new file mode 100644
index 00000000000..70c10199e3e
--- /dev/null
+++ b/strings/ctype-uca.ic
@@ -0,0 +1,839 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#ifndef MY_FUNCTION_NAME
+#error MY_FUNCTION_NAME is not defined
+#endif
+#ifndef MY_MB_WC
+#error MY_MB_WC is not defined
+#endif
+#ifndef MY_LIKE_RANGE
+#error MY_LIKE_RANGE is not defined
+#endif
+#ifndef MY_UCA_ASCII_OPTIMIZE
+#error MY_ASCII_OPTIMIZE is not defined
+#endif
+#ifndef MY_UCA_COMPILE_CONTRACTIONS
+#error MY_UCA_COMPILE_CONTRACTIONS is not defined
+#endif
+#ifndef MY_UCA_COLL_INIT
+#error MY_UCA_COLL_INIT is not defined
+#endif
+
+
+static inline int
+MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
+{
+ /*
+ Check if the weights for the previous character have been
+ already fully scanned. If yes, then get the next character and
+ initialize wbeg and wlength to its weight string.
+ */
+
+ if (scanner->wbeg[0]) /* More weights left from the previous step: */
+ return *scanner->wbeg++; /* return the next weight from expansion */
+
+ do
+ {
+ const uint16 *wpage;
+ my_wc_t wc[MY_UCA_MAX_CONTRACTION];
+ int mblen;
+
+ /* Get next character */
+#if MY_UCA_ASCII_OPTIMIZE
+ /* Get next ASCII character */
+ if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
+ {
+ wc[0]= scanner->sbeg[0];
+ scanner->sbeg+= 1;
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+ if (my_uca_needs_context_handling(scanner->level, wc[0]))
+ {
+ uint16 *cweight= my_uca_context_weight_find(scanner, wc);
+ if (cweight)
+ return *cweight;
+ }
+#endif
+
+ scanner->page= 0;
+ scanner->code= (int) wc[0];
+ scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
+ if (scanner->wbeg[0])
+ return *scanner->wbeg++;
+ continue;
+ }
+ else
+#endif
+ /* Get next MB character */
+ if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
+ scanner->send)) <= 0))
+ {
+ if (scanner->sbeg >= scanner->send)
+ return -1; /* No more bytes, end of line reached */
+ /*
+ There are some more bytes left. Non-positive mb_len means that
+ we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
+ */
+ if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
+ {
+ /* For safety purposes don't go beyond the string range. */
+ scanner->sbeg= scanner->send;
+ }
+ /*
+ Treat every complete or incomplete mbminlen unit as a weight which is
+ greater than weight for any possible normal character.
+ 0xFFFF is greater than any possible weight in the UCA weight table.
+ */
+ return 0xFFFF;
+ }
+
+ scanner->sbeg+= mblen;
+ if (wc[0] > scanner->level->maxchar)
+ {
+ /* Return 0xFFFD as weight for all characters outside BMP */
+ scanner->wbeg= nochar;
+ return 0xFFFD;
+ }
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+ if (my_uca_needs_context_handling(scanner->level, wc[0]))
+ {
+ uint16 *cweight= my_uca_context_weight_find(scanner, wc);
+ if (cweight)
+ return *cweight;
+ }
+#endif
+
+ /* Process single character */
+ scanner->page= wc[0] >> 8;
+ scanner->code= wc[0] & 0xFF;
+
+ /* If weight page for w[0] does not exist, then calculate algoritmically */
+ if (!(wpage= scanner->level->weights[scanner->page]))
+ return my_uca_scanner_next_implicit(scanner);
+
+ /* Calculate pointer to w[0]'s weight, using page and offset */
+ scanner->wbeg= wpage +
+ scanner->code * scanner->level->lengths[scanner->page];
+ } while (!scanner->wbeg[0]); /* Skip ignorable characters */
+
+ return *scanner->wbeg++;
+}
+
+
+
+/*
+ Compares two strings according to the collation
+
+ SYNOPSIS:
+ strnncoll_onelevel()
+ cs Character set information
+ level Weight level (0 primary, 1 secondary, 2 tertiary, etc)
+ s First string
+ slen First string length
+ t Second string
+ tlen Seconf string length
+ level DUCETweight level
+
+ NOTES:
+ Initializes two weight scanners and gets weights
+ corresponding to two strings in a loop. If weights are not
+ the same at some step then returns their difference.
+
+ In the while() comparison these situations are possible:
+ 1. (s_res>0) and (t_res>0) and (s_res == t_res)
+ Weights are the same so far, continue comparison
+ 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
+ A difference has been found, return.
+ 3. (s_res>0) and (t_res<0)
+ We have reached the end of the second string, or found
+ an illegal multibyte sequence in the second string.
+ Return a positive number, i.e. the first string is bigger.
+ 4. (s_res<0) and (t_res>0)
+ We have reached the end of the first string, or found
+ an illegal multibyte sequence in the first string.
+ Return a negative number, i.e. the second string is bigger.
+ 5. (s_res<0) and (t_res<0)
+ Both scanners returned -1. It means we have riched
+ the end-of-string of illegal-sequence in both strings
+ at the same time. Return 0, strings are equal.
+
+ RETURN
+ Difference between two strings, according to the collation:
+ 0 - means strings are equal
+ negative number - means the first string is smaller
+ positive number - means the first string is bigger
+*/
+
+static int
+MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ my_uca_scanner sscanner;
+ my_uca_scanner tscanner;
+ int s_res;
+ int t_res;
+
+ my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+ my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+
+ do
+ {
+ s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+ t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+ } while ( s_res == t_res && s_res >0);
+
+ return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
+}
+
+
+/*
+ One-level, PAD SPACE.
+*/
+static int
+MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0],
+ s, slen, t, tlen, t_is_prefix);
+}
+
+
+/*
+ Multi-level, PAD SPACE.
+*/
+static int
+MY_FUNCTION_NAME(strnncoll_multilevel)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ uint i, num_level= cs->levels_for_order;
+ for (i= 0; i != num_level; i++)
+ {
+ int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
+ s, slen, t, tlen,
+ t_is_prefix);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+
+/*
+ Compares two strings according to the collation,
+ ignoring trailing spaces.
+
+ SYNOPSIS:
+ strnncollsp_onelevel()
+ cs Character set information
+ level UCA weight level
+ s First string
+ slen First string length
+ t Second string
+ tlen Seconf string length
+ level DUCETweight level
+
+ NOTES:
+ Works exactly the same with my_strnncoll_uca(),
+ but ignores trailing spaces.
+
+ In the while() comparison these situations are possible:
+ 1. (s_res>0) and (t_res>0) and (s_res == t_res)
+ Weights are the same so far, continue comparison
+ 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
+ A difference has been found, return.
+ 3. (s_res>0) and (t_res<0)
+ We have reached the end of the second string, or found
+ an illegal multibyte sequence in the second string.
+ Compare the first string to an infinite array of
+ space characters until difference is found, or until
+ the end of the first string.
+ 4. (s_res<0) and (t_res>0)
+ We have reached the end of the first string, or found
+ an illegal multibyte sequence in the first string.
+ Compare the second string to an infinite array of
+ space characters until difference is found or until
+ the end of the second steing.
+ 5. (s_res<0) and (t_res<0)
+ Both scanners returned -1. It means we have riched
+ the end-of-string of illegal-sequence in both strings
+ at the same time. Return 0, strings are equal.
+
+ RETURN
+ Difference between two strings, according to the collation:
+ 0 - means strings are equal
+ negative number - means the first string is smaller
+ positive number - means the first string is bigger
+*/
+
+static int
+MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ my_uca_scanner sscanner, tscanner;
+ int s_res, t_res;
+
+ my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+ my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+
+ do
+ {
+ s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+ t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+ } while ( s_res == t_res && s_res >0);
+
+ if (s_res > 0 && t_res < 0)
+ {
+ /* Calculate weight for SPACE character */
+ t_res= my_space_weight(level);
+
+ /* compare the first string to spaces */
+ do
+ {
+ if (s_res != t_res)
+ return (s_res - t_res);
+ s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+ } while (s_res > 0);
+ return 0;
+ }
+
+ if (s_res < 0 && t_res > 0)
+ {
+ /* Calculate weight for SPACE character */
+ s_res= my_space_weight(level);
+
+ /* compare the second string to spaces */
+ do
+ {
+ if (s_res != t_res)
+ return (s_res - t_res);
+ t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+ } while (t_res > 0);
+ return 0;
+ }
+
+ return ( s_res - t_res );
+}
+
+
+/*
+ One-level, PAD SPACE
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ return MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[0],
+ s, slen, t, tlen);
+}
+
+
+/*
+ One-level, NO PAD
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nopad)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0],
+ s, slen, t, tlen, FALSE);
+}
+
+
+/*
+ Multi-level, PAD SPACE
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_multilevel)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+
+ uint i, num_level= cs->levels_for_order;
+ for (i= 0; i != num_level; i++)
+ {
+ int ret= MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[i],
+ s, slen, t, tlen);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+
+/*
+ Multi-level, NO PAD
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ uint num_level= cs->levels_for_order;
+ uint i;
+ for (i= 0; i != num_level; i++)
+ {
+ int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
+ s, slen, t, tlen, FALSE);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+
+
+/*
+ Calculates hash value for the given string,
+ according to the collation, and ignoring trailing spaces.
+
+ SYNOPSIS:
+ hash_sort()
+ cs Character set information
+ s String
+ slen String's length
+ n1 First hash parameter
+ n2 Second hash parameter
+
+ NOTES:
+ Scans consequently weights and updates
+ hash parameters n1 and n2. In a case insensitive collation,
+ upper and lower case of the same letter will return the same
+ weight sequence, and thus will produce the same hash values
+ in n1 and n2.
+
+ This functions is used for one-level and for multi-level collations.
+ We intentionally use only primary level in multi-level collations.
+ This helps to have PARTITION BY KEY put primarily equal records
+ into the same partition. E.g. in utf8_thai_520_ci records that differ
+ only in tone marks go into the same partition.
+
+ RETURN
+ N/A
+*/
+
+static void
+MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ ulong *nr1, ulong *nr2)
+{
+ int s_res;
+ my_uca_scanner scanner;
+ int space_weight= my_space_weight(&cs->uca->level[0]);
+ register ulong m1= *nr1, m2= *nr2;
+
+ my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
+
+ while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
+ {
+ if (s_res == space_weight)
+ {
+ /* Combine all spaces to be able to skip end spaces */
+ uint count= 0;
+ do
+ {
+ count++;
+ if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0)
+ {
+ /* Skip strings at end of string */
+ goto end;
+ }
+ }
+ while (s_res == space_weight);
+
+ /* Add back that has for the space characters */
+ do
+ {
+ /*
+ We can't use MY_HASH_ADD_16() here as we, because of a misstake
+ in the original code, where we added the 16 byte variable the
+ opposite way. Changing this would cause old partitioned tables
+ to fail.
+ */
+ MY_HASH_ADD(m1, m2, space_weight >> 8);
+ MY_HASH_ADD(m1, m2, space_weight & 0xFF);
+ }
+ while (--count != 0);
+
+ }
+ /* See comment above why we can't use MY_HASH_ADD_16() */
+ MY_HASH_ADD(m1, m2, s_res >> 8);
+ MY_HASH_ADD(m1, m2, s_res & 0xFF);
+ }
+end:
+ *nr1= m1;
+ *nr2= m2;
+}
+
+
+static void
+MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ ulong *nr1, ulong *nr2)
+{
+ int s_res;
+ my_uca_scanner scanner;
+ register ulong m1= *nr1, m2= *nr2;
+
+ my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
+
+ while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
+ {
+ /* See comment above why we can't use MY_HASH_ADD_16() */
+ MY_HASH_ADD(m1, m2, s_res >> 8);
+ MY_HASH_ADD(m1, m2, s_res & 0xFF);
+ }
+ *nr1= m1;
+ *nr2= m2;
+}
+
+
+
+/*
+ For the given string creates its "binary image", suitable
+ to be used in binary comparison, i.e. in memcmp().
+
+ SYNOPSIS:
+ my_strnxfrm_uca()
+ cs Character set information
+ dst Where to write the image
+ dstlen Space available for the image, in bytes
+ src The source string
+ srclen Length of the source string, in bytes
+
+ NOTES:
+ In a loop, scans weights from the source string and writes
+ them into the binary image. In a case insensitive collation,
+ upper and lower cases of the same letter will produce the
+ same image subsequences. When we have reached the end-of-string
+ or found an illegal multibyte sequence, the loop stops.
+
+ It is impossible to restore the original string using its
+ binary image.
+
+ Binary images are used for bulk comparison purposes,
+ e.g. in ORDER BY, when it is more efficient to create
+ a binary image and use it instead of weight scanner
+ for the original strings for every comparison.
+
+ RETURN
+ Number of bytes that have been written into the binary image.
+*/
+
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
+ MY_UCA_WEIGHT_LEVEL *level,
+ uchar *dst, uchar *de,
+ uint *nweights,
+ const uchar *src, size_t srclen)
+{
+ my_uca_scanner scanner;
+ int s_res;
+
+ DBUG_ASSERT(src || !srclen);
+
+#if MY_UCA_ASCII_OPTIMIZE && !MY_UCA_COMPILE_CONTRACTIONS
+ /*
+ Fast path for the ASCII range with no contractions.
+ */
+ {
+ const uchar *de2= de - 1; /* Last position where 2 bytes fit */
+ const uint16 *weights0= level->weights[0];
+ uint lengths0= level->lengths[0];
+ for ( ; ; src++, srclen--)
+ {
+ const uint16 *weight;
+ if (!srclen || !*nweights)
+ return dst; /* Done */
+ if (*src > 0x7F)
+ break; /* Non-ASCII */
+
+ weight= weights0 + (((uint) *src) * lengths0);
+ if (!(s_res= *weight))
+ continue; /* Ignorable */
+ if (weight[1]) /* Expansion (e.g. in a user defined collation */
+ break;
+
+ /* Here we have a character with extactly one 2-byte UCA weight */
+ if (dst < de2) /* Most typical case is when both bytes fit */
+ {
+ *dst++= s_res >> 8;
+ *dst++= s_res & 0xFF;
+ (*nweights)--;
+ continue;
+ }
+ if (dst >= de) /* No space left in "dst" */
+ return dst;
+ *dst++= s_res >> 8; /* There is space only for one byte */
+ (*nweights)--;
+ return dst;
+ }
+ }
+#endif
+
+ my_uca_scanner_init_any(&scanner, cs, level, src, srclen);
+ for (; dst < de && *nweights &&
+ (s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) > 0 ; (*nweights)--)
+ {
+ *dst++= s_res >> 8;
+ if (dst < de)
+ *dst++= s_res & 0xFF;
+ }
+ return dst;
+}
+
+
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_onelevel)(CHARSET_INFO *cs,
+ MY_UCA_WEIGHT_LEVEL *level,
+ uchar *dst, uchar *de, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *d0= dst;
+ dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
+ dst, de, &nweights,
+ src, srclen);
+ DBUG_ASSERT(dst <= de);
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+ dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
+ DBUG_ASSERT(dst <= de);
+ my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
+ return dst;
+}
+
+
+
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(CHARSET_INFO *cs,
+ MY_UCA_WEIGHT_LEVEL *level,
+ uchar *dst, uchar *de, uint nweights,
+ const uchar *src, size_t srclen,
+ uint flags)
+{
+ uchar *d0= dst;
+ dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
+ dst, de, &nweights,
+ src, srclen);
+ DBUG_ASSERT(dst <= de);
+ /* Pad with the minimum possible weight on this level */
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+ dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
+ DBUG_ASSERT(dst <= de);
+ my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
+ return dst;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *d0= dst;
+ uchar *de= dst + dstlen;
+
+ /*
+ There are two ways to handle trailing spaces for PAD SPACE collations:
+ 1. Keep trailing spaces as they are, so have strnxfrm_onelevel() scan
+ spaces as normal characters. This will call scanner_next() for every
+ trailing space and calculate its weight using UCA weights.
+ 2. Strip trailing spaces before calling strnxfrm_onelevel(), as it will
+ append weights for implicit spaces anyway, up to the desired key size.
+ This will effectively generate exactly the same sortable key result.
+ The latter is much faster.
+ */
+
+ if (flags & MY_STRXFRM_PAD_WITH_SPACE)
+ srclen= cs->cset->lengthsp(cs, (const char*) src, srclen);
+ dst= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, &cs->uca->level[0],
+ dst, de, nweights,
+ src, srclen, flags);
+ /*
+ This can probably be changed to memset(dst, 0, de - dst),
+ like my_strnxfrm_uca_multilevel() does.
+ */
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0]));
+ return dst - d0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen,
+ uint nweights,
+ const uchar *src, size_t srclen,
+ uint flags)
+{
+ uchar *d0= dst;
+ uchar *de= dst + dstlen;
+
+ dst= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, &cs->uca->level[0],
+ dst, de, nweights,
+ src, srclen, flags);
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ {
+ memset(dst, 0, de - dst);
+ dst= de;
+ }
+ return dst - d0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_multilevel)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen,
+ uint nweights,
+ const uchar *src, size_t srclen,
+ uint flags)
+{
+ uint num_level= cs->levels_for_order;
+ uchar *d0= dst;
+ uchar *de= dst + dstlen;
+ uint current_level;
+
+ for (current_level= 0; current_level != num_level; current_level++)
+ {
+ if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
+ (flags & (MY_STRXFRM_LEVEL1 << current_level)))
+ dst= cs->state & MY_CS_NOPAD ?
+ MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs,
+ &cs->uca->level[current_level],
+ dst, de, nweights,
+ src, srclen, flags) :
+ MY_FUNCTION_NAME(strnxfrm_onelevel)(cs,
+ &cs->uca->level[current_level],
+ dst, de, nweights,
+ src, srclen, flags);
+ }
+
+ if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
+ {
+ memset(dst, 0, de - dst);
+ dst= de;
+ }
+
+ return dst - d0;
+}
+
+
+/*
+ One-level, PAD SPACE
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
+{
+ MY_UCA_COLL_INIT,
+ MY_FUNCTION_NAME(strnncoll),
+ MY_FUNCTION_NAME(strnncollsp),
+ MY_FUNCTION_NAME(strnxfrm),
+ my_strnxfrmlen_any_uca,
+ MY_LIKE_RANGE,
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort),
+ my_propagate_complex
+};
+
+
+/*
+ One-level, NO PAD
+ For character sets with mbminlen==1 use MY_LIKE_RANGE=my_like_range_mb
+ For character sets with mbminlen>=2 use MY_LIKE_RANGE=my_like_range_generic
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
+{
+ MY_UCA_COLL_INIT,
+ MY_FUNCTION_NAME(strnncoll),
+ MY_FUNCTION_NAME(strnncollsp_nopad),
+ MY_FUNCTION_NAME(strnxfrm_nopad),
+ my_strnxfrmlen_any_uca,
+ MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort_nopad),
+ my_propagate_complex
+};
+
+
+/*
+ Multi-level, PAD SPACE
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
+{
+ MY_UCA_COLL_INIT,
+ MY_FUNCTION_NAME(strnncoll_multilevel),
+ MY_FUNCTION_NAME(strnncollsp_multilevel),
+ MY_FUNCTION_NAME(strnxfrm_multilevel),
+ my_strnxfrmlen_any_uca_multilevel,
+ MY_LIKE_RANGE,
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort),
+ my_propagate_complex
+};
+
+
+/*
+ Multi-level, NO PAD
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
+{
+ MY_UCA_COLL_INIT,
+ MY_FUNCTION_NAME(strnncoll_multilevel),
+ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel),
+ MY_FUNCTION_NAME(strnxfrm_multilevel),
+ my_strnxfrmlen_any_uca_multilevel,
+ MY_LIKE_RANGE,
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort),
+ my_propagate_complex
+};
+
+
+MY_COLLATION_HANDLER_PACKAGE MY_FUNCTION_NAME(package)=
+{
+ &MY_FUNCTION_NAME(collation_handler),
+ &MY_FUNCTION_NAME(collation_handler_nopad),
+ &MY_FUNCTION_NAME(collation_handler_multilevel),
+ &MY_FUNCTION_NAME(collation_handler_nopad_multilevel)
+};
+
+
+#undef MY_FUNCTION_NAME
+#undef MY_MB_WC
+#undef MY_LIKE_RANGE
+#undef MY_UCA_ASCII_OPTIMIZE
+#undef MY_UCA_COMPILE_CONTRACTIONS
+#undef MY_UCA_COLL_INIT
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 7596b7f2168..28e7def3ddf 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -23,6 +23,8 @@
#include <my_sys.h>
#include <stdarg.h>
+#include "ctype-unidata.h"
+
#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
#define HAVE_CHARSET_mb2
@@ -1184,35 +1186,7 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
but the JSON functions needed my_utf16_uni()
so the #ifdef was moved lower.
*/
-
-
-/*
- D800..DB7F - Non-provate surrogate high (896 pages)
- DB80..DBFF - Private surrogate high (128 pages)
- DC00..DFFF - Surrogate low (1024 codes in a page)
-*/
-#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
-#define MY_UTF16_SURROGATE_HIGH_LAST 0xDBFF
-#define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00
-#define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF
-
-#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
-#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
-/* Test if a byte is a leading byte of a high or low surrogate head: */
-#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
-/* Test if a Unicode code point is a high or low surrogate head */
-#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
-
-#define MY_UTF16_WC2(a, b) ((a << 8) + b)
-
-/*
- a= 110110?? (<< 18)
- b= ???????? (<< 10)
- c= 110111?? (<< 8)
- d= ???????? (<< 0)
-*/
-#define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
- ((c & 3) << 8) + d + 0x10000)
+#include "ctype-utf16.h"
#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
@@ -1220,10 +1194,17 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
{
my_wc_t wc= MY_UTF16_WC2(b0, b1);
- MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
+ MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
return (int) (page ? page[wc & 0xFF].sort : wc);
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf16_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 my_unicase_default_page00
+#define UNICASE_PAGES my_unicase_default_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
@@ -1261,32 +1242,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
{
- if (s + 2 > e)
- return MY_CS_TOOSMALL2;
-
- /*
- High bytes: 0xD[89AB] = B'110110??'
- Low bytes: 0xD[CDEF] = B'110111??'
- Surrogate mask: 0xFC = B'11111100'
- */
-
- if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
- {
- if (s + 4 > e)
- return MY_CS_TOOSMALL4;
-
- if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
- return MY_CS_ILSEQ;
-
- *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
- return 4;
- }
-
- if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
- return MY_CS_ILSEQ;
-
- *pwc= MY_UTF16_WC2(s[0], s[1]);
- return 2;
+ return my_mb_wc_utf16_quick(pwc, s, e);
}
@@ -1546,7 +1502,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_ci,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
@@ -1578,7 +1534,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_nopad_ci,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
@@ -1775,6 +1731,13 @@ struct charset_info_st my_charset_utf16_nopad_bin=
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e) (cs->cset->mb_wc(cs, pwc, s, e))
+#define OPTIMIZE_ASCII 0
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 my_unicase_default_page00
+#define UNICASE_PAGES my_unicase_default_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
@@ -1879,7 +1842,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_ci,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
@@ -1911,7 +1874,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_nopad_ci,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
@@ -2109,6 +2072,8 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
#ifdef HAVE_CHARSET_utf32
+#include "ctype-utf32.h"
+
/*
Check is b0 and b1 start a valid UTF32 four-byte sequence.
Don't accept characters greater than U+10FFFF.
@@ -2117,8 +2082,6 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
#define IS_MB4_CHAR(b0,b1,b2,b3) (IS_UTF32_MBHEAD4(b0,b1))
-#define MY_UTF32_WC4(b0,b1,b2,b3) ((((my_wc_t)b0) << 24) + (b1 << 16) + \
- (b2 << 8) + (b3))
static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
uchar b2, uchar b3)
@@ -2126,12 +2089,19 @@ static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);
if (wc <= 0xFFFF)
{
- MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
+ MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
return (int) (page ? page[wc & 0xFF].sort : wc);
}
return MY_CS_REPLACEMENT_CHARACTER;
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf32_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 my_unicase_default_page00
+#define UNICASE_PAGES my_unicase_default_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3)
#include "strcoll.ic"
@@ -2161,10 +2131,7 @@ static int
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
{
- if (s + 4 > e)
- return MY_CS_TOOSMALL4;
- *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
- return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
+ return my_mb_wc_utf32_quick(pwc, s, e);
}
@@ -2698,7 +2665,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_ci,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf32_ci,
@@ -2730,7 +2697,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_nopad_ci,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf32_ci,
@@ -2928,6 +2895,8 @@ struct charset_info_st my_charset_utf32_nopad_bin=
#ifdef HAVE_CHARSET_ucs2
+#include "ctype-ucs2.h"
+
static const uchar ctype_ucs2[] = {
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
@@ -2995,20 +2964,30 @@ static const uchar to_upper_ucs2[] = {
static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
{
my_wc_t wc= UCS2_CODE(b0, b1);
- MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
+ MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
return (int) (page ? page[wc & 0xFF].sort : wc);
}
-#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
-#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 my_unicase_default_page00
+#define UNICASE_PAGES my_unicase_default_pages
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
#include "strcoll.ic"
-#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
-#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
+#define DEFINE_STRNXFRM_UNICODE_BIN2
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
#include "strcoll.ic"
@@ -3037,11 +3016,7 @@ my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
- if (s+2 > e) /* Need 2 characters */
- return MY_CS_TOOSMALL2;
-
- *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
- return 2;
+ return my_mb_wc_ucs2_quick(pwc, s, e);
}
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
@@ -3280,7 +3255,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_ci,
- my_strnxfrm_unicode,
+ my_strnxfrm_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
@@ -3296,7 +3271,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_bin,
- my_strnxfrm_unicode,
+ my_strnxfrm_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_bin,
@@ -3312,7 +3287,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_nopad_ci,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
@@ -3328,7 +3303,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_nopad_bin,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_bin,
diff --git a/strings/ctype-ucs2.h b/strings/ctype-ucs2.h
new file mode 100644
index 00000000000..c989324172d
--- /dev/null
+++ b/strings/ctype-ucs2.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UCS2_H
+#define _CTYPE_UCS2_H
+
+
+static inline int
+my_mb_wc_ucs2_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+ if (s+2 > e) /* Need 2 characters */
+ return MY_CS_TOOSMALL2;
+ *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
+ return 2;
+}
+
+
+#endif /* _CTYPE_UCS2_H */
diff --git a/strings/ctype-unidata.h b/strings/ctype-unidata.h
new file mode 100644
index 00000000000..6712f5e1d79
--- /dev/null
+++ b/strings/ctype-unidata.h
@@ -0,0 +1,31 @@
+#ifndef CTYPE_UNIDATA_H_INCLUDED
+#define CTYPE_UNIDATA_H_INCLUDED
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#define MY_UNICASE_INFO_DEFAULT_MAXCHAR 0xFFFF
+extern MY_UNICASE_CHARACTER my_unicase_default_page00[256];
+extern MY_UNICASE_CHARACTER *my_unicase_default_pages[256];
+
+size_t my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights);
+size_t my_strxfrm_pad_unicode(uchar *str, uchar *strend);
+
+
+#define PUT_WC_BE2_HAVE_1BYTE(dst, de, wc) \
+ do { *dst++= (uchar) (wc >> 8); if (dst < de) *dst++= (uchar) (wc & 0xFF); } while(0)
+
+#endif /* CTYPE_UNIDATA_H_INCLUDED */
diff --git a/strings/ctype-utf16.h b/strings/ctype-utf16.h
new file mode 100644
index 00000000000..d4cf4664f97
--- /dev/null
+++ b/strings/ctype-utf16.h
@@ -0,0 +1,80 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF16_H
+#define _CTYPE_UTF16_H
+
+/*
+ D800..DB7F - Non-provate surrogate high (896 pages)
+ DB80..DBFF - Private surrogate high (128 pages)
+ DC00..DFFF - Surrogate low (1024 codes in a page)
+*/
+#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
+#define MY_UTF16_SURROGATE_HIGH_LAST 0xDBFF
+#define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00
+#define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF
+
+#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
+#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
+/* Test if a byte is a leading byte of a high or low surrogate head: */
+#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
+/* Test if a Unicode code point is a high or low surrogate head */
+#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
+
+#define MY_UTF16_WC2(a, b) ((a << 8) + b)
+
+/*
+ a= 110110?? (<< 18)
+ b= ???????? (<< 10)
+ c= 110111?? (<< 8)
+ d= ???????? (<< 0)
+*/
+#define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
+ ((c & 3) << 8) + d + 0x10000)
+
+static inline int
+my_mb_wc_utf16_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+ if (s + 2 > e)
+ return MY_CS_TOOSMALL2;
+
+ /*
+ High bytes: 0xD[89AB] = B'110110??'
+ Low bytes: 0xD[CDEF] = B'110111??'
+ Surrogate mask: 0xFC = B'11111100'
+ */
+
+ if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
+ {
+ if (s + 4 > e)
+ return MY_CS_TOOSMALL4;
+
+ if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
+ return MY_CS_ILSEQ;
+
+ *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
+ return 4;
+ }
+
+ if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
+ return MY_CS_ILSEQ;
+
+ *pwc= MY_UTF16_WC2(s[0], s[1]);
+ return 2;
+}
+
+#endif /* _CTYPE_UTF16_H */
diff --git a/strings/ctype-utf32.h b/strings/ctype-utf32.h
new file mode 100644
index 00000000000..e295dc6d081
--- /dev/null
+++ b/strings/ctype-utf32.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF32_H
+#define _CTYPE_UTF32_H
+
+#define MY_UTF32_WC4(b0,b1,b2,b3) ((((my_wc_t)b0) << 24) + (b1 << 16) + \
+ (b2 << 8) + (b3))
+
+static inline int
+my_mb_wc_utf32_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+ if (s + 4 > e)
+ return MY_CS_TOOSMALL4;
+ *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
+ return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
+}
+
+#endif /* _CTYPE_UTF32_H */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 4ef376dccc8..4ddb086b734 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -26,78 +26,10 @@
#define EILSEQ ENOENT
#endif
-/* Detect special bytes and sequences */
-#define IS_CONTINUATION_BYTE(c) (((uchar) (c) ^ 0x80) < 0x40)
-/*
- Check MB2 character assuming that b0 is alredy known to be >= 0xC2.
- Use this macro if the caller already checked b0 for:
- - an MB1 character
- - an unused gap between MB1 and MB2HEAD
-*/
-#define IS_UTF8MB2_STEP2(b0,b1) (((uchar) (b0) < 0xE0) && \
- IS_CONTINUATION_BYTE((uchar) b1))
+#include "ctype-utf8.h"
+#include "ctype-unidata.h"
-/*
- Check MB3 character assuming that b0 is already known to be
- in the valid MB3HEAD range [0xE0..0xEF].
-*/
-#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
- IS_CONTINUATION_BYTE(b2) && \
- ((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
-
-/*
- Check MB3 character assuming that b0 is already known to be >= 0xE0,
- but is not checked for the high end 0xF0 yet.
- Use this macro if the caller already checked b0 for:
- - an MB1 character
- - an unused gap between MB1 and MB2HEAD
- - an MB2HEAD
-*/
-#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
- IS_UTF8MB3_STEP2(b0,b1,b2))
-
-/*
- UTF-8 quick four-byte mask:
- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- Encoding allows to encode U+00010000..U+001FFFFF
-
- The maximum character defined in the Unicode standard is U+0010FFFF.
- Higher characters U+00110000..U+001FFFFF are not used.
-
- 11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
- 11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
-
- Valid codes:
- [F0][90..BF][80..BF][80..BF]
- [F1][80..BF][80..BF][80..BF]
- [F2][80..BF][80..BF][80..BF]
- [F3][80..BF][80..BF][80..BF]
- [F4][80..8F][80..BF][80..BF]
-*/
-
-/*
- Check MB4 character assuming that b0 is already
- known to be in the range [0xF0..0xF4]
-*/
-#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
- IS_CONTINUATION_BYTE(b2) && \
- IS_CONTINUATION_BYTE(b3) && \
- (b0 >= 0xf1 || b1 >= 0x90) && \
- (b0 <= 0xf3 || b1 <= 0x8F))
-#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
- IS_UTF8MB4_STEP2(b0,b1,b2,b3))
-
-/* Convert individual bytes to Unicode code points */
-#define UTF8MB2_CODE(b0,b1) (((my_wc_t) ((uchar) b0 & 0x1f) << 6) |\
- ((my_wc_t) ((uchar) b1 ^ 0x80)))
-#define UTF8MB3_CODE(b0,b1,b2) (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
- ((my_wc_t) ((uchar) b1 ^ 0x80) << 6) |\
- ((my_wc_t) ((uchar) b2 ^ 0x80)))
-#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
- ((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
- ((my_wc_t) ((uchar) b2 ^ 0x80) << 6) |\
- (my_wc_t) ((uchar) b3 ^ 0x80))
/* Definitions for strcoll.ic */
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
@@ -180,7 +112,7 @@ int my_valid_mbcharlen_utf8mb3(const uchar *s, const uchar *e)
#include "my_uctype.h"
-static MY_UNICASE_CHARACTER plane00[]={
+MY_UNICASE_CHARACTER my_unicase_default_page00[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
@@ -313,7 +245,7 @@ static MY_UNICASE_CHARACTER plane00[]={
/*
- Almost similar to plane00, but maps sorting order
+ Almost similar to my_unicase_default_page00, but maps sorting order
for U+00DF to 0x00DF instead of 0x0053.
*/
static MY_UNICASE_CHARACTER plane00_mysql500[]={
@@ -1759,9 +1691,10 @@ static MY_UNICASE_CHARACTER planeFF[]={
};
-static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
+MY_UNICASE_CHARACTER *my_unicase_default_pages[256]=
{
- plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL,
+ my_unicase_default_page00,
+ plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F,
@@ -1798,8 +1731,8 @@ static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
MY_UNICASE_INFO my_unicase_default=
{
- 0xFFFF,
- my_unicase_pages_default
+ MY_UNICASE_INFO_DEFAULT_MAXCHAR,
+ my_unicase_default_pages
};
@@ -4646,7 +4579,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs,
@return Result length
*/
-static size_t
+size_t
my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights)
{
uchar *str0;
@@ -4675,7 +4608,7 @@ my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights)
@return Result length
*/
-static size_t
+size_t
my_strxfrm_pad_unicode(uchar *str, uchar *strend)
{
uchar *str0= str;
@@ -4690,95 +4623,6 @@ my_strxfrm_pad_unicode(uchar *str, uchar *strend)
}
-size_t my_strnxfrm_unicode_internal(CHARSET_INFO *cs,
- uchar *dst, uchar *de, uint *nweights,
- const uchar *src, const uchar *se)
-{
- my_wc_t UNINIT_VAR(wc);
- int res;
- uchar *dst0= dst;
- MY_UNICASE_INFO *uni_plane= (cs->state & MY_CS_BINSORT) ?
- NULL : cs->caseinfo;
-
- DBUG_ASSERT(src || !se);
-
- for (; dst < de && *nweights; (*nweights)--)
- {
- if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0)
- break;
- src+= res;
-
- if (uni_plane)
- my_tosort_unicode(uni_plane, &wc, cs->state);
-
- *dst++= (uchar) (wc >> 8);
- if (dst < de)
- *dst++= (uchar) (wc & 0xFF);
- }
- return dst - dst0;
-}
-
-
-/*
- Store sorting weights using 2 bytes per character.
-
- This function is shared between
- - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
- which support BMP only (U+0000..U+FFFF).
- - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
- which map all supplementary characters to weight 0xFFFD.
-*/
-size_t
-my_strnxfrm_unicode(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *dst0= dst;
- uchar *de= dst + dstlen;
- dst+= my_strnxfrm_unicode_internal(cs, dst, de, &nweights,
- src, src + srclen);
- DBUG_ASSERT(dst <= de); /* Safety */
-
- if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
- dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
-
- my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
-
- if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
- dst+= my_strxfrm_pad_unicode(dst, de);
- return dst - dst0;
-}
-
-
-size_t
-my_strnxfrm_unicode_nopad(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *dst0= dst;
- uchar *de= dst + dstlen;
- dst+= my_strnxfrm_unicode_internal(cs, dst, de, &nweights,
- src, src + srclen);
- DBUG_ASSERT(dst <= de); /* Safety */
-
- if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
- {
- size_t len= de - dst;
- set_if_smaller(len, nweights * 2);
- memset(dst, 0x00, len);
- dst+= len;
- }
-
- my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
-
- if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
- {
- memset(dst, 0x00, de - dst);
- dst= de;
- }
- return dst - dst0;
-}
-
/*
For BMP-only collations that use 2 bytes per weight.
*/
@@ -4977,42 +4821,7 @@ static const uchar to_upper_utf8[] = {
static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
- uchar c;
-
- if (s >= e)
- return MY_CS_TOOSMALL;
-
- c= s[0];
- if (c < 0x80)
- {
- *pwc = c;
- return 1;
- }
- else if (c < 0xc2)
- return MY_CS_ILSEQ;
- else if (c < 0xe0)
- {
- if (s+2 > e) /* We need 2 characters */
- return MY_CS_TOOSMALL2;
-
- if (!(IS_CONTINUATION_BYTE(s[1])))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB2_CODE(c, s[1]);
- return 2;
- }
- else if (c < 0xf0)
- {
- if (s+3 > e) /* We need 3 characters */
- return MY_CS_TOOSMALL3;
-
- if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB3_CODE(c, s[1], s[2]);
- return 3;
- }
- return MY_CS_ILSEQ;
+ return my_mb_wc_utf8mb3_quick(pwc, s, e);
}
@@ -5308,7 +5117,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
It represents a single byte character.
Convert it into weight according to collation.
*/
- s_wc= plane00[(uchar) s[0]].tolower;
+ s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
s++;
}
else
@@ -5350,7 +5159,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
if ((uchar) t[0] < 128)
{
/* Convert single byte character into weight */
- t_wc= plane00[(uchar) t[0]].tolower;
+ t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
t++;
}
else
@@ -5413,14 +5222,14 @@ int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
static inline int my_weight_mb1_utf8_general_ci(uchar b)
{
- return (int) plane00[b & 0xFF].sort;
+ return (int) my_unicase_default_page00[b & 0xFF].sort;
}
static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1)
{
my_wc_t wc= UTF8MB2_CODE(b0, b1);
- MY_UNICASE_CHARACTER *page= my_unicase_pages_default[wc >> 8];
+ MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
return (int) (page ? page[wc & 0xFF].sort : wc);
}
@@ -5428,16 +5237,23 @@ static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1)
static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2)
{
my_wc_t wc= UTF8MB3_CODE(b0, b1, b2);
- MY_UNICASE_CHARACTER *page= my_unicase_pages_default[wc >> 8];
+ MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
return (int) (page ? page[wc & 0xFF].sort : wc);
}
-#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_ci
-#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x)
-#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y)
-#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z)
+#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 1
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 my_unicase_default_page00
+#define UNICASE_PAGES my_unicase_default_pages
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x)
+#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y)
+#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z)
#include "strcoll.ic"
@@ -5473,19 +5289,28 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
}
-#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_mysql500_ci
-#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_mysql500_ci(x)
-#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_mysql500_ci(x,y)
-#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_mysql500_ci(x,y,z)
+#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_general_mysql500_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 1
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 plane00_mysql500
+#define UNICASE_PAGES my_unicase_pages_mysql500
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_mysql500_ci(x)
+#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_mysql500_ci(x,y)
+#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_mysql500_ci(x,y,z)
#include "strcoll.ic"
-#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_bin
-#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB1(x) ((int) (uchar) (x))
-#define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y))
-#define WEIGHT_MB3(x,y,z) ((int) UTF8MB3_CODE(x,y,z))
+#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8_bin
+#define DEFINE_STRNXFRM_UNICODE_BIN2
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 1
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x) ((int) (uchar) (x))
+#define WEIGHT_MB2(x,y) ((int) UTF8MB2_CODE(x,y))
+#define WEIGHT_MB3(x,y,z) ((int) UTF8MB3_CODE(x,y,z))
#include "strcoll.ic"
@@ -5534,7 +5359,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_ci,
my_strnncollsp_utf8_general_ci,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_utf8,
@@ -5550,7 +5375,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_mysql500_ci,
my_strnncollsp_utf8_general_mysql500_ci,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf8_general_mysql500_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_utf8,
@@ -5566,7 +5391,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
NULL, /* init */
my_strnncoll_utf8_bin,
my_strnncollsp_utf8_bin,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf8_bin,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_mb_bin,
@@ -5582,7 +5407,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_ci,
my_strnncollsp_utf8_general_nopad_ci,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_utf8,
@@ -5598,7 +5423,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf8_bin,
my_strnncollsp_utf8_nopad_bin,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_utf8_bin,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_mb_bin,
@@ -5927,7 +5752,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler =
NULL, /* init */
my_strnncoll_utf8_cs,
my_strnncollsp_utf8_cs,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_simple,
my_wildcmp_mb,
@@ -7212,13 +7037,30 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* my_well_formed_char_length_filename */
+#define MY_FUNCTION_NAME(x) my_ ## x ## _filename
+#define DEFINE_STRNNCOLL 0
+#define DEFINE_STRNXFRM_UNICODE
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_filename(cs, pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 my_unicase_default_page00
+#define UNICASE_PAGES my_unicase_default_pages
+
+/*
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_ci(x)
+#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_ci(x,y)
+#define WEIGHT_MB3(x,y,z) my_weight_mb3_utf8_general_ci(x,y,z)
+*/
+#include "strcoll.ic"
+
static MY_COLLATION_HANDLER my_collation_filename_handler =
{
NULL, /* init */
my_strnncoll_simple,
my_strnncollsp_simple,
- my_strnxfrm_unicode,
+ my_strnxfrm_filename,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_utf8,
@@ -7375,52 +7217,7 @@ static int
my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
- uchar c;
-
- if (s >= e)
- return MY_CS_TOOSMALL;
-
- c= s[0];
- if (c < 0x80)
- {
- *pwc= c;
- return 1;
- }
- else if (c < 0xc2)
- return MY_CS_ILSEQ;
- else if (c < 0xe0)
- {
- if (s + 2 > e) /* We need 2 characters */
- return MY_CS_TOOSMALL2;
-
- if (!(IS_CONTINUATION_BYTE(s[1])))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB2_CODE(c, s[1]);
- return 2;
- }
- else if (c < 0xf0)
- {
- if (s + 3 > e) /* We need 3 characters */
- return MY_CS_TOOSMALL3;
-
- if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB3_CODE(c, s[1], s[2]);
- return 3;
- }
- else if (c < 0xf5)
- {
- if (s + 4 > e) /* We need 4 characters */
- return MY_CS_TOOSMALL4;
-
- if (!IS_UTF8MB4_STEP2(c, s[1], s[2], s[3]))
- return MY_CS_ILSEQ;
- *pwc= UTF8MB4_CODE(c, s[1], s[2], s[3]);
- return 4;
- }
- return MY_CS_ILSEQ;
+ return my_mb_wc_utf8mb4_quick(pwc, s, e);
}
@@ -7752,7 +7549,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
It represents a single byte character.
Convert it into weight according to collation.
*/
- s_wc= plane00[(uchar) s[0]].tolower;
+ s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
s++;
}
else
@@ -7776,7 +7573,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
if ((uchar) t[0] < 128)
{
/* Convert single byte character into weight */
- t_wc= plane00[(uchar) t[0]].tolower;
+ t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
t++;
}
else
@@ -7847,6 +7644,13 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb4_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 1
+#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0 my_unicase_default_page00
+#define UNICASE_PAGES my_unicase_default_pages
#define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3)
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB1(b0) my_weight_mb1_utf8_general_ci(b0)
@@ -7897,7 +7701,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
NULL, /* init */
my_strnncoll_utf8mb4_general_ci,
my_strnncollsp_utf8mb4_general_ci,
- my_strnxfrm_unicode,
+ my_strnxfrm_utf8mb4_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_utf8mb4,
@@ -7929,7 +7733,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler=
NULL, /* init */
my_strnncoll_utf8mb4_general_ci,
my_strnncollsp_utf8mb4_general_nopad_ci,
- my_strnxfrm_unicode_nopad,
+ my_strnxfrm_nopad_utf8mb4_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
my_wildcmp_utf8mb4,
diff --git a/strings/ctype-utf8.h b/strings/ctype-utf8.h
new file mode 100644
index 00000000000..9a44c1658f2
--- /dev/null
+++ b/strings/ctype-utf8.h
@@ -0,0 +1,190 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF8_H
+#define _CTYPE_UTF8_H
+
+/* Detect special bytes and sequences */
+#define IS_CONTINUATION_BYTE(c) (((uchar) (c) ^ 0x80) < 0x40)
+
+/*
+ Check MB2 character assuming that b0 is alredy known to be >= 0xC2.
+ Use this macro if the caller already checked b0 for:
+ - an MB1 character
+ - an unused gap between MB1 and MB2HEAD
+*/
+#define IS_UTF8MB2_STEP2(b0,b1) (((uchar) (b0) < 0xE0) && \
+ IS_CONTINUATION_BYTE((uchar) b1))
+
+/*
+ Check MB3 character assuming that b0 is already known to be
+ in the valid MB3HEAD range [0xE0..0xEF].
+*/
+#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
+ IS_CONTINUATION_BYTE(b2) && \
+ ((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
+
+/*
+ Check MB3 character assuming that b0 is already known to be >= 0xE0,
+ but is not checked for the high end 0xF0 yet.
+ Use this macro if the caller already checked b0 for:
+ - an MB1 character
+ - an unused gap between MB1 and MB2HEAD
+ - an MB2HEAD
+*/
+#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
+ IS_UTF8MB3_STEP2(b0,b1,b2))
+
+/*
+ UTF-8 quick four-byte mask:
+ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ Encoding allows to encode U+00010000..U+001FFFFF
+
+ The maximum character defined in the Unicode standard is U+0010FFFF.
+ Higher characters U+00110000..U+001FFFFF are not used.
+
+ 11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
+ 11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
+
+ Valid codes:
+ [F0][90..BF][80..BF][80..BF]
+ [F1][80..BF][80..BF][80..BF]
+ [F2][80..BF][80..BF][80..BF]
+ [F3][80..BF][80..BF][80..BF]
+ [F4][80..8F][80..BF][80..BF]
+*/
+
+/*
+ Check MB4 character assuming that b0 is already
+ known to be in the range [0xF0..0xF4]
+*/
+#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
+ IS_CONTINUATION_BYTE(b2) && \
+ IS_CONTINUATION_BYTE(b3) && \
+ (b0 >= 0xf1 || b1 >= 0x90) && \
+ (b0 <= 0xf3 || b1 <= 0x8F))
+#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
+ IS_UTF8MB4_STEP2(b0,b1,b2,b3))
+
+/* Convert individual bytes to Unicode code points */
+#define UTF8MB2_CODE(b0,b1) (((my_wc_t) ((uchar) b0 & 0x1f) << 6) |\
+ ((my_wc_t) ((uchar) b1 ^ 0x80)))
+#define UTF8MB3_CODE(b0,b1,b2) (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
+ ((my_wc_t) ((uchar) b1 ^ 0x80) << 6) |\
+ ((my_wc_t) ((uchar) b2 ^ 0x80)))
+#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
+ ((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
+ ((my_wc_t) ((uchar) b2 ^ 0x80) << 6) |\
+ (my_wc_t) ((uchar) b3 ^ 0x80))
+
+static inline int
+my_mb_wc_utf8mb3_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+ uchar c;
+
+ if (s >= e)
+ return MY_CS_TOOSMALL;
+
+ c= s[0];
+ if (c < 0x80)
+ {
+ *pwc = c;
+ return 1;
+ }
+ else if (c < 0xc2)
+ return MY_CS_ILSEQ;
+ else if (c < 0xe0)
+ {
+ if (s+2 > e) /* We need 2 characters */
+ return MY_CS_TOOSMALL2;
+
+ if (!(IS_CONTINUATION_BYTE(s[1])))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB2_CODE(c, s[1]);
+ return 2;
+ }
+ else if (c < 0xf0)
+ {
+ if (s+3 > e) /* We need 3 characters */
+ return MY_CS_TOOSMALL3;
+
+ if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB3_CODE(c, s[1], s[2]);
+ return 3;
+ }
+ return MY_CS_ILSEQ;
+}
+
+
+#ifdef HAVE_CHARSET_utf8mb4
+static inline int
+my_mb_wc_utf8mb4_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+ uchar c;
+
+ if (s >= e)
+ return MY_CS_TOOSMALL;
+
+ c= s[0];
+ if (c < 0x80)
+ {
+ *pwc= c;
+ return 1;
+ }
+ else if (c < 0xc2)
+ return MY_CS_ILSEQ;
+ else if (c < 0xe0)
+ {
+ if (s + 2 > e) /* We need 2 characters */
+ return MY_CS_TOOSMALL2;
+
+ if (!(IS_CONTINUATION_BYTE(s[1])))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB2_CODE(c, s[1]);
+ return 2;
+ }
+ else if (c < 0xf0)
+ {
+ if (s + 3 > e) /* We need 3 characters */
+ return MY_CS_TOOSMALL3;
+
+ if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB3_CODE(c, s[1], s[2]);
+ return 3;
+ }
+ else if (c < 0xf5)
+ {
+ if (s + 4 > e) /* We need 4 characters */
+ return MY_CS_TOOSMALL4;
+
+ if (!IS_UTF8MB4_STEP2(c, s[1], s[2], s[3]))
+ return MY_CS_ILSEQ;
+ *pwc= UTF8MB4_CODE(c, s[1], s[2], s[3]);
+ return 4;
+ }
+ return MY_CS_ILSEQ;
+}
+#endif /* HAVE_CHARSET_utf8mb4*/
+
+
+#endif /* _CTYPE_UTF8_H */
diff --git a/strings/json_lib.c b/strings/json_lib.c
index 24c79cb9044..3763ac4ed54 100644
--- a/strings/json_lib.c
+++ b/strings/json_lib.c
@@ -1845,3 +1845,252 @@ int json_path_compare(const json_path_t *a, const json_path_t *b,
return json_path_parts_compare(a->steps+1, a->last_step,
b->steps+1, b->last_step, vt);
}
+
+
+static enum json_types smart_read_value(json_engine_t *je,
+ const char **value, int *value_len)
+{
+ if (json_read_value(je))
+ goto err_return;
+
+ *value= (char *) je->value;
+
+ if (json_value_scalar(je))
+ *value_len= je->value_len;
+ else
+ {
+ if (json_skip_level(je))
+ goto err_return;
+
+ *value_len= (int) ((char *) je->s.c_str - *value);
+ }
+
+ return je->value_type;
+
+err_return:
+ return JSV_BAD_JSON;
+}
+
+
+enum json_types json_type(const char *js, const char *js_end,
+ const char **value, int *value_len)
+{
+ json_engine_t je;
+
+ json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+ (const uchar *) js_end);
+
+ return smart_read_value(&je, value, value_len);
+}
+
+
+enum json_types json_get_array_item(const char *js, const char *js_end,
+ int n_item,
+ const char **value, int *value_len)
+{
+ json_engine_t je;
+ int c_item= 0;
+
+ json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+ (const uchar *) js_end);
+
+ if (json_read_value(&je) ||
+ je.value_type != JSON_VALUE_ARRAY)
+ goto err_return;
+
+ while (!json_scan_next(&je))
+ {
+ switch (je.state)
+ {
+ case JST_VALUE:
+ if (c_item == n_item)
+ return smart_read_value(&je, value, value_len);
+
+ if (json_skip_key(&je))
+ goto err_return;
+
+ c_item++;
+ break;
+
+ case JST_ARRAY_END:
+ *value= (const char *) (je.s.c_str - je.sav_c_len);
+ *value_len= c_item;
+ return JSV_NOTHING;
+ }
+ }
+
+err_return:
+ return JSV_BAD_JSON;
+}
+
+
+/** Simple json lookup for a value by the key.
+
+ Expects JSON object.
+ Only scans the 'first level' of the object, not
+ the nested structures.
+
+ @param js [in] json object to search in
+ @param js_end [in] end of json string
+ @param key [in] key to search for
+ @param key_end [in] - " -
+ @param value_start [out] pointer into js (value or closing })
+ @param value_len [out] length of the value found or number of keys
+
+ @retval the type of the key value
+ @retval JSV_BAD_JSON - syntax error found reading JSON.
+ or not JSON object.
+ @retval JSV_NOTHING - no such key found.
+*/
+enum json_types json_get_object_key(const char *js, const char *js_end,
+ const char *key,
+ const char **value, int *value_len)
+{
+ const char *key_end= key + strlen(key);
+ json_engine_t je;
+ json_string_t key_name;
+ int n_keys= 0;
+
+ json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
+
+ json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+ (const uchar *) js_end);
+
+ if (json_read_value(&je) ||
+ je.value_type != JSON_VALUE_OBJECT)
+ goto err_return;
+
+ while (!json_scan_next(&je))
+ {
+ switch (je.state)
+ {
+ case JST_KEY:
+ n_keys++;
+ json_string_set_str(&key_name, (const uchar *) key,
+ (const uchar *) key_end);
+ if (json_key_matches(&je, &key_name))
+ return smart_read_value(&je, value, value_len);
+
+ if (json_skip_key(&je))
+ goto err_return;
+
+ break;
+
+ case JST_OBJ_END:
+ *value= (const char *) (je.s.c_str - je.sav_c_len);
+ *value_len= n_keys;
+ return JSV_NOTHING;
+ }
+ }
+
+err_return:
+ return JSV_BAD_JSON;
+}
+
+
+enum json_types json_get_object_nkey(const char *js,const char *js_end, int nkey,
+ const char **keyname, const char **keyname_end,
+ const char **value, int *value_len)
+{
+ return JSV_NOTHING;
+}
+
+
+/** Check if json is valid (well-formed)
+
+ @retval 0 - success, json is well-formed
+ @retval 1 - error, json is invalid
+*/
+int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
+{
+ json_engine_t je;
+ json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
+ while (json_scan_next(&je) == 0) /* no-op */ ;
+ return je.s.error == 0;
+}
+
+
+/*
+ Expects the JSON object as an js argument, and the key name.
+ Looks for this key in the object and returns
+ the location of all the text related to it.
+ The text includes the comma, separating this key.
+
+ comma_pos - the hint where the comma is. It is important
+ if you plan to replace the key rather than just cut.
+ 1 - comma is on the left
+ 2 - comma is on the right.
+ 0 - no comma at all (the object has just this single key)
+
+ if no such key found *key_start is set to NULL.
+*/
+int json_locate_key(const char *js, const char *js_end,
+ const char *kname,
+ const char **key_start, const char **key_end,
+ int *comma_pos)
+{
+ const char *kname_end= kname + strlen(kname);
+ json_engine_t je;
+ json_string_t key_name;
+ int t_next, c_len, match_result;
+
+ json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
+
+ json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+ (const uchar *) js_end);
+
+ if (json_read_value(&je) ||
+ je.value_type != JSON_VALUE_OBJECT)
+ goto err_return;
+
+ *key_start= (const char *) je.s.c_str;
+ *comma_pos= 0;
+
+ while (!json_scan_next(&je))
+ {
+ switch (je.state)
+ {
+ case JST_KEY:
+ json_string_set_str(&key_name, (const uchar *) kname,
+ (const uchar *) kname_end);
+ match_result= json_key_matches(&je, &key_name);
+ if (json_skip_key(&je))
+ goto err_return;
+ get_first_nonspace(&je.s, &t_next, &c_len);
+ je.s.c_str-= c_len;
+
+ if (match_result)
+ {
+ *key_end= (const char *) je.s.c_str;
+
+ if (*comma_pos == 1)
+ return 0;
+
+ DBUG_ASSERT(*comma_pos == 0);
+
+ if (t_next == C_COMMA)
+ {
+ *key_end+= c_len;
+ *comma_pos= 2;
+ }
+ else if (t_next == C_RCURB)
+ *comma_pos= 0;
+ else
+ goto err_return;
+ return 0;
+ }
+
+ *key_start= (const char *) je.s.c_str;
+ *comma_pos= 1;
+ break;
+
+ case JST_OBJ_END:
+ *key_start= NULL;
+ return 0;
+ }
+ }
+
+err_return:
+ return 1;
+
+}
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index c647a5ef57e..9dfccb9018c 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -15,11 +15,18 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-
#ifndef MY_FUNCTION_NAME
#error MY_FUNCTION_NAME is not defined
#endif
+/*
+ Define strnncoll() and strnncollsp() by default,
+ unless "#define DEFINE_STRNNCOLL 0" is specified.
+*/
+#ifndef DEFINE_STRNNCOLL
+#define DEFINE_STRNNCOLL 1
+#endif
+
/*
The weight for automatically padded spaces when comparing strings with
@@ -54,6 +61,8 @@
#endif
+#if DEFINE_STRNNCOLL
+
/**
Scan a valid character, or a bad byte, or an auto-padded space
from a string and calculate the weight of the scanned sequence.
@@ -278,6 +287,8 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
}
#endif
+#endif /* DEFINE_STRNNCOLL */
+
#ifdef DEFINE_STRNXFRM
#ifndef WEIGHT_MB2_FRM
@@ -322,11 +333,261 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
#endif /* DEFINE_STRNXFRM */
+#if defined(DEFINE_STRNXFRM_UNICODE) || defined(DEFINE_STRNXFRM_UNICODE_NOPAD)
+
+/*
+ Store sorting weights using 2 bytes per character.
+
+ This function is shared between
+ - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
+ which support BMP only (U+0000..U+FFFF).
+ - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
+ which map all supplementary characters to weight 0xFFFD.
+*/
+
+#ifndef MY_MB_WC
+#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef OPTIMIZE_ASCII
+#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef UNICASE_MAXCHAR
+#error UNICASE_MAXCHAR must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef UNICASE_PAGE0
+#error UNICASE_PAGE0 must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef UNICASE_PAGES
+#error UNICASE_PAGES must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs,
+ uchar *dst, uchar *de,
+ uint *nweights,
+ const uchar *src, const uchar *se)
+{
+ my_wc_t UNINIT_VAR(wc);
+ uchar *dst0= dst;
+
+ DBUG_ASSERT(src || !se);
+ DBUG_ASSERT((cs->state & MY_CS_LOWER_SORT) == 0);
+ DBUG_ASSERT(0x7F <= UNICASE_MAXCHAR);
+
+ for (; dst < de && *nweights; (*nweights)--)
+ {
+ int res;
+#if OPTIMIZE_ASCII
+ if (src >= se)
+ break;
+ if (src[0] <= 0x7F)
+ {
+ wc= UNICASE_PAGE0[*src++].sort;
+ PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+ continue;
+ }
+#endif
+ if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
+ break;
+ src+= res;
+ if (wc <= UNICASE_MAXCHAR)
+ {
+ MY_UNICASE_CHARACTER *page;
+ if ((page= UNICASE_PAGES[wc >> 8]))
+ wc= page[wc & 0xFF].sort;
+ }
+ else
+ wc= MY_CS_REPLACEMENT_CHARACTER;
+ PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+ }
+ return dst - dst0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *dst0= dst;
+ uchar *de= dst + dstlen;
+ dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+ src, src + srclen);
+ DBUG_ASSERT(dst <= de); /* Safety */
+
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+ dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
+
+ my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ dst+= my_strxfrm_pad_unicode(dst, de);
+ return dst - dst0;
+}
+
+
+#ifdef DEFINE_STRNXFRM_UNICODE_NOPAD
+static size_t
+MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen,
+ uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *dst0= dst;
+ uchar *de= dst + dstlen;
+ dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+ src, src + srclen);
+ DBUG_ASSERT(dst <= de); /* Safety */
+
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+ {
+ size_t len= de - dst;
+ set_if_smaller(len, nweights * 2);
+ memset(dst, 0x00, len);
+ dst+= len;
+ }
+
+ my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ {
+ memset(dst, 0x00, de - dst);
+ dst= de;
+ }
+ return dst - dst0;
+}
+#endif
+
+#endif /* DEFINE_STRNXFRM_UNICODE || DEFINE_STRNXFRM_UNICODE_NOPAD */
+
+
+
+#ifdef DEFINE_STRNXFRM_UNICODE_BIN2
+
+/*
+ Store sorting weights using 2 bytes per character.
+
+ These functions are shared between
+ - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
+ which support BMP only (U+0000..U+FFFF).
+ - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
+ which map all supplementary characters to weight 0xFFFD.
+*/
+
+#ifndef MY_MB_WC
+#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
+#endif
+
+#ifndef OPTIMIZE_ASCII
+#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
+#endif
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs,
+ uchar *dst, uchar *de,
+ uint *nweights,
+ const uchar *src,
+ const uchar *se)
+{
+ my_wc_t UNINIT_VAR(wc);
+ uchar *dst0= dst;
+
+ DBUG_ASSERT(src || !se);
+
+ for (; dst < de && *nweights; (*nweights)--)
+ {
+ int res;
+#if OPTIMIZE_ASCII
+ if (src >= se)
+ break;
+ if (src[0] <= 0x7F)
+ {
+ wc= *src++;
+ PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+ continue;
+ }
+#endif
+ if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
+ break;
+ src+= res;
+ if (wc > 0xFFFF)
+ wc= MY_CS_REPLACEMENT_CHARACTER;
+ PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+ }
+ return dst - dst0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *dst0= dst;
+ uchar *de= dst + dstlen;
+ dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+ src, src + srclen);
+ DBUG_ASSERT(dst <= de); /* Safety */
+
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+ dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
+
+ my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ dst+= my_strxfrm_pad_unicode(dst, de);
+ return dst - dst0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *dst0= dst;
+ uchar *de= dst + dstlen;
+ dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+ src, src + srclen);
+ DBUG_ASSERT(dst <= de); /* Safety */
+
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+ {
+ size_t len= de - dst;
+ set_if_smaller(len, nweights * 2);
+ memset(dst, 0x00, len);
+ dst+= len;
+ }
+
+ my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ {
+ memset(dst, 0x00, de - dst);
+ dst= de;
+ }
+ return dst - dst0;
+}
+
+#endif /* DEFINE_STRNXFRM_UNICODE_BIN2 */
+
+
/*
We usually include this file at least two times from the same source file,
for the _ci and the _bin collations. Prepare for the second inclusion.
*/
#undef MY_FUNCTION_NAME
+#undef MY_MB_WC
+#undef OPTIMIZE_ASCII
+#undef UNICASE_MAXCHAR
+#undef UNICASE_PAGE0
+#undef UNICASE_PAGES
#undef WEIGHT_ILSEQ
#undef WEIGHT_MB1
#undef WEIGHT_MB2
@@ -335,4 +596,8 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
#undef WEIGHT_PAD_SPACE
#undef WEIGHT_MB2_FRM
#undef DEFINE_STRNXFRM
+#undef DEFINE_STRNXFRM_UNICODE
+#undef DEFINE_STRNXFRM_UNICODE_NOPAD
+#undef DEFINE_STRNXFRM_UNICODE_BIN2
+#undef DEFINE_STRNNCOLL
#undef DEFINE_STRNNCOLLSP_NOPAD