summary | refs | log | tree | commit | diff
path: root/strings
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.com> 2018-10-16 19:10:57 +0400
committer Alexander Barkov <bar@mariadb.com> 2018-10-17 06:44:40 +0400
commit 6eae037c4c76a5746f3954356a5a8b78da49dd1b (patch)
tree 826075811261a303042ea598a5349ae691536e00 /strings
parent fee24b1281ab5a306880f19e749da75b8797a61d (diff)
download mariadb-git-6eae037c4c76a5746f3954356a5a8b78da49dd1b.tar.gz
MDEV-17474 Change Unicode collation implementation from "handler" to "inline" style
Diffstat (limited to 'strings')
-rw-r--r-- strings/ctype-uca.c   | 1263
-rw-r--r-- strings/ctype-uca.ic  |  763
-rw-r--r-- strings/ctype-ucs2.c  |   74
-rw-r--r-- strings/ctype-ucs2.h  |   32
-rw-r--r-- strings/ctype-utf16.h |   80
-rw-r--r-- strings/ctype-utf32.h |   33
-rw-r--r-- strings/ctype-utf8.c  |  155
-rw-r--r-- strings/ctype-utf8.h  |  190
8 files changed, 1323 insertions, 1267 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 2cb4652dd0f..4c670861a9f 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -31158,17 +31158,6 @@ typedef struct my_uca_scanner_st
CHARSET_INFO *cs;
} my_uca_scanner;
-/*
- Charset dependent scanner part, to optimize
- some character sets.
-*/
-typedef struct my_uca_scanner_handler_st
-{
- void (*init)(my_uca_scanner *scanner, CHARSET_INFO *cs,
- const MY_UCA_WEIGHT_LEVEL *level,
- const uchar *str, size_t length);
- int (*next)(my_uca_scanner *scanner);
-} my_uca_scanner_handler;
static const uint16 nochar[]= {0,0};
@@ -31675,223 +31664,6 @@ my_uca_scanner_init_any(my_uca_scanner *scanner,
scanner->cs= cs;
}
-static int my_uca_scanner_next_any(my_uca_scanner *scanner)
-{
- /*
- Check if the weights for the previous character have been
- already fully scanned. If yes, then get the next character and
- initialize wbeg and wlength to its weight string.
- */
-
- if (scanner->wbeg[0]) /* More weights left from the previous step: */
- return *scanner->wbeg++; /* return the next weight from expansion */
-
- do
- {
- const uint16 *wpage;
- my_wc_t wc[MY_UCA_MAX_CONTRACTION];
- int mblen;
-
- /* Get next character */
- if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, wc,
- scanner->sbeg,
- scanner->send)) <= 0))
- {
- if (scanner->sbeg >= scanner->send)
- return -1; /* No more bytes, end of line reached */
- /*
- There are some more bytes left. Non-positive mb_len means that
- we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
- */
- if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
- {
- /* For safety purposes don't go beyond the string range. */
- scanner->sbeg= scanner->send;
- }
- /*
- Treat every complete or incomplete mbminlen unit as a weight which is
- greater than weight for any possible normal character.
- 0xFFFF is greater than any possible weight in the UCA weight table.
- */
- return 0xFFFF;
- }
-
- scanner->sbeg+= mblen;
- if (wc[0] > scanner->level->maxchar)
- {
- /* Return 0xFFFD as weight for all characters outside BMP */
- scanner->wbeg= nochar;
- return 0xFFFD;
- }
-
- if (my_uca_have_contractions_quick(scanner->level))
- {
- uint16 *cweight;
- /*
- If we have scanned a character which can have previous context,
- and there were some more characters already before,
- then reconstruct codepoint of the previous character
- from "page" and "code" into w[1], and verify that {wc[1], wc[0]}
- together form a real previous context pair.
- Note, we support only 2-character long sequences with previous
- context at the moment. CLDR does not have longer sequences.
- */
- if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
- wc[0]) &&
- scanner->wbeg != nochar && /* if not the very first character */
- my_uca_can_be_previous_context_head(&scanner->level->contractions,
- (wc[1]= ((scanner->page << 8) +
- scanner->code))) &&
- (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
- {
- scanner->page= scanner->code= 0; /* Clear for the next character */
- return *cweight;
- }
- else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
- wc[0]))
- {
- /* Check if w[0] starts a contraction */
- if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
- return *cweight;
- }
- }
-
- /* Process single character */
- scanner->page= wc[0] >> 8;
- scanner->code= wc[0] & 0xFF;
-
- /* If weight page for w[0] does not exist, then calculate algoritmically */
- if (!(wpage= scanner->level->weights[scanner->page]))
- return my_uca_scanner_next_implicit(scanner);
-
- /* Calculate pointer to w[0]'s weight, using page and offset */
- scanner->wbeg= wpage +
- scanner->code * scanner->level->lengths[scanner->page];
- } while (!scanner->wbeg[0]); /* Skip ignorable characters */
-
- return *scanner->wbeg++;
-}
-
-
-static my_uca_scanner_handler my_any_uca_scanner_handler=
-{
- my_uca_scanner_init_any,
- my_uca_scanner_next_any
-};
-
-/*
- Compares two strings according to the collation
-
- SYNOPSIS:
- my_strnncoll_uca()
- cs Character set information
- s First string
- slen First string length
- t Second string
- tlen Seconf string length
- level DUCETweight level
-
- NOTES:
- Initializes two weight scanners and gets weights
- corresponding to two strings in a loop. If weights are not
- the same at some step then returns their difference.
-
- In the while() comparison these situations are possible:
- 1. (s_res>0) and (t_res>0) and (s_res == t_res)
- Weights are the same so far, continue comparison
- 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
- A difference has been found, return.
- 3. (s_res>0) and (t_res<0)
- We have reached the end of the second string, or found
- an illegal multibyte sequence in the second string.
- Return a positive number, i.e. the first string is bigger.
- 4. (s_res<0) and (t_res>0)
- We have reached the end of the first string, or found
- an illegal multibyte sequence in the first string.
- Return a negative number, i.e. the second string is bigger.
- 5. (s_res<0) and (t_res<0)
- Both scanners returned -1. It means we have riched
- the end-of-string of illegal-sequence in both strings
- at the same time. Return 0, strings are equal.
-
- RETURN
- Difference between two strings, according to the collation:
- 0 - means strings are equal
- negative number - means the first string is smaller
- positive number - means the first string is bigger
-*/
-
-static int my_strnncoll_uca_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const MY_UCA_WEIGHT_LEVEL *level,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- my_uca_scanner sscanner;
- my_uca_scanner tscanner;
- int s_res;
- int t_res;
-
- scanner_handler->init(&sscanner, cs, level, s, slen);
- scanner_handler->init(&tscanner, cs, level, t, tlen);
-
- do
- {
- s_res= scanner_handler->next(&sscanner);
- t_res= scanner_handler->next(&tscanner);
- } while ( s_res == t_res && s_res >0);
-
- return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
-}
-
-static int my_strnncoll_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
- s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncoll_uca_multilevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- uint num_level= cs->levels_for_order;
- uint i;
- for (i= 0; i != num_level; i++)
- {
- int ret= my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[i],
- s, slen, t, tlen, t_is_prefix);
- if (ret)
- return ret;
- }
- return 0;
-}
-
-
-static int
-my_strnncollsp_generic_uca_nopad_multilevel(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- uint num_level= cs->levels_for_order;
- uint i;
- for (i= 0; i != num_level; i++)
- {
- int ret= my_strnncoll_uca_onelevel(cs, &my_any_uca_scanner_handler,
- &cs->uca->level[i],
- s, slen, t, tlen, FALSE);
- if (ret)
- return ret;
- }
- return 0;
-}
-
static inline int
my_space_weight(const MY_UCA_WEIGHT_LEVEL *level)
@@ -31924,258 +31696,6 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc)
}
-/*
- Compares two strings according to the collation,
- ignoring trailing spaces.
-
- SYNOPSIS:
- my_strnncollsp_uca()
- cs Character set information
- s First string
- slen First string length
- t Second string
- tlen Seconf string length
- level DUCETweight level
-
- NOTES:
- Works exactly the same with my_strnncoll_uca(),
- but ignores trailing spaces.
-
- In the while() comparison these situations are possible:
- 1. (s_res>0) and (t_res>0) and (s_res == t_res)
- Weights are the same so far, continue comparison
- 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
- A difference has been found, return.
- 3. (s_res>0) and (t_res<0)
- We have reached the end of the second string, or found
- an illegal multibyte sequence in the second string.
- Compare the first string to an infinite array of
- space characters until difference is found, or until
- the end of the first string.
- 4. (s_res<0) and (t_res>0)
- We have reached the end of the first string, or found
- an illegal multibyte sequence in the first string.
- Compare the second string to an infinite array of
- space characters until difference is found or until
- the end of the second steing.
- 5. (s_res<0) and (t_res<0)
- Both scanners returned -1. It means we have riched
- the end-of-string of illegal-sequence in both strings
- at the same time. Return 0, strings are equal.
-
- RETURN
- Difference between two strings, according to the collation:
- 0 - means strings are equal
- negative number - means the first string is smaller
- positive number - means the first string is bigger
-*/
-
-static int my_strnncollsp_uca_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const MY_UCA_WEIGHT_LEVEL *level,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- my_uca_scanner sscanner, tscanner;
- int s_res, t_res;
-
- scanner_handler->init(&sscanner, cs, level, s, slen);
- scanner_handler->init(&tscanner, cs, level, t, tlen);
-
- do
- {
- s_res= scanner_handler->next(&sscanner);
- t_res= scanner_handler->next(&tscanner);
- } while ( s_res == t_res && s_res >0);
-
- if (s_res > 0 && t_res < 0)
- {
- /* Calculate weight for SPACE character */
- t_res= my_space_weight(level);
-
- /* compare the first string to spaces */
- do
- {
- if (s_res != t_res)
- return (s_res - t_res);
- s_res= scanner_handler->next(&sscanner);
- } while (s_res > 0);
- return 0;
- }
-
- if (s_res < 0 && t_res > 0)
- {
- /* Calculate weight for SPACE character */
- s_res= my_space_weight(level);
-
- /* compare the second string to spaces */
- do
- {
- if (s_res != t_res)
- return (s_res - t_res);
- t_res= scanner_handler->next(&tscanner);
- } while (t_res > 0);
- return 0;
- }
-
- return ( s_res - t_res );
-}
-
-static int my_strnncollsp_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
- s, slen, t, tlen);
-}
-
-static int my_strnncollsp_uca_multilevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- uint num_level= cs->levels_for_order;
- uint i;
- for (i= 0; i != num_level; i++)
- {
- int ret= my_strnncollsp_uca_onelevel(cs, scanner_handler,
- &cs->uca->level[i], s, slen, t, tlen);
- if (ret)
- return ret;
- }
- return 0;
-}
-
-/*
- Calculates hash value for the given string,
- according to the collation, and ignoring trailing spaces.
-
- SYNOPSIS:
- my_hash_sort_uca()
- cs Character set information
- s String
- slen String's length
- n1 First hash parameter
- n2 Second hash parameter
-
- NOTES:
- Scans consequently weights and updates
- hash parameters n1 and n2. In a case insensitive collation,
- upper and lower case of the same letter will return the same
- weight sequence, and thus will produce the same hash values
- in n1 and n2.
-
- This functions is used for one-level and for multi-level collations.
- We intentionally use only primary level in multi-level collations.
- This helps to have PARTITION BY KEY put primarily equal records
- into the same partition. E.g. in utf8_thai_520_ci records that differ
- only in tone marks go into the same partition.
-
- RETURN
- N/A
-*/
-
-static void my_hash_sort_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- ulong *nr1, ulong *nr2)
-{
- int s_res;
- my_uca_scanner scanner;
- int space_weight= my_space_weight(&cs->uca->level[0]);
- register ulong m1= *nr1, m2= *nr2;
-
- scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
-
- while ((s_res= scanner_handler->next(&scanner)) >0)
- {
- if (s_res == space_weight)
- {
- /* Combine all spaces to be able to skip end spaces */
- uint count= 0;
- do
- {
- count++;
- if ((s_res= scanner_handler->next(&scanner)) <= 0)
- {
- /* Skip strings at end of string */
- goto end;
- }
- }
- while (s_res == space_weight);
-
- /* Add back that has for the space characters */
- do
- {
- /*
- We can't use MY_HASH_ADD_16() here as we, because of a misstake
- in the original code, where we added the 16 byte variable the
- opposite way. Changing this would cause old partitioned tables
- to fail.
- */
- MY_HASH_ADD(m1, m2, space_weight >> 8);
- MY_HASH_ADD(m1, m2, space_weight & 0xFF);
- }
- while (--count != 0);
-
- }
- /* See comment above why we can't use MY_HASH_ADD_16() */
- MY_HASH_ADD(m1, m2, s_res >> 8);
- MY_HASH_ADD(m1, m2, s_res & 0xFF);
- }
-end:
- *nr1= m1;
- *nr2= m2;
-}
-
-
-static void my_hash_sort_uca_nopad(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- const uchar *s, size_t slen,
- ulong *nr1, ulong *nr2)
-{
- int s_res;
- my_uca_scanner scanner;
- register ulong m1= *nr1, m2= *nr2;
-
- scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
-
- while ((s_res= scanner_handler->next(&scanner)) >0)
- {
- /* See comment above why we can't use MY_HASH_ADD_16() */
- MY_HASH_ADD(m1, m2, s_res >> 8);
- MY_HASH_ADD(m1, m2, s_res & 0xFF);
- }
- *nr1= m1;
- *nr2= m2;
-}
-
-
-static uchar *
-my_strnxfrm_uca_onelevel_internal(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- MY_UCA_WEIGHT_LEVEL *level,
- uchar *dst, uchar *de, uint *nweights,
- const uchar *src, size_t srclen)
-{
- my_uca_scanner scanner;
- int s_res;
-
- DBUG_ASSERT(src || !srclen);
-
- scanner_handler->init(&scanner, cs, level, src, srclen);
- for (; dst < de && *nweights &&
- (s_res= scanner_handler->next(&scanner)) > 0 ; (*nweights)--)
- {
- *dst++= s_res >> 8;
- if (dst < de)
- *dst++= s_res & 0xFF;
- }
- return dst;
-}
-
-
static uchar *
my_strnxfrm_uca_padn(uchar *dst, uchar *de, uint nweights, int weight)
{
@@ -32202,27 +31722,6 @@ my_strnxfrm_uca_pad(uchar *dst, uchar *de, int weight)
}
-static uchar *
-my_strnxfrm_uca_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- MY_UCA_WEIGHT_LEVEL *level,
- uchar *dst, uchar *de, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
-
- dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
- dst, de, &nweights,
- src, srclen);
- DBUG_ASSERT(dst <= de);
- if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
- dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
- DBUG_ASSERT(dst <= de);
- my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
- return dst;
-}
-
-
/*
Return the minimum possible weight on a level.
*/
@@ -32233,136 +31732,6 @@ static uint min_weight_on_level(MY_UCA_WEIGHT_LEVEL *level)
}
-static uchar *
-my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- MY_UCA_WEIGHT_LEVEL *level,
- uchar *dst, uchar *de, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
-
- dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
- dst, de, &nweights,
- src, srclen);
- DBUG_ASSERT(dst <= de);
- /* Pad with the minimum possible weight on this level */
- if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
- dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
- DBUG_ASSERT(dst <= de);
- my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
- return dst;
-}
-
-
-/*
- For the given string creates its "binary image", suitable
- to be used in binary comparison, i.e. in memcmp().
-
- SYNOPSIS:
- my_strnxfrm_uca()
- cs Character set information
- dst Where to write the image
- dstlen Space available for the image, in bytes
- src The source string
- srclen Length of the source string, in bytes
-
- NOTES:
- In a loop, scans weights from the source string and writes
- them into the binary image. In a case insensitive collation,
- upper and lower cases of the same letter will produce the
- same image subsequences. When we have reached the end-of-string
- or found an illegal multibyte sequence, the loop stops.
-
- It is impossible to restore the original string using its
- binary image.
-
- Binary images are used for bulk comparison purposes,
- e.g. in ORDER BY, when it is more efficient to create
- a binary image and use it instead of weight scanner
- for the original strings for every comparison.
-
- RETURN
- Number of bytes that have been written into the binary image.
-*/
-
-
-static size_t
-my_strnxfrm_uca(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
- uchar *de= dst + dstlen;
-
- dst= my_strnxfrm_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
- dst, de, nweights, src, srclen, flags);
- /*
- This can probably be changed to memset(dst, 0, de - dst),
- like my_strnxfrm_uca_multilevel() does.
- */
- if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
- dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0]));
- return dst - d0;
-}
-
-
-static size_t
-my_strnxfrm_uca_nopad(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uchar *d0= dst;
- uchar *de= dst + dstlen;
-
- dst= my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler, &cs->uca->level[0],
- dst, de, nweights, src, srclen, flags);
- if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
- {
- memset(dst, 0, de - dst);
- dst= de;
- }
- return dst - d0;
-}
-
-
-static size_t
-my_strnxfrm_uca_multilevel(CHARSET_INFO *cs,
- my_uca_scanner_handler *scanner_handler,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- uint num_level= cs->levels_for_order;
- uchar *d0= dst;
- uchar *de= dst + dstlen;
- uint current_level;
-
- for (current_level= 0; current_level != num_level; current_level++)
- {
- if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
- (flags & (MY_STRXFRM_LEVEL1 << current_level)))
- dst= cs->state & MY_CS_NOPAD ?
- my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler,
- &cs->uca->level[current_level],
- dst, de, nweights,
- src, srclen, flags) :
- my_strnxfrm_uca_onelevel(cs, scanner_handler,
- &cs->uca->level[current_level],
- dst, de, nweights,
- src, srclen, flags);
- }
-
- if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
- {
- memset(dst, 0, de - dst);
- dst= de;
- }
-
- return dst - d0;
-}
-
/*
This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only
@@ -34248,8 +33617,46 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
}
-MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel;
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel;
+static my_bool
+create_tailoring(struct charset_info_st *cs,
+ MY_CHARSET_LOADER *loader);
+
+/*
+  Collation "init" callback for UCA collations.
+
+  Fills in the fields of "cs" that do not depend on tailoring rules:
+  the ctype table is shared with my_charset_utf8_unicode_ci, the pad
+  character is a plain space, and the case info falls back to
+  my_unicase_default when the collation did not supply its own.
+  The remaining work is delegated to create_tailoring().
+
+  RETURN
+    Whatever create_tailoring() returns for this collation.
+*/
+static my_bool
+my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
+{
+  /* Keep a caseinfo that is already set; otherwise use the default one */
+  if (!cs->caseinfo)
+    cs->caseinfo= &my_unicase_default;
+
+  cs->ctype= my_charset_utf8_unicode_ci.ctype;
+  cs->pad_char= ' ';
+
+  return create_tailoring(cs, loader);
+}
+
+
+/*
+  Calculate the sort key length for a source string of "len" bytes
+  on a single weight level.
+
+  "len" is first converted to a count of mbmaxlen-sized units
+  (rounded up), then scaled by cs->strxfrm_multiply and by the
+  size of one weight.
+*/
+static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
+{
+  /* How many mbmaxlen-sized units are needed to cover len bytes */
+  size_t nunits= (len + cs->mbmaxlen - 1) / cs->mbmaxlen;
+
+  /* UCA uses 2 bytes per weight */
+  return nunits * cs->strxfrm_multiply * 2;
+}
+
+/*
+  Calculate the sort key length for a multi-level collation:
+  the one-level length multiplied by cs->levels_for_order.
+*/
+static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
+{
+  size_t onelevel_len= my_strnxfrmlen_any_uca(cs, len);
+  return onelevel_len * cs->levels_for_order;
+}
+
+
+/*
+  Define generic collation handlers for multi-level collations with tailoring:
+
+    my_uca_collation_handler_nopad_multilevel_generic
+    my_uca_collation_handler_multilevel_generic
+
+  The macros below are the parameters of this instantiation of
+  "ctype-uca.ic" (the template itself is not shown here; presumably it
+  expands them to generate the functions and handlers named above):
+
+    MY_FUNCTION_NAME(x)  pastes "x" between the "my_uca_" prefix and
+                         the "_generic" suffix
+    MY_MB_WC             reads one character by calling the mb_wc()
+                         method of the scanner's character set
+    MY_LIKE_RANGE        selects my_like_range_generic
+
+  The "scanner" parameter is parenthesized in MY_MB_WC, so that the macro
+  stays correct when the argument is an expression such as "&s".
+
+  TODO: Use faster character-set specific versions of MY_COLLATION_HANDLER
+        instead of generic.
+*/
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _generic
+#define MY_MB_WC(scanner, wc, beg, end) \
+  ((scanner)->cs->cset->mb_wc((scanner)->cs, wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#include "ctype-uca.ic"
/*
@@ -34334,8 +33741,8 @@ create_tailoring(struct charset_info_st *cs,
cs->uca[0]= new_uca;
if (cs->levels_for_order > 1)
cs->coll= (cs->state & MY_CS_NOPAD) ?
- &my_collation_generic_uca_nopad_handler_multilevel :
- &my_collation_any_uca_handler_multilevel;
+ &my_uca_collation_handler_nopad_multilevel_generic :
+ &my_uca_collation_handler_multilevel_generic;
ex:
(loader->free)(rules.rule);
@@ -34344,235 +33751,14 @@ ex:
return rc;
}
-/*
- Universal CHARSET_INFO compatible wrappers
- for the above internal functions.
- Should work for any character set.
-*/
-
-static my_bool
-my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
-{
- cs->pad_char= ' ';
- cs->ctype= my_charset_utf8_unicode_ci.ctype;
- if (!cs->caseinfo)
- cs->caseinfo= &my_unicase_default;
- return create_tailoring(cs, loader);
-}
-
-
-static int my_strnncoll_any_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncoll_any_uca_multilevel(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca_multilevel(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncollsp_any_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
-}
-
-
-static int my_strnncollsp_generic_uca_nopad(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, FALSE);
-}
-
-
-static int my_strnncollsp_any_uca_multilevel(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca_multilevel(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen);
-}
-
-static void my_hash_sort_any_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- ulong *n1, ulong *n2)
-{
- my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static void my_hash_sort_generic_uca_nopad(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- ulong *n1, ulong *n2)
-{
- my_hash_sort_uca_nopad(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static size_t my_strnxfrm_any_uca(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen, flags);
-}
-
-static size_t my_strnxfrm_generic_uca_nopad(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen,
- uint nweights,
- const uchar *src, size_t srclen,
- uint flags)
-{
- return my_strnxfrm_uca_nopad(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen, flags);
-}
-
-static size_t my_strnxfrm_any_uca_multilevel(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen,
- uint nweights, const uchar *src,
- size_t srclen, uint flags)
-{
- return my_strnxfrm_uca_multilevel(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen,
- flags);
-}
-
-static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
-{
- /* UCA uses 2 bytes per weight */
- return (len + cs->mbmaxlen - 1) / cs->mbmaxlen * cs->strxfrm_multiply * 2;
-}
-
-static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
-{
- return my_strnxfrmlen_any_uca(cs, len) * cs->levels_for_order;
-}
-
-
-/* NO PAD handler for character sets with mbminlen==1 */
-MY_COLLATION_HANDLER my_collation_mb_uca_nopad_handler =
-{
- my_coll_init_uca,
- my_strnncoll_any_uca,
- my_strnncollsp_generic_uca_nopad,
- my_strnxfrm_generic_uca_nopad,
- my_strnxfrmlen_any_uca,
- my_like_range_mb,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_generic_uca_nopad,
- my_propagate_complex
-};
-
-
-/* NO PAD handler for character sets with mbminlen>=1 */
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler =
-{
- my_coll_init_uca,
- my_strnncoll_any_uca,
- my_strnncollsp_generic_uca_nopad,
- my_strnxfrm_generic_uca_nopad,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_generic_uca_nopad,
- my_propagate_complex
-};
-
-
-MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
-{
- my_coll_init_uca,
- my_strnncoll_any_uca_multilevel,
- my_strnncollsp_any_uca_multilevel,
- my_strnxfrm_any_uca_multilevel,
- my_strnxfrmlen_any_uca_multilevel,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
-
-
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel =
-{
- my_coll_init_uca,
- my_strnncoll_any_uca_multilevel,
- my_strnncollsp_generic_uca_nopad_multilevel,
- my_strnxfrm_any_uca_multilevel,
- my_strnxfrmlen_any_uca_multilevel,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_generic_uca_nopad,
- my_propagate_complex
-};
-
#ifdef HAVE_CHARSET_ucs2
-/*
- UCS2 optimized CHARSET_INFO compatible wrappers.
-*/
-static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen,
- my_bool t_is_prefix)
-{
- return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
- s, slen, t, tlen, t_is_prefix);
-}
-static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- const uchar *t, size_t tlen)
-{
- return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
-}
-
-static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
- const uchar *s, size_t slen,
- ulong *n1, ulong *n2)
-{
- my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static size_t my_strnxfrm_ucs2_uca(CHARSET_INFO *cs,
- uchar *dst, size_t dstlen, uint nweights,
- const uchar *src, size_t srclen, uint flags)
-{
- return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
- dst, dstlen, nweights, src, srclen, flags);
-}
-
-MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
-{
- my_coll_init_uca, /* init */
- my_strnncoll_ucs2_uca,
- my_strnncollsp_ucs2_uca,
- my_strnxfrm_ucs2_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_ucs2_uca,
- my_propagate_complex
-};
+/*
+  ucs2 instantiation of "ctype-uca.ic".
+
+  MY_FUNCTION_NAME(x) pastes "x" between "my_uca_" and "_ucs2"; the
+  charset definitions below refer to my_uca_collation_handler_ucs2.
+  MY_MB_WC ignores its "scanner" argument and calls
+  my_mb_wc_ucs2_quick() directly (presumably declared in "ctype-ucs2.h",
+  which is included first - TODO confirm).
+*/
+#include "ctype-ucs2.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _ucs2
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_ucs2_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#include "ctype-uca.ic"
#define MY_CS_UCS2_UCA_FLAGS (MY_CS_COMMON_UCA_FLAGS|MY_CS_NONASCII)
@@ -34607,7 +33793,7 @@ struct charset_info_st my_charset_ucs2_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
@@ -34639,7 +33825,7 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_latvian_uca_ci=
@@ -34671,7 +33857,7 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_romanian_uca_ci=
@@ -34703,7 +33889,7 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
@@ -34735,7 +33921,7 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_polish_uca_ci=
@@ -34767,7 +33953,7 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_estonian_uca_ci=
@@ -34799,7 +33985,7 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_spanish_uca_ci=
@@ -34831,7 +34017,7 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_swedish_uca_ci=
@@ -34863,7 +34049,7 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_turkish_uca_ci=
@@ -34895,7 +34081,7 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_czech_uca_ci=
@@ -34927,7 +34113,7 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -34960,7 +34146,7 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
@@ -34992,7 +34178,7 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_slovak_uca_ci=
@@ -35024,7 +34210,7 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
@@ -35056,7 +34242,7 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35089,7 +34275,7 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35122,7 +34308,7 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35155,7 +34341,7 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35188,7 +34374,7 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
@@ -35220,7 +34406,7 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35254,7 +34440,7 @@ struct charset_info_st my_charset_ucs2_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
@@ -35286,7 +34472,7 @@ struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35319,7 +34505,7 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35352,7 +34538,7 @@ struct charset_info_st my_charset_ucs2_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35385,7 +34571,7 @@ struct charset_info_st my_charset_ucs2_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_ucs2
};
struct charset_info_st my_charset_ucs2_unicode_520_ci=
@@ -35417,7 +34603,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35450,7 +34636,7 @@ struct charset_info_st my_charset_ucs2_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_ucs2_uca_handler
+ &my_uca_collation_handler_ucs2
};
@@ -35483,7 +34669,7 @@ struct charset_info_st my_charset_ucs2_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_ucs2
};
@@ -35516,7 +34702,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_ucs2
};
@@ -35524,20 +34710,12 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf8
-MY_COLLATION_HANDLER my_collation_any_uca_handler =
-{
- my_coll_init_uca, /* init */
- my_strnncoll_any_uca,
- my_strnncollsp_any_uca,
- my_strnxfrm_any_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_mb,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
+
+#include "ctype-utf8.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb3
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#include "ctype-uca.ic"
/*
@@ -35600,7 +34778,7 @@ struct charset_info_st my_charset_utf8_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -35633,7 +34811,7 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_latvian_uca_ci=
@@ -35665,7 +34843,7 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_romanian_uca_ci=
@@ -35697,7 +34875,7 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_slovenian_uca_ci=
@@ -35729,7 +34907,7 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_polish_uca_ci=
@@ -35761,7 +34939,7 @@ struct charset_info_st my_charset_utf8_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_estonian_uca_ci=
@@ -35793,7 +34971,7 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_spanish_uca_ci=
@@ -35825,7 +35003,7 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_swedish_uca_ci=
@@ -35857,7 +35035,7 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_turkish_uca_ci=
@@ -35889,7 +35067,7 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_czech_uca_ci=
@@ -35921,7 +35099,7 @@ struct charset_info_st my_charset_utf8_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -35954,7 +35132,7 @@ struct charset_info_st my_charset_utf8_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
@@ -35986,7 +35164,7 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_slovak_uca_ci=
@@ -36018,7 +35196,7 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_spanish2_uca_ci=
@@ -36050,7 +35228,7 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_roman_uca_ci=
@@ -36082,7 +35260,7 @@ struct charset_info_st my_charset_utf8_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_persian_uca_ci=
@@ -36114,7 +35292,7 @@ struct charset_info_st my_charset_utf8_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_esperanto_uca_ci=
@@ -36146,7 +35324,7 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_hungarian_uca_ci=
@@ -36178,7 +35356,7 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_sinhala_uca_ci=
@@ -36210,7 +35388,7 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36243,7 +35421,7 @@ struct charset_info_st my_charset_utf8_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
@@ -36275,7 +35453,7 @@ struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36308,7 +35486,7 @@ struct charset_info_st my_charset_utf8_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36341,7 +35519,7 @@ struct charset_info_st my_charset_utf8_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36374,7 +35552,7 @@ struct charset_info_st my_charset_utf8_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_thai_520_w2=
@@ -36406,7 +35584,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf8mb3
};
struct charset_info_st my_charset_utf8_vietnamese_ci=
@@ -36438,7 +35616,7 @@ struct charset_info_st my_charset_utf8_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb3
};
@@ -36471,7 +35649,7 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb3
};
@@ -36504,7 +35682,7 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb3
};
#endif /* HAVE_CHARSET_utf8 */
@@ -36512,6 +35690,12 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf8mb4
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb4
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#include "ctype-uca.ic"
+
+
extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler;
#define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMMON_UCA_FLAGS|MY_CS_UNICODE_SUPPLEMENT)
@@ -36546,7 +35730,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -36579,7 +35763,7 @@ struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
@@ -36611,7 +35795,7 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
@@ -36643,7 +35827,7 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
@@ -36675,7 +35859,7 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
@@ -36707,7 +35891,7 @@ struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
@@ -36739,7 +35923,7 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
@@ -36771,7 +35955,7 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
@@ -36803,7 +35987,7 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
@@ -36835,7 +36019,7 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
@@ -36867,7 +36051,7 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -36900,7 +36084,7 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
@@ -36932,7 +36116,7 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
@@ -36964,7 +36148,7 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
@@ -36996,7 +36180,7 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
@@ -37028,7 +36212,7 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
@@ -37060,7 +36244,7 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
@@ -37092,7 +36276,7 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
@@ -37124,7 +36308,7 @@ struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
@@ -37156,7 +36340,7 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
@@ -37188,7 +36372,7 @@ struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
@@ -37220,7 +36404,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37253,7 +36437,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37286,7 +36470,7 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_thai_520_w2=
@@ -37318,7 +36502,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
@@ -37350,7 +36534,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37383,7 +36567,7 @@ struct charset_info_st my_charset_utf8mb4_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_any_uca_handler
+ &my_uca_collation_handler_utf8mb4
};
@@ -37416,7 +36600,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb4
};
@@ -37449,7 +36633,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
- &my_collation_mb_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf8mb4
};
@@ -37458,20 +36642,11 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf32
-MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
-{
- my_coll_init_uca, /* init */
- my_strnncoll_any_uca,
- my_strnncollsp_any_uca,
- my_strnxfrm_any_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
+#include "ctype-utf32.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf32
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf32_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#include "ctype-uca.ic"
extern MY_CHARSET_HANDLER my_charset_utf32_handler;
@@ -37508,7 +36683,7 @@ struct charset_info_st my_charset_utf32_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -37541,7 +36716,7 @@ struct charset_info_st my_charset_utf32_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_latvian_uca_ci=
@@ -37573,7 +36748,7 @@ struct charset_info_st my_charset_utf32_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_romanian_uca_ci=
@@ -37605,7 +36780,7 @@ struct charset_info_st my_charset_utf32_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_slovenian_uca_ci=
@@ -37637,7 +36812,7 @@ struct charset_info_st my_charset_utf32_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_polish_uca_ci=
@@ -37669,7 +36844,7 @@ struct charset_info_st my_charset_utf32_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_estonian_uca_ci=
@@ -37701,7 +36876,7 @@ struct charset_info_st my_charset_utf32_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_spanish_uca_ci=
@@ -37733,7 +36908,7 @@ struct charset_info_st my_charset_utf32_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_swedish_uca_ci=
@@ -37765,7 +36940,7 @@ struct charset_info_st my_charset_utf32_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_turkish_uca_ci=
@@ -37797,7 +36972,7 @@ struct charset_info_st my_charset_utf32_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_czech_uca_ci=
@@ -37829,7 +37004,7 @@ struct charset_info_st my_charset_utf32_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -37862,7 +37037,7 @@ struct charset_info_st my_charset_utf32_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
@@ -37894,7 +37069,7 @@ struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_slovak_uca_ci=
@@ -37926,7 +37101,7 @@ struct charset_info_st my_charset_utf32_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_spanish2_uca_ci=
@@ -37958,7 +37133,7 @@ struct charset_info_st my_charset_utf32_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_roman_uca_ci=
@@ -37990,7 +37165,7 @@ struct charset_info_st my_charset_utf32_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_persian_uca_ci=
@@ -38022,7 +37197,7 @@ struct charset_info_st my_charset_utf32_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_esperanto_uca_ci=
@@ -38054,7 +37229,7 @@ struct charset_info_st my_charset_utf32_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_hungarian_uca_ci=
@@ -38086,7 +37261,7 @@ struct charset_info_st my_charset_utf32_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_sinhala_uca_ci=
@@ -38118,7 +37293,7 @@ struct charset_info_st my_charset_utf32_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_german2_uca_ci=
@@ -38150,7 +37325,7 @@ struct charset_info_st my_charset_utf32_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
@@ -38182,7 +37357,7 @@ struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_croatian_uca_ci=
@@ -38214,7 +37389,7 @@ struct charset_info_st my_charset_utf32_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38247,7 +37422,7 @@ struct charset_info_st my_charset_utf32_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38280,7 +37455,7 @@ struct charset_info_st my_charset_utf32_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf32
};
@@ -38313,7 +37488,7 @@ struct charset_info_st my_charset_utf32_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38346,7 +37521,7 @@ struct charset_info_st my_charset_utf32_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_utf32_uca_handler
+ &my_uca_collation_handler_utf32
};
@@ -38379,7 +37554,7 @@ struct charset_info_st my_charset_utf32_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf32
};
@@ -38412,7 +37587,7 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf32
};
@@ -38422,21 +37597,11 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf16
-
-MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
-{
- my_coll_init_uca, /* init */
- my_strnncoll_any_uca,
- my_strnncollsp_any_uca,
- my_strnxfrm_any_uca,
- my_strnxfrmlen_any_uca,
- my_like_range_generic,
- my_wildcmp_uca,
- NULL,
- my_instr_mb,
- my_hash_sort_any_uca,
- my_propagate_complex
-};
+#include "ctype-utf16.h"
+#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf16
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf16_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#include "ctype-uca.ic"
extern MY_CHARSET_HANDLER my_charset_utf16_handler;
@@ -38473,7 +37638,7 @@ struct charset_info_st my_charset_utf16_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -38506,7 +37671,7 @@ struct charset_info_st my_charset_utf16_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_latvian_uca_ci=
@@ -38538,7 +37703,7 @@ struct charset_info_st my_charset_utf16_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_romanian_uca_ci=
@@ -38570,7 +37735,7 @@ struct charset_info_st my_charset_utf16_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_slovenian_uca_ci=
@@ -38602,7 +37767,7 @@ struct charset_info_st my_charset_utf16_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_polish_uca_ci=
@@ -38634,7 +37799,7 @@ struct charset_info_st my_charset_utf16_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_estonian_uca_ci=
@@ -38666,7 +37831,7 @@ struct charset_info_st my_charset_utf16_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_spanish_uca_ci=
@@ -38698,7 +37863,7 @@ struct charset_info_st my_charset_utf16_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_swedish_uca_ci=
@@ -38730,7 +37895,7 @@ struct charset_info_st my_charset_utf16_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_turkish_uca_ci=
@@ -38762,7 +37927,7 @@ struct charset_info_st my_charset_utf16_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_czech_uca_ci=
@@ -38794,7 +37959,7 @@ struct charset_info_st my_charset_utf16_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -38827,7 +37992,7 @@ struct charset_info_st my_charset_utf16_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
@@ -38859,7 +38024,7 @@ struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_slovak_uca_ci=
@@ -38891,7 +38056,7 @@ struct charset_info_st my_charset_utf16_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_spanish2_uca_ci=
@@ -38923,7 +38088,7 @@ struct charset_info_st my_charset_utf16_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_roman_uca_ci=
@@ -38955,7 +38120,7 @@ struct charset_info_st my_charset_utf16_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_persian_uca_ci=
@@ -38987,7 +38152,7 @@ struct charset_info_st my_charset_utf16_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_esperanto_uca_ci=
@@ -39019,7 +38184,7 @@ struct charset_info_st my_charset_utf16_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_hungarian_uca_ci=
@@ -39051,7 +38216,7 @@ struct charset_info_st my_charset_utf16_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_sinhala_uca_ci=
@@ -39083,7 +38248,7 @@ struct charset_info_st my_charset_utf16_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_german2_uca_ci=
@@ -39115,7 +38280,7 @@ struct charset_info_st my_charset_utf16_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39148,7 +38313,7 @@ struct charset_info_st my_charset_utf16_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39181,7 +38346,7 @@ struct charset_info_st my_charset_utf16_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39214,7 +38379,7 @@ struct charset_info_st my_charset_utf16_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39247,7 +38412,7 @@ struct charset_info_st my_charset_utf16_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_any_uca_handler_multilevel
+ &my_uca_collation_handler_multilevel_utf16
};
@@ -39280,7 +38445,7 @@ struct charset_info_st my_charset_utf16_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39313,7 +38478,7 @@ struct charset_info_st my_charset_utf16_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_utf16_uca_handler
+ &my_uca_collation_handler_utf16
};
@@ -39346,7 +38511,7 @@ struct charset_info_st my_charset_utf16_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf16
};
@@ -39379,7 +38544,7 @@ struct charset_info_st my_charset_utf16_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
- &my_collation_generic_uca_nopad_handler
+ &my_uca_collation_handler_nopad_utf16
};
diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic
new file mode 100644
index 00000000000..7b2ca3447dd
--- /dev/null
+++ b/strings/ctype-uca.ic
@@ -0,0 +1,763 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#ifndef MY_FUNCTION_NAME
+#error MY_FUNCTION_NAME is not defined
+#endif
+#ifndef MY_MB_WC
+#error MY_MB_WC is not defined
+#endif
+#ifndef MY_LIKE_RANGE
+#error MY_LIKE_RANGE is not defined
+#endif
+
+/* Returns the next collation weight from the scanner, or -1 once the input is exhausted. */
+static inline int
+MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
+{
+ /*
+ Check if the weights for the previous character have been
+ already fully scanned. If yes, then get the next character and
+ point wbeg to its weight string.
+ */
+
+ if (scanner->wbeg[0]) /* More weights left from the previous step: */
+ return *scanner->wbeg++; /* return the next weight from expansion */
+
+ do
+ {
+ const uint16 *wpage;
+ my_wc_t wc[MY_UCA_MAX_CONTRACTION];
+ int mblen;
+
+ /* Get next character */
+ if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
+ scanner->send)) <= 0))
+ {
+ if (scanner->sbeg >= scanner->send)
+ return -1; /* No more bytes, end of string reached */
+ /*
+ There are some more bytes left. Non-positive mblen means that
+ we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
+ */
+ if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
+ {
+ /* For safety purposes don't go beyond the string range. */
+ scanner->sbeg= scanner->send;
+ }
+ /*
+ Treat every complete or incomplete mbminlen unit as a weight which is
+ greater than weight for any possible normal character.
+ 0xFFFF is greater than any possible weight in the UCA weight table.
+ */
+ return 0xFFFF;
+ }
+
+ scanner->sbeg+= mblen;
+ if (wc[0] > scanner->level->maxchar)
+ {
+ /* Return 0xFFFD as weight for all characters above level->maxchar */
+ scanner->wbeg= nochar;
+ return 0xFFFD;
+ }
+
+ if (my_uca_have_contractions_quick(scanner->level))
+ {
+ uint16 *cweight;
+ /*
+ If we have scanned a character which can have previous context,
+ and there were some more characters already before,
+ then reconstruct codepoint of the previous character
+ from "page" and "code" into wc[1], and verify that {wc[1], wc[0]}
+ together form a real previous context pair.
+ Note, we support only 2-character long sequences with previous
+ context at the moment. CLDR does not have longer sequences.
+ */
+ if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
+ wc[0]) &&
+ scanner->wbeg != nochar && /* if not the very first character */
+ my_uca_can_be_previous_context_head(&scanner->level->contractions,
+ (wc[1]= ((scanner->page << 8) +
+ scanner->code))) &&
+ (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
+ {
+ scanner->page= scanner->code= 0; /* Clear for the next character */
+ return *cweight;
+ }
+ else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
+ wc[0]))
+ {
+ /* Check if wc[0] starts a contraction */
+ if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
+ return *cweight;
+ }
+ }
+
+ /* Process single character */
+ scanner->page= wc[0] >> 8;
+ scanner->code= wc[0] & 0xFF;
+
+ /* If weight page for wc[0] does not exist, then calculate algorithmically */
+ if (!(wpage= scanner->level->weights[scanner->page]))
+ return my_uca_scanner_next_implicit(scanner);
+
+ /* Calculate pointer to wc[0]'s weight, using page and offset */
+ scanner->wbeg= wpage +
+ scanner->code * scanner->level->lengths[scanner->page];
+ } while (!scanner->wbeg[0]); /* Skip ignorable characters */
+
+ return *scanner->wbeg++;
+}
+
+
+
+/*
+ Compares two strings according to the collation
+
+ SYNOPSIS:
+ strnncoll_onelevel()
+ cs Character set information
+ level Weight level to compare on (pointer to MY_UCA_WEIGHT_LEVEL)
+ s First string
+ slen First string length
+ t Second string
+ tlen Second string length
+ t_is_prefix If TRUE, return 0 when t is exhausted (t matches as a prefix)
+
+ NOTES:
+ Initializes two weight scanners and gets weights
+ corresponding to two strings in a loop. If weights are not
+ the same at some step then returns their difference.
+
+ In the while() comparison these situations are possible:
+ 1. (s_res>0) and (t_res>0) and (s_res == t_res)
+ Weights are the same so far, continue comparison
+ 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
+ A difference has been found, return.
+ 3. (s_res>0) and (t_res<0)
+ We have reached the end of the second string (scanner_next() itself
+ returns -1 only at end of input; bad byte sequences weigh 0xFFFF).
+ Return a positive number, or 0 if t_is_prefix is set.
+ 4. (s_res<0) and (t_res>0)
+ We have reached the end of the first string,
+ while the second string still has weights left.
+ Return a negative number, i.e. the second string is bigger.
+ 5. (s_res<0) and (t_res<0)
+ Both scanners returned -1. It means we have reached
+ the end-of-string in both strings
+ at the same time. Return 0, strings are equal.
+
+ RETURN
+ Difference between two strings, according to the collation:
+ 0 - means strings are equal
+ negative number - means the first string is smaller
+ positive number - means the first string is bigger
+*/
+
+static int
+MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ my_uca_scanner sscanner;
+ my_uca_scanner tscanner;
+ int s_res;
+ int t_res;
+
+ my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+ my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+
+ do
+ {
+ s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+ t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+ } while ( s_res == t_res && s_res >0); /* stop at the first difference or when both end */
+
+ return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
+}
+
+
+/*
+ One-level; used by both the PAD SPACE and the NO PAD one-level handlers.
+*/
+static int
+MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0], /* primary level only */
+ s, slen, t, tlen, t_is_prefix);
+}
+
+
+/*
+ Multi-level; used by both the PAD SPACE and the NO PAD multi-level handlers.
+*/
+static int
+MY_FUNCTION_NAME(strnncoll_multilevel)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen,
+ my_bool t_is_prefix)
+{
+ uint i, num_level= cs->levels_for_order;
+ for (i= 0; i != num_level; i++) /* compare level by level, starting from level[0] */
+ {
+ int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
+ s, slen, t, tlen,
+ t_is_prefix);
+ if (ret) /* the first level that differs decides the result */
+ return ret;
+ }
+ return 0; /* equal on all levels */
+}
+
+
+/*
+ Compares two strings according to the collation,
+ ignoring trailing spaces.
+
+ SYNOPSIS:
+ strnncollsp_onelevel()
+ cs Character set information
+ level UCA weight level
+ s First string
+ slen First string length
+ t Second string
+ tlen Second string length
+
+
+ NOTES:
+ Works like strnncoll_onelevel() without t_is_prefix,
+ but ignores trailing spaces.
+
+ In the while() comparison these situations are possible:
+ 1. (s_res>0) and (t_res>0) and (s_res == t_res)
+ Weights are the same so far, continue comparison
+ 2. (s_res>0) and (t_res>0) and (s_res!=t_res)
+ A difference has been found, return.
+ 3. (s_res>0) and (t_res<0)
+ We have reached the end of the second string
+ (bad byte sequences yield weight 0xFFFF, not -1).
+ Compare the first string to an infinite array of
+ space characters until difference is found, or until
+ the end of the first string.
+ 4. (s_res<0) and (t_res>0)
+ We have reached the end of the first string
+ (bad byte sequences yield weight 0xFFFF, not -1).
+ Compare the second string to an infinite array of
+ space characters until difference is found or until
+ the end of the second string.
+ 5. (s_res<0) and (t_res<0)
+ Both scanners returned -1. It means we have reached
+ the end-of-string in both strings
+ at the same time. Return 0, strings are equal.
+
+ RETURN
+ Difference between two strings, according to the collation:
+ 0 - means strings are equal
+ negative number - means the first string is smaller
+ positive number - means the first string is bigger
+*/
+
+static int
+MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ my_uca_scanner sscanner, tscanner;
+ int s_res, t_res;
+
+ my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+ my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+
+ do
+ {
+ s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+ t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+ } while ( s_res == t_res && s_res >0);
+
+ if (s_res > 0 && t_res < 0) /* case 3: t ended first */
+ {
+ /* Calculate weight for SPACE character */
+ t_res= my_space_weight(level);
+
+ /* compare the first string to spaces */
+ do
+ {
+ if (s_res != t_res)
+ return (s_res - t_res);
+ s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+ } while (s_res > 0);
+ return 0; /* the rest of s consisted of space weights only */
+ }
+
+ if (s_res < 0 && t_res > 0) /* case 4: s ended first */
+ {
+ /* Calculate weight for SPACE character */
+ s_res= my_space_weight(level);
+
+ /* compare the second string to spaces */
+ do
+ {
+ if (s_res != t_res)
+ return (s_res - t_res);
+ t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+ } while (t_res > 0);
+ return 0; /* the rest of t consisted of space weights only */
+ }
+
+ return ( s_res - t_res );
+}
+
+
+/*
+ One-level, PAD SPACE: compares on level[0] only, ignoring trailing spaces
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ return MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[0],
+ s, slen, t, tlen);
+}
+
+
+/*
+ One-level, NO PAD: plain strnncoll_onelevel() on level[0], t_is_prefix=FALSE
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nopad)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0],
+ s, slen, t, tlen, FALSE);
+}
+
+
+/*
+ Multi-level, PAD SPACE: runs strnncollsp_onelevel() on each level in turn
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_multilevel)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+
+ uint i, num_level= cs->levels_for_order;
+ for (i= 0; i != num_level; i++)
+ {
+ int ret= MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[i],
+ s, slen, t, tlen);
+ if (ret) /* the first level that differs decides the result */
+ return ret;
+ }
+ return 0; /* equal on all levels */
+}
+
+
+/*
+ Multi-level, NO PAD: runs strnncoll_onelevel() (t_is_prefix=FALSE) per level
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ uint num_level= cs->levels_for_order;
+ uint i;
+ for (i= 0; i != num_level; i++)
+ {
+ int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
+ s, slen, t, tlen, FALSE);
+ if (ret) /* the first level that differs decides the result */
+ return ret;
+ }
+ return 0; /* equal on all levels */
+}
+
+
+
+/*
+ Calculates hash value for the given string,
+ according to the collation, and ignoring trailing spaces.
+
+ SYNOPSIS:
+ hash_sort()
+ cs Character set information
+ s String
+ slen String's length
+ nr1 First hash parameter (in/out)
+ nr2 Second hash parameter (in/out)
+
+ NOTES:
+ Scans weights consecutively and updates
+ hash parameters nr1 and nr2. In a case insensitive collation,
+ upper and lower case of the same letter will return the same
+ weight sequence, and thus will produce the same hash values
+ in nr1 and nr2.
+
+ This function is used for one-level and for multi-level collations.
+ We intentionally use only primary level in multi-level collations.
+ This helps to have PARTITION BY KEY put primarily equal records
+ into the same partition. E.g. in utf8_thai_520_ci records that differ
+ only in tone marks go into the same partition.
+
+ RETURN
+ N/A
+*/
+
+static void
+MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ ulong *nr1, ulong *nr2)
+{
+ int s_res;
+ my_uca_scanner scanner;
+ int space_weight= my_space_weight(&cs->uca->level[0]);
+ register ulong m1= *nr1, m2= *nr2;
+
+ my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
+
+ while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
+ {
+ if (s_res == space_weight)
+ {
+ /* Combine all spaces to be able to skip end spaces */
+ uint count= 0;
+ do
+ {
+ count++;
+ if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0)
+ {
+ /* Skip spaces at end of string */
+ goto end;
+ }
+ }
+ while (s_res == space_weight);
+
+ /* Add back the hash for the space characters */
+ do
+ {
+ /*
+ We can't use MY_HASH_ADD_16() here because of a mistake
+ in the original code, where we added the 16-bit value the
+ opposite way. Changing this would cause old partitioned tables
+ to fail.
+ */
+ MY_HASH_ADD(m1, m2, space_weight >> 8);
+ MY_HASH_ADD(m1, m2, space_weight & 0xFF);
+ }
+ while (--count != 0);
+
+ }
+ /* See comment above why we can't use MY_HASH_ADD_16() */
+ MY_HASH_ADD(m1, m2, s_res >> 8);
+ MY_HASH_ADD(m1, m2, s_res & 0xFF);
+ }
+end:
+ *nr1= m1;
+ *nr2= m2;
+}
+
+/* NO PAD hash: folds every level[0] weight into nr1/nr2; space weights get no special treatment. */
+static void
+MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
+ const uchar *s, size_t slen,
+ ulong *nr1, ulong *nr2)
+{
+ int s_res;
+ my_uca_scanner scanner;
+ register ulong m1= *nr1, m2= *nr2;
+
+ my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
+
+ while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
+ {
+ /* High byte first, then low byte; see hash_sort() why not MY_HASH_ADD_16() */
+ MY_HASH_ADD(m1, m2, s_res >> 8);
+ MY_HASH_ADD(m1, m2, s_res & 0xFF);
+ }
+ *nr1= m1;
+ *nr2= m2;
+}
+
+
+
+/*
+ For the given string creates its "binary image", suitable
+ to be used in binary comparison, i.e. in memcmp().
+
+ SYNOPSIS:
+ strnxfrm_onelevel_internal()
+ cs, level Character set information and the weight level to use
+ dst, de Where to write the image, and the end of the available space
+ nweights In/out: weights still allowed; decremented per weight written
+ src The source string
+ srclen Length of the source string, in bytes
+
+ NOTES:
+ In a loop, scans weights from the source string and writes
+ them into the binary image, high byte first. In a case insensitive
+ collation, upper and lower cases of the same letter will produce the
+ same image subsequences. The loop stops at end-of-string, when
+ dst reaches de, or when *nweights drops to zero.
+
+ It is impossible to restore the original string using its
+ binary image.
+
+ Binary images are used for bulk comparison purposes,
+ e.g. in ORDER BY, when it is more efficient to create
+ a binary image and use it instead of weight scanner
+ for the original strings for every comparison.
+
+ RETURN
+ Pointer to the byte after the last one written into the binary image.
+*/
+
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
+ MY_UCA_WEIGHT_LEVEL *level,
+ uchar *dst, uchar *de,
+ uint *nweights,
+ const uchar *src, size_t srclen)
+{
+ my_uca_scanner scanner;
+ int s_res;
+
+ DBUG_ASSERT(src || !srclen);
+
+ my_uca_scanner_init_any(&scanner, cs, level, src, srclen);
+ for (; dst < de && *nweights &&
+ (s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) > 0 ; (*nweights)--)
+ {
+ *dst++= s_res >> 8; /* high byte of the weight */
+ if (dst < de) /* the low byte is dropped if the buffer ends here */
+ *dst++= s_res & 0xFF;
+ }
+ return dst;
+}
+
+/* One level, PAD SPACE: writes weights for src, optionally pads with the space weight; returns new dst. */
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_onelevel)(CHARSET_INFO *cs,
+ MY_UCA_WEIGHT_LEVEL *level,
+ uchar *dst, uchar *de, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *d0= dst; /* start of this level's image */
+ dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
+ dst, de, &nweights,
+ src, srclen);
+ DBUG_ASSERT(dst <= de);
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE)) /* nweights now holds the unused count */
+ dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
+ DBUG_ASSERT(dst <= de);
+ my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
+ return dst;
+}
+
+
+/* One level, NO PAD: like strnxfrm_onelevel(), but pads with min_weight_on_level(level). */
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(CHARSET_INFO *cs,
+ MY_UCA_WEIGHT_LEVEL *level,
+ uchar *dst, uchar *de, uint nweights,
+ const uchar *src, size_t srclen,
+ uint flags)
+{
+ uchar *d0= dst; /* start of this level's image */
+ dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
+ dst, de, &nweights,
+ src, srclen);
+ DBUG_ASSERT(dst <= de);
+ /* Pad with the minimum possible weight on this level */
+ if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+ dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
+ DBUG_ASSERT(dst <= de);
+ my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
+ return dst;
+}
+
+/* One-level, PAD SPACE strnxfrm entry point; returns the number of bytes written to dst. */
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen, uint nweights,
+ const uchar *src, size_t srclen, uint flags)
+{
+ uchar *d0= dst;
+ uchar *de= dst + dstlen;
+
+ dst= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, &cs->uca->level[0],
+ dst, de, nweights,
+ src, srclen, flags);
+ /*
+ This can probably be changed to memset(dst, 0, de - dst),
+ like strnxfrm_multilevel() does.
+ */
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0]));
+ return dst - d0;
+}
+
+/* One-level, NO PAD strnxfrm entry point; returns the number of bytes written to dst. */
+static size_t
+MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen,
+ uint nweights,
+ const uchar *src, size_t srclen,
+ uint flags)
+{
+ uchar *d0= dst;
+ uchar *de= dst + dstlen;
+
+ dst= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, &cs->uca->level[0],
+ dst, de, nweights,
+ src, srclen, flags);
+ if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+ {
+ memset(dst, 0, de - dst); /* fill the rest of the buffer with zero bytes */
+ dst= de;
+ }
+ return dst - d0;
+}
+
+/* Multi-level strnxfrm: appends one image per selected level; returns the number of bytes written. */
+static size_t
+MY_FUNCTION_NAME(strnxfrm_multilevel)(CHARSET_INFO *cs,
+ uchar *dst, size_t dstlen,
+ uint nweights,
+ const uchar *src, size_t srclen,
+ uint flags)
+{
+ uint num_level= cs->levels_for_order;
+ uchar *d0= dst;
+ uchar *de= dst + dstlen;
+ uint current_level;
+
+ for (current_level= 0; current_level != num_level; current_level++)
+ {
+ if (!(flags & MY_STRXFRM_LEVEL_ALL) || /* no level bits set: use every level */
+ (flags & (MY_STRXFRM_LEVEL1 << current_level)))
+ dst= cs->state & MY_CS_NOPAD ? /* NO PAD and PAD SPACE differ only in the padding weight */
+ MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs,
+ &cs->uca->level[current_level],
+ dst, de, nweights,
+ src, srclen, flags) :
+ MY_FUNCTION_NAME(strnxfrm_onelevel)(cs,
+ &cs->uca->level[current_level],
+ dst, de, nweights,
+ src, srclen, flags);
+ }
+
+ if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
+ {
+ memset(dst, 0, de - dst); /* fill the rest of the buffer with zero bytes */
+ dst= de;
+ }
+
+ return dst - d0;
+}
+
+
+/*
+ One-level, PAD SPACE collation handler for the including character set
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
+{
+ my_coll_init_uca, /* init */
+ MY_FUNCTION_NAME(strnncoll),
+ MY_FUNCTION_NAME(strnncollsp),
+ MY_FUNCTION_NAME(strnxfrm),
+ my_strnxfrmlen_any_uca,
+ MY_LIKE_RANGE, /* supplied by the file that includes this one */
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort),
+ my_propagate_complex
+};
+
+
+/*
+ One-level, NO PAD
+ For character sets with mbminlen==1 use MY_LIKE_RANGE=my_like_range_mb
+ For character sets with mbminlen>=2 use MY_LIKE_RANGE=my_like_range_generic
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
+{
+ my_coll_init_uca, /* init */
+ MY_FUNCTION_NAME(strnncoll), /* same function as in the PAD SPACE handler */
+ MY_FUNCTION_NAME(strnncollsp_nopad),
+ MY_FUNCTION_NAME(strnxfrm_nopad),
+ my_strnxfrmlen_any_uca,
+ MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort_nopad),
+ my_propagate_complex
+};
+
+
+/*
+ Multi-level, PAD SPACE collation handler for the including character set
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
+{
+ my_coll_init_uca, /* init */
+ MY_FUNCTION_NAME(strnncoll_multilevel),
+ MY_FUNCTION_NAME(strnncollsp_multilevel),
+ MY_FUNCTION_NAME(strnxfrm_multilevel),
+ my_strnxfrmlen_any_uca_multilevel,
+ MY_LIKE_RANGE, /* supplied by the file that includes this one */
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort), /* hashes level[0] weights only */
+ my_propagate_complex
+};
+
+
+/*
+ Multi-level, NO PAD collation handler for the including character set
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
+{
+ my_coll_init_uca, /* init */
+ MY_FUNCTION_NAME(strnncoll_multilevel),
+ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel),
+ MY_FUNCTION_NAME(strnxfrm_multilevel), /* picks NO PAD padding via cs->state & MY_CS_NOPAD */
+ my_strnxfrmlen_any_uca_multilevel,
+ MY_LIKE_RANGE, /* supplied by the file that includes this one */
+ my_wildcmp_uca,
+ NULL, /* strcasecmp() */
+ my_instr_mb,
+ MY_FUNCTION_NAME(hash_sort), /* NOTE(review): space-skipping hash, not hash_sort_nopad - confirm intended */
+ my_propagate_complex
+};
+
+
+#undef MY_FUNCTION_NAME
+#undef MY_MB_WC
+#undef MY_LIKE_RANGE
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 7596b7f2168..f34b2a841e6 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1184,35 +1184,7 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
but the JSON functions needed my_utf16_uni()
so the #ifdef was moved lower.
*/
-
-
-/*
- D800..DB7F - Non-provate surrogate high (896 pages)
- DB80..DBFF - Private surrogate high (128 pages)
- DC00..DFFF - Surrogate low (1024 codes in a page)
-*/
-#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
-#define MY_UTF16_SURROGATE_HIGH_LAST 0xDBFF
-#define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00
-#define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF
-
-#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
-#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
-/* Test if a byte is a leading byte of a high or low surrogate head: */
-#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
-/* Test if a Unicode code point is a high or low surrogate head */
-#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
-
-#define MY_UTF16_WC2(a, b) ((a << 8) + b)
-
-/*
- a= 110110?? (<< 18)
- b= ???????? (<< 10)
- c= 110111?? (<< 8)
- d= ???????? (<< 0)
-*/
-#define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
- ((c & 3) << 8) + d + 0x10000)
+#include "ctype-utf16.h"
#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
@@ -1261,32 +1233,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
{
- if (s + 2 > e)
- return MY_CS_TOOSMALL2;
-
- /*
- High bytes: 0xD[89AB] = B'110110??'
- Low bytes: 0xD[CDEF] = B'110111??'
- Surrogate mask: 0xFC = B'11111100'
- */
-
- if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
- {
- if (s + 4 > e)
- return MY_CS_TOOSMALL4;
-
- if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
- return MY_CS_ILSEQ;
-
- *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
- return 4;
- }
-
- if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
- return MY_CS_ILSEQ;
-
- *pwc= MY_UTF16_WC2(s[0], s[1]);
- return 2;
+ return my_mb_wc_utf16_quick(pwc, s, e);
}
@@ -2109,6 +2056,8 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
#ifdef HAVE_CHARSET_utf32
+#include "ctype-utf32.h"
+
/*
Check is b0 and b1 start a valid UTF32 four-byte sequence.
Don't accept characters greater than U+10FFFF.
@@ -2117,8 +2066,6 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
#define IS_MB4_CHAR(b0,b1,b2,b3) (IS_UTF32_MBHEAD4(b0,b1))
-#define MY_UTF32_WC4(b0,b1,b2,b3) ((((my_wc_t)b0) << 24) + (b1 << 16) + \
- (b2 << 8) + (b3))
static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
uchar b2, uchar b3)
@@ -2161,10 +2108,7 @@ static int
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
{
- if (s + 4 > e)
- return MY_CS_TOOSMALL4;
- *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
- return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
+ return my_mb_wc_utf32_quick(pwc, s, e);
}
@@ -2928,6 +2872,8 @@ struct charset_info_st my_charset_utf32_nopad_bin=
#ifdef HAVE_CHARSET_ucs2
+#include "ctype-ucs2.h"
+
static const uchar ctype_ucs2[] = {
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
@@ -3037,11 +2983,7 @@ my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
- if (s+2 > e) /* Need 2 characters */
- return MY_CS_TOOSMALL2;
-
- *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
- return 2;
+ return my_mb_wc_ucs2_quick(pwc, s, e);
}
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
diff --git a/strings/ctype-ucs2.h b/strings/ctype-ucs2.h
new file mode 100644
index 00000000000..c989324172d
--- /dev/null
+++ b/strings/ctype-ucs2.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UCS2_H
+#define _CTYPE_UCS2_H
+
+/* Decode one UCS-2 character (2 bytes, high-order byte first) from [s, e) into *pwc. */
+static inline int
+my_mb_wc_ucs2_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+ if (s+2 > e) /* Fewer than 2 bytes left before e */
+ return MY_CS_TOOSMALL2;
+ *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]); /* s[0] is the high-order byte */
+ return 2; /* Length in bytes of the decoded sequence */
+}
+
+
+#endif /* _CTYPE_UCS2_H */
diff --git a/strings/ctype-utf16.h b/strings/ctype-utf16.h
new file mode 100644
index 00000000000..d4cf4664f97
--- /dev/null
+++ b/strings/ctype-utf16.h
@@ -0,0 +1,80 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF16_H
+#define _CTYPE_UTF16_H
+
+/*
+ D800..DB7F - Non-private surrogate high (896 pages)
+ DB80..DBFF - Private surrogate high (128 pages)
+ DC00..DFFF - Surrogate low (1024 codes in a page)
+*/
+#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
+#define MY_UTF16_SURROGATE_HIGH_LAST 0xDBFF
+#define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00
+#define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF
+/* Test if a byte is the leading byte of a high (D8..DB) or of a low (DC..DF) surrogate: */
+#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
+#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
+/* Test if a byte is the leading byte of either kind of surrogate (D8..DF): */
+#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
+/* Test if a Unicode code point is a high or low surrogate (D800..DFFF) */
+#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
+/* Combine two bytes into a code point, a being the high-order byte (arguments are not parenthesized): */
+#define MY_UTF16_WC2(a, b) ((a << 8) + b)
+
+/* Bit layout of the four bytes of a surrogate pair and the shift applied to each:
+ a= 110110?? (<< 18)
+ b= ???????? (<< 10)
+ c= 110111?? (<< 8)
+ d= ???????? (<< 0)
+ The macro masks a and c with 3 and adds 0x10000 to the combined value. */
+#define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
+ ((c & 3) << 8) + d + 0x10000)
+/* Decode one UTF-16 character (2 or 4 bytes, high-order byte first) from [s, e) into *pwc. */
+static inline int
+my_mb_wc_utf16_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+ if (s + 2 > e) /* Fewer than 2 bytes left before e */
+ return MY_CS_TOOSMALL2;
+
+ /*
+ High bytes: 0xD[89AB] = B'110110??'
+ Low bytes: 0xD[CDEF] = B'110111??'
+ Surrogate mask: 0xFC = B'11111100'
+ */
+
+ if (MY_UTF16_HIGH_HEAD(*s)) /* High surrogate head: a 4-byte pair is expected */
+ {
+ if (s + 4 > e) /* The pair does not fit before e */
+ return MY_CS_TOOSMALL4;
+
+ if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrogate pair */
+ return MY_CS_ILSEQ;
+
+ *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
+ return 4;
+ }
+
+ if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
+ return MY_CS_ILSEQ;
+
+ *pwc= MY_UTF16_WC2(s[0], s[1]); /* Plain 2-byte character */
+ return 2;
+}
+
+#endif /* _CTYPE_UTF16_H */
diff --git a/strings/ctype-utf32.h b/strings/ctype-utf32.h
new file mode 100644
index 00000000000..e295dc6d081
--- /dev/null
+++ b/strings/ctype-utf32.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF32_H
+#define _CTYPE_UTF32_H
+/* Assemble a code point from four bytes, b0 being the most significant (only b0 is widened to my_wc_t). */
+#define MY_UTF32_WC4(b0,b1,b2,b3) ((((my_wc_t)b0) << 24) + (b1 << 16) + \
+ (b2 << 8) + (b3))
+/* Decode one UTF-32 character (4 bytes, most significant byte first) from [s, e) into *pwc. */
+static inline int
+my_mb_wc_utf32_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+ if (s + 4 > e) /* Fewer than 4 bytes left before e */
+ return MY_CS_TOOSMALL4;
+ *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]); /* Stored even if rejected on the next line */
+ return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4; /* Values above U+10FFFF are rejected */
+}
+
+#endif /* _CTYPE_UTF32_H */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index c525ee97b65..44544e38d4f 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -26,78 +26,9 @@
#define EILSEQ ENOENT
#endif
-/* Detect special bytes and sequences */
-#define IS_CONTINUATION_BYTE(c) (((uchar) (c) ^ 0x80) < 0x40)
-/*
- Check MB2 character assuming that b0 is alredy known to be >= 0xC2.
- Use this macro if the caller already checked b0 for:
- - an MB1 character
- - an unused gap between MB1 and MB2HEAD
-*/
-#define IS_UTF8MB2_STEP2(b0,b1) (((uchar) (b0) < 0xE0) && \
- IS_CONTINUATION_BYTE((uchar) b1))
+#include "ctype-utf8.h"
-/*
- Check MB3 character assuming that b0 is already known to be
- in the valid MB3HEAD range [0xE0..0xEF].
-*/
-#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
- IS_CONTINUATION_BYTE(b2) && \
- ((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
-
-/*
- Check MB3 character assuming that b0 is already known to be >= 0xE0,
- but is not checked for the high end 0xF0 yet.
- Use this macro if the caller already checked b0 for:
- - an MB1 character
- - an unused gap between MB1 and MB2HEAD
- - an MB2HEAD
-*/
-#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
- IS_UTF8MB3_STEP2(b0,b1,b2))
-
-/*
- UTF-8 quick four-byte mask:
- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
- Encoding allows to encode U+00010000..U+001FFFFF
-
- The maximum character defined in the Unicode standard is U+0010FFFF.
- Higher characters U+00110000..U+001FFFFF are not used.
-
- 11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
- 11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
-
- Valid codes:
- [F0][90..BF][80..BF][80..BF]
- [F1][80..BF][80..BF][80..BF]
- [F2][80..BF][80..BF][80..BF]
- [F3][80..BF][80..BF][80..BF]
- [F4][80..8F][80..BF][80..BF]
-*/
-
-/*
- Check MB4 character assuming that b0 is already
- known to be in the range [0xF0..0xF4]
-*/
-#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
- IS_CONTINUATION_BYTE(b2) && \
- IS_CONTINUATION_BYTE(b3) && \
- (b0 >= 0xf1 || b1 >= 0x90) && \
- (b0 <= 0xf3 || b1 <= 0x8F))
-#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
- IS_UTF8MB4_STEP2(b0,b1,b2,b3))
-
-/* Convert individual bytes to Unicode code points */
-#define UTF8MB2_CODE(b0,b1) (((my_wc_t) ((uchar) b0 & 0x1f) << 6) |\
- ((my_wc_t) ((uchar) b1 ^ 0x80)))
-#define UTF8MB3_CODE(b0,b1,b2) (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
- ((my_wc_t) ((uchar) b1 ^ 0x80) << 6) |\
- ((my_wc_t) ((uchar) b2 ^ 0x80)))
-#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
- ((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
- ((my_wc_t) ((uchar) b2 ^ 0x80) << 6) |\
- (my_wc_t) ((uchar) b3 ^ 0x80))
/* Definitions for strcoll.ic */
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
@@ -4981,42 +4912,7 @@ static const uchar to_upper_utf8[] = {
static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
- uchar c;
-
- if (s >= e)
- return MY_CS_TOOSMALL;
-
- c= s[0];
- if (c < 0x80)
- {
- *pwc = c;
- return 1;
- }
- else if (c < 0xc2)
- return MY_CS_ILSEQ;
- else if (c < 0xe0)
- {
- if (s+2 > e) /* We need 2 characters */
- return MY_CS_TOOSMALL2;
-
- if (!(IS_CONTINUATION_BYTE(s[1])))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB2_CODE(c, s[1]);
- return 2;
- }
- else if (c < 0xf0)
- {
- if (s+3 > e) /* We need 3 characters */
- return MY_CS_TOOSMALL3;
-
- if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB3_CODE(c, s[1], s[2]);
- return 3;
- }
- return MY_CS_ILSEQ;
+ return my_mb_wc_utf8mb3_quick(pwc, s, e);
}
@@ -7379,52 +7275,7 @@ static int
my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
- uchar c;
-
- if (s >= e)
- return MY_CS_TOOSMALL;
-
- c= s[0];
- if (c < 0x80)
- {
- *pwc= c;
- return 1;
- }
- else if (c < 0xc2)
- return MY_CS_ILSEQ;
- else if (c < 0xe0)
- {
- if (s + 2 > e) /* We need 2 characters */
- return MY_CS_TOOSMALL2;
-
- if (!(IS_CONTINUATION_BYTE(s[1])))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB2_CODE(c, s[1]);
- return 2;
- }
- else if (c < 0xf0)
- {
- if (s + 3 > e) /* We need 3 characters */
- return MY_CS_TOOSMALL3;
-
- if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
- return MY_CS_ILSEQ;
-
- *pwc= UTF8MB3_CODE(c, s[1], s[2]);
- return 3;
- }
- else if (c < 0xf5)
- {
- if (s + 4 > e) /* We need 4 characters */
- return MY_CS_TOOSMALL4;
-
- if (!IS_UTF8MB4_STEP2(c, s[1], s[2], s[3]))
- return MY_CS_ILSEQ;
- *pwc= UTF8MB4_CODE(c, s[1], s[2], s[3]);
- return 4;
- }
- return MY_CS_ILSEQ;
+ return my_mb_wc_utf8mb4_quick(pwc, s, e);
}
diff --git a/strings/ctype-utf8.h b/strings/ctype-utf8.h
new file mode 100644
index 00000000000..9a44c1658f2
--- /dev/null
+++ b/strings/ctype-utf8.h
@@ -0,0 +1,190 @@
+/*
+ Copyright (c) 2018 MariaDB Corporation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF8_H
+#define _CTYPE_UTF8_H
+
+/* Detect special bytes and sequences. A continuation byte is one in [0x80..0xBF]. */
+#define IS_CONTINUATION_BYTE(c) (((uchar) (c) ^ 0x80) < 0x40)
+
+/*
+ Check MB2 character assuming that b0 is already known to be >= 0xC2.
+ Use this macro if the caller already checked b0 for:
+ - an MB1 character
+ - an unused gap between MB1 and MB2HEAD
+*/
+#define IS_UTF8MB2_STEP2(b0,b1) (((uchar) (b0) < 0xE0) && \
+ IS_CONTINUATION_BYTE((uchar) b1))
+
+/*
+ Check MB3 character assuming that b0 is already known to be in the valid
+ MB3HEAD range [0xE0..0xEF]. The last condition rejects [E0][80..9F].
+*/
+#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
+ IS_CONTINUATION_BYTE(b2) && \
+ ((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
+
+/*
+ Check MB3 character assuming that b0 is already known to be >= 0xE0,
+ but is not checked for the high end 0xF0 yet.
+ Use this macro if the caller already checked b0 for:
+ - an MB1 character
+ - an unused gap between MB1 and MB2HEAD
+ - an MB2HEAD
+*/
+#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
+ IS_UTF8MB3_STEP2(b0,b1,b2))
+
+/*
+ UTF-8 quick four-byte mask:
+ 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ This form is used for U+00010000..U+001FFFFF
+
+ The maximum character defined in the Unicode standard is U+0010FFFF.
+ Higher characters U+00110000..U+001FFFFF are not used.
+
+ 11110000.10010000.10000000.10000000 == F0.90.80.80 == U+00010000 (min)
+ 11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
+
+ Valid codes:
+ [F0][90..BF][80..BF][80..BF]
+ [F1][80..BF][80..BF][80..BF]
+ [F2][80..BF][80..BF][80..BF]
+ [F3][80..BF][80..BF][80..BF]
+ [F4][80..8F][80..BF][80..BF]
+*/
+
+/*
+ Check MB4 character assuming that b0 is already known to be in the range
+ [0xF0..0xF4]. b0 and b1 are compared without a uchar cast: pass unsigned bytes.
+*/
+#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
+ IS_CONTINUATION_BYTE(b2) && \
+ IS_CONTINUATION_BYTE(b3) && \
+ (b0 >= 0xf1 || b1 >= 0x90) && \
+ (b0 <= 0xf3 || b1 <= 0x8F))
+#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
+ IS_UTF8MB4_STEP2(b0,b1,b2,b3))
+
+/* Convert individual bytes to Unicode code points (the bytes are not validated here) */
+#define UTF8MB2_CODE(b0,b1) (((my_wc_t) ((uchar) b0 & 0x1f) << 6) |\
+ ((my_wc_t) ((uchar) b1 ^ 0x80)))
+#define UTF8MB3_CODE(b0,b1,b2) (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
+ ((my_wc_t) ((uchar) b1 ^ 0x80) << 6) |\
+ ((my_wc_t) ((uchar) b2 ^ 0x80)))
+#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
+ ((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
+ ((my_wc_t) ((uchar) b2 ^ 0x80) << 6) |\
+ (my_wc_t) ((uchar) b3 ^ 0x80))
+/* Decode one UTF-8 character of 1 to 3 bytes from [s, e) into *pwc; returns its length or an error code. */
+static inline int
+my_mb_wc_utf8mb3_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+ uchar c;
+
+ if (s >= e) /* No input left */
+ return MY_CS_TOOSMALL;
+
+ c= s[0];
+ if (c < 0x80) /* One-byte character */
+ {
+ *pwc = c;
+ return 1;
+ }
+ else if (c < 0xc2) /* 0x80..0xC1 is rejected as a leading byte */
+ return MY_CS_ILSEQ;
+ else if (c < 0xe0) /* Two-byte head 0xC2..0xDF */
+ {
+ if (s+2 > e) /* We need 2 bytes */
+ return MY_CS_TOOSMALL2;
+
+ if (!(IS_CONTINUATION_BYTE(s[1])))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB2_CODE(c, s[1]);
+ return 2;
+ }
+ else if (c < 0xf0) /* Three-byte head 0xE0..0xEF */
+ {
+ if (s+3 > e) /* We need 3 bytes */
+ return MY_CS_TOOSMALL3;
+
+ if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB3_CODE(c, s[1], s[2]);
+ return 3;
+ }
+ return MY_CS_ILSEQ; /* c >= 0xF0: four-byte heads are rejected here */
+}
+
+/* Decode one UTF-8 character of 1 to 4 bytes from [s, e) into *pwc; returns its length or an error code. */
+#ifdef HAVE_CHARSET_utf8mb4
+static inline int
+my_mb_wc_utf8mb4_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+ uchar c;
+
+ if (s >= e) /* No input left */
+ return MY_CS_TOOSMALL;
+
+ c= s[0];
+ if (c < 0x80) /* One-byte character */
+ {
+ *pwc= c;
+ return 1;
+ }
+ else if (c < 0xc2) /* 0x80..0xC1 is rejected as a leading byte */
+ return MY_CS_ILSEQ;
+ else if (c < 0xe0) /* Two-byte head 0xC2..0xDF */
+ {
+ if (s + 2 > e) /* We need 2 bytes */
+ return MY_CS_TOOSMALL2;
+
+ if (!(IS_CONTINUATION_BYTE(s[1])))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB2_CODE(c, s[1]);
+ return 2;
+ }
+ else if (c < 0xf0) /* Three-byte head 0xE0..0xEF */
+ {
+ if (s + 3 > e) /* We need 3 bytes */
+ return MY_CS_TOOSMALL3;
+
+ if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
+ return MY_CS_ILSEQ;
+
+ *pwc= UTF8MB3_CODE(c, s[1], s[2]);
+ return 3;
+ }
+ else if (c < 0xf5) /* Four-byte head 0xF0..0xF4 */
+ {
+ if (s + 4 > e) /* We need 4 bytes */
+ return MY_CS_TOOSMALL4;
+
+ if (!IS_UTF8MB4_STEP2(c, s[1], s[2], s[3]))
+ return MY_CS_ILSEQ;
+ *pwc= UTF8MB4_CODE(c, s[1], s[2], s[3]);
+ return 4;
+ }
+ return MY_CS_ILSEQ; /* c >= 0xF5 is rejected as a leading byte */
+}
+#endif /* HAVE_CHARSET_utf8mb4 */
+
+
+#endif /* _CTYPE_UTF8_H */