summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.com>2021-11-18 19:18:44 +0400
committerAlexander Barkov <bar@mariadb.com>2021-11-23 11:20:54 +0400
commite98d423ced656e3cb8f3082f6ba1b14d727ae92a (patch)
treeeaaad85626411e8e75ee2a4413de277c2a5f408e
parent66ea4a628cf7612099b890d5ca8bb60a42ba7f87 (diff)
downloadmariadb-git-e98d423ced656e3cb8f3082f6ba1b14d727ae92a.tar.gz
Performance improvement
-rw-r--r--include/m_ctype.h52
-rw-r--r--strings/ctype-uca.c486
-rw-r--r--strings/ctype-uca.ic50
3 files changed, 580 insertions, 8 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 41523913c10..b3b3a8093ea 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -138,6 +138,57 @@ my_bool my_uca_can_be_contraction_tail(const MY_CONTRACTIONS *c, my_wc_t wc);
uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
my_wc_t wc1, my_wc_t wc2);
+typedef struct my_uca_weight2_t
+{
+ uint16 weight[2];
+} MY_UCA_WEIGHT2;
+
+
+/*
+ In DUCET as of Unicode-14.0.0:
+ - All characters in the range U+0000..U+007F (i.e. using one byte in utf8)
+ have not more than two weights on all weight levels.
+ - All characters in the range U+0080..U+07FF (i.e. using two bytes in utf8)
+ have not more than four weights on all weight levels.
+ Therefore the limit of 4 weights should cover all byte pairs
+ (i.e. two ASCII characters or one 2-byte character)
+ that are a subject for the "process 2 bytes at a time" optimization.
+ If some collation reorders any character from the mentioned ranges
+ in the way that it produces more weights, such character will not
+ be optimized, but will be correctly processed the slower mb_wc-based
+ method (1 character at a time).
+*/
+#define MY_UCA_2BYTES_MAX_WEIGHT_SIZE (4+1) /* Including 0 terminator */
+
+typedef struct my_uca_2bytes_item_t
+{
+ uint16 weight[MY_UCA_2BYTES_MAX_WEIGHT_SIZE];
+} MY_UCA_2BYTES_ITEM;
+
+
+typedef struct my_uca_level_booster_t
+{
+ /*
+ A helper array to process 2 bytes at a time during string comparison.
+ It maps all 2-bytes sequences that make:
+ - two ASCII characters or
+ - one 2-byte character
+ to their weights. The weight length is limited to
+ MY_UCA_2BYTES_MAX_WEIGHT_SIZE-1 weights.
+ This array is used in the main loop optimization.
+ */
+ MY_UCA_2BYTES_ITEM weight_strings_2bytes[0x10000];
+ /*
+ A helper array to process 2bytes at a time during string comparison,
+ with an even more efficient way than the above one.
+ The weight size is limited to 2 weights, so it's used for the cases
+ when 2 input bytes produce 1 or 2 weights.
+ This limit makes the code using this array even simpler and faster.
+ This array is used for prefix optimization.
+ */
+ MY_UCA_WEIGHT2 weight_strings_2bytes_to_1_or_2_weights[0x10000];
+} MY_UCA_LEVEL_BOOSTER;
+
/* Collation weights on a single level (e.g. primary, secondary, tertiarty) */
typedef struct my_uca_level_info_st
@@ -147,6 +198,7 @@ typedef struct my_uca_level_info_st
uint16 **weights;
MY_CONTRACTIONS contractions;
uint levelno;
+ MY_UCA_LEVEL_BOOSTER *booster;
} MY_UCA_WEIGHT_LEVEL;
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 45b6869c6f5..b8bc48eb6fc 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -6540,7 +6540,8 @@ MY_UCA_INFO my_uca_v400=
NULL, /* item */
NULL /* flags */
},
- 0 /* levelno */
+ 0, /* levelno */
+ NULL /* weights_2bytes */
},
{
0,
@@ -6551,7 +6552,8 @@ MY_UCA_INFO my_uca_v400=
NULL,
NULL
},
- 1 /* levelno */
+ 1, /* levelno */
+ NULL /* weights_2bytes */
},
},
@@ -30097,7 +30099,8 @@ MY_UCA_INFO my_uca_v520_th=
thai_contractions, /* item */
NULL /* flags */
},
- 0 /* levelno */
+ 0, /* levelno */
+ NULL /* weights_2bytes */
},
{
0x10FFFF, /* maxchar */
@@ -30108,7 +30111,8 @@ MY_UCA_INFO my_uca_v520_th=
thai_contractions_w2, /* item */
NULL /* flags */
},
- 1 /* levelno */
+ 1, /* levelno */
+ NULL /* weights_2bytes */
},
},
@@ -30143,7 +30147,8 @@ MY_UCA_INFO my_uca_v520=
NULL, /* item */
NULL /* flags */
},
- 0 /* levelno */
+ 0, /* levelno */
+ NULL /* weights_2bytes */
},
{
@@ -30155,7 +30160,8 @@ MY_UCA_INFO my_uca_v520=
NULL, /* item */
NULL /* flags */
},
- 1 /* levelno */
+ 1, /* levelno */
+ NULL /* weights_2bytes */
},
},
@@ -33637,8 +33643,468 @@ my_uca_generate_pages(MY_CHARSET_LOADER *loader,
}
+static size_t
+my_uca_weight_cpy(uint16 *dst, const uint16 *src)
+{
+ const uint16 *src0= src;
+ for ( ; ; dst++, src++ )
+ {
+ *dst= *src;
+ if (!dst[0])
+ break;
+ }
+ return src - src0;
+}
+
+
+#define MY_UCA_NOT_APPLICABLE_MARKER 0xFFFF
+
+static inline my_bool
+my_uca_2bytes_item_is_applicable(const MY_UCA_2BYTES_ITEM *w2)
+{
+ return w2->weight[1] != MY_UCA_NOT_APPLICABLE_MARKER;
+}
+
+
+static void
+my_uca_2bytes_item_set_not_applicable(MY_UCA_2BYTES_ITEM *dst)
+{
+ dst->weight[0]= 0;
+ dst->weight[1]= MY_UCA_NOT_APPLICABLE_MARKER;
+}
+
+
+static inline size_t
+my_uca_weight_length(const uint16 *str)
+{
+ uint res;
+ for (res= 0; str[res] ; res++)
+ { }
+ return res;
+}
+
+
+static void
+my_uca_2bytes_item_weight_cpy(MY_UCA_2BYTES_ITEM *dst, const uint16 *src)
+{
+ size_t wlen= my_uca_weight_length(src);
+ if (wlen + 1 > array_elements(dst->weight))
+ my_uca_2bytes_item_set_not_applicable(dst);
+ else
+ my_uca_weight_cpy(dst->weight, src);
+}
+
+
+static void
+my_uca_2bytes_item_weight_cpy2(MY_UCA_2BYTES_ITEM *dst,
+ const uint16 *wa,
+ const uint16 *wb)
+{
+ size_t la= my_uca_weight_length(wa);
+ size_t lb= my_uca_weight_length(wb);
+ if (la + lb + 1 > array_elements(dst->weight))
+ {
+ my_uca_2bytes_item_set_not_applicable(dst);
+ }
+ else
+ {
+ my_uca_weight_cpy(dst->weight, wa);
+ my_uca_weight_cpy(dst->weight + la, wb);
+ }
+}
+
+
+static void
+my_uca_2bytes_item_set_ascii2(MY_UCA_2BYTES_ITEM *dst,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ uint a, uint b)
+{
+ const uint16 *wa= level->weights[0] + a * level->lengths[0];
+ const uint16 *wb= level->weights[0] + b * level->lengths[0];
+ my_uca_2bytes_item_weight_cpy2(dst, wa, wb);
+}
+
+
+static void
+my_uca_2bytes_item_set_non_ascii(MY_UCA_2BYTES_ITEM *dst,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ CHARSET_INFO *cs,
+ uint a, uint b)
+{
+ uchar ch[2]= {(uchar) a, (uchar) b};
+ my_wc_t wc;
+ int rc= my_ci_mb_wc(cs, &wc, &ch[0], &ch[2]);
+ if (rc == 2)
+ {
+ /* Byte sequence 'ab' make one valid 2-byte character */
+ uint pageno= wc>>8;
+ const uint16 *w= level->weights[pageno] + (wc & 0xFF) * level->lengths[pageno];
+ my_uca_2bytes_item_weight_cpy(dst, w);
+ }
+ else /* TODO: ILSEQ vs TOO_SMALL */
+ {
+ my_uca_2bytes_item_set_not_applicable(dst);
+ }
+}
+
+
+static inline MY_UCA_2BYTES_ITEM *
+my_uca_level_booster_2bytes_item_addr(MY_UCA_LEVEL_BOOSTER *booster,
+ uint a, uint b)
+{
+ size_t w2offs= a * 256 + b;
+ DBUG_ASSERT(a < 256 && b < 256);
+ return &booster->weight_strings_2bytes[w2offs];
+}
+
+
+static inline const MY_UCA_2BYTES_ITEM *
+my_uca_level_booster_2bytes_item_addr_const(const MY_UCA_LEVEL_BOOSTER *booster,
+ uint a, uint b)
+{
+ size_t w2offs= a * 256 + b;
+ DBUG_ASSERT(a < 256 && b < 256);
+ return &booster->weight_strings_2bytes[w2offs];
+}
+
+
+static inline const MY_UCA_WEIGHT2 *
+my_uca_level_booster_simple_weight2_addr_const(
+ const MY_UCA_LEVEL_BOOSTER *booster,
+ uchar a, uchar b)
+{
+ uint offs= (uint) a * 256 + b;
+ return &booster->weight_strings_2bytes_to_1_or_2_weights[offs];
+}
+
+
+static void
+my_uca_level_booster_2bytes_disable_if_2byte_char(MY_UCA_LEVEL_BOOSTER *booster,
+ CHARSET_INFO *cs,
+ my_wc_t ch)
+{
+ uchar tmp[MY_CS_MBMAXLEN];
+ int rc= my_ci_wc_mb(cs, ch, tmp, tmp + sizeof(tmp));
+ if (rc == 2)
+ {
+ MY_UCA_2BYTES_ITEM *dst= my_uca_level_booster_2bytes_item_addr(booster,
+ tmp[0],
+ tmp[1]);
+ my_uca_2bytes_item_set_not_applicable(dst);
+ }
+}
+
+
+static inline void
+my_uca_level_booster_2bytes_set_not_applicable_by_tail(
+ MY_UCA_LEVEL_BOOSTER *booster,
+ uint tail)
+{
+ uint head;
+ for (head= 0; head < 256; head++)
+ {
+ MY_UCA_2BYTES_ITEM *dst= my_uca_level_booster_2bytes_item_addr(booster,
+ head,
+ tail);
+ my_uca_2bytes_item_set_not_applicable(dst);
+ }
+}
+
+
+static void
+my_uca_level_booster_2bytes_disable_contraction(MY_UCA_LEVEL_BOOSTER *booster,
+ const MY_CONTRACTION *c,
+ CHARSET_INFO *cs)
+{
+ DBUG_ASSERT(!c->with_context);
+
+ if (c->ch[0] < 0x80)
+ {
+ MY_UCA_2BYTES_ITEM *dst;
+ /*
+ 2-byte pairs that end with an ASCII contraction head.
+ ...xAB...
+ Suppose AB is a contraction where A is an ASCII character.
+ Disable byte pairs xA (for all x=0x00..0xFF).
+ */
+ my_uca_level_booster_2bytes_set_not_applicable_by_tail(booster, c->ch[0]);
+
+ /*
+ Disable 2-byte ASCII combinations that start
+ 3-byte (or longer) contractions.
+ */
+ if (c->ch[1] < 0x80 && c->ch[2] != 0)
+ {
+ /*
+ A 3+ character contraction that starts from two ASCII characters:
+ ...ABC...
+ */
+ dst= my_uca_level_booster_2bytes_item_addr(booster, c->ch[0], c->ch[1]);
+ my_uca_2bytes_item_set_not_applicable(dst);
+ }
+ }
+ else
+ {
+ /*
+ Disable 2-byte characters that start contractions:
+ ...[Aa][B]... MB + ASCII
+ ...[Aa][Bb].. MB + MB
+ The next weight depends on the following characters.
+ */
+ my_uca_level_booster_2bytes_disable_if_2byte_char(booster, cs, c->ch[0]);
+ }
+}
+
+
+static void
+my_uca_level_booster_2bytes_disable_previous_context(
+ MY_UCA_LEVEL_BOOSTER *booster,
+ const MY_CONTRACTION *c,
+ CHARSET_INFO *cs)
+{
+ DBUG_ASSERT(c->with_context);
+
+ if (c->ch[0] < 0x80 && c->ch[1] < 0x80)
+ {
+ DBUG_ASSERT(c->ch[2] == 0);
+ if (c->ch[2] == 0)
+ {
+ /*
+ A previous context pair with exactly two ASCII characters:
+ ...AB...
+ "A" is a look-behind character (the context).
+ "B" is a character that we need to generate a weight for.
+ The underlying code does not support handling these character
+ in a single shot yet. It works as follows at the moment:
+ - A is scanned separately from B and generates its independent weight.
+ - B is scanned separately on the next step and and generates its
+ context dependent weight (by looking behind).
+ */
+ MY_UCA_2BYTES_ITEM *dst= my_uca_level_booster_2bytes_item_addr(booster,
+ c->ch[0],
+ c->ch[1]);
+ my_uca_2bytes_item_set_not_applicable(dst);
+ }
+ }
+ else
+ {
+ /*
+ Disable 2-byte characters that start pairs with a previous context:
+ ...[Aa][B]... MB + ASCII
+ ...[Aa][Bb].. MB + MB
+ These characters can be actually scanned in a single shot,
+ but the relevant code in scanner_next() assumes previous context
+ head characters are ASCII only sets and sets the previous
+ character simply as sbeg[1].
+ */
+ my_uca_level_booster_2bytes_disable_if_2byte_char(booster, cs, c->ch[0]);
+ }
+}
+
+
+static void
+my_uca_2bytes_item_set_pair(MY_UCA_2BYTES_ITEM *dst,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ CHARSET_INFO *cs,
+ uint a, uint b)
+{
+ if (a < 0x80 && b < 0x80)
+ my_uca_2bytes_item_set_ascii2(dst, level, a, b);
+ else
+ my_uca_2bytes_item_set_non_ascii(dst, level, cs, a, b);
+}
+
+
+static void
+my_uca_level_booster_2bytes_populate_pairs(MY_UCA_LEVEL_BOOSTER *booster,
+ const MY_UCA_WEIGHT_LEVEL *level,
+ CHARSET_INFO *cs)
+{
+ uint a, b;
+ for (a= 0; a < 256; a++)
+ {
+ for (b= 0; b < 256; b++)
+ {
+ MY_UCA_2BYTES_ITEM *dst;
+ dst= my_uca_level_booster_2bytes_item_addr(booster, a, b);
+ my_uca_2bytes_item_set_pair(dst, level, cs, a, b);
+ }
+ }
+}
+
+
+/*
+ Populate contractions consisting of two ASCII letters.
+ Only true contractions are handled here so far.
+ Previous context pairs are handled separately.
+*/
+static void
+my_uca_level_booster_2bytes_pupulate_ascii2_contractions(
+ MY_UCA_LEVEL_BOOSTER *booster,
+ const MY_CONTRACTIONS *list)
+{
+ size_t i;
+ for (i= 0; i < list->nitems; i++)
+ {
+ const MY_CONTRACTION *c= &list->item[i];
+ if (c->ch[0] < 0x80 && c->ch[1] < 0x80 && c->ch[2] == 0 &&
+ !c->with_context)
+ {
+ MY_UCA_2BYTES_ITEM *dst= my_uca_level_booster_2bytes_item_addr(booster,
+ c->ch[0],
+ c->ch[1]);
+ my_uca_2bytes_item_weight_cpy(dst, c->weight);
+ }
+ }
+}
+
+
+static void
+my_uca_level_booster_2bytes_disable_context_dependent(
+ MY_UCA_LEVEL_BOOSTER *booster,
+ const MY_CONTRACTIONS *list,
+ CHARSET_INFO *cs)
+{
+ size_t i;
+ for (i= 0; i < list->nitems; i++)
+ {
+ const MY_CONTRACTION *c= &list->item[i];
+ if (c->with_context)
+ my_uca_level_booster_2bytes_disable_previous_context(booster, c, cs);
+ else
+ my_uca_level_booster_2bytes_disable_contraction(booster, c, cs);
+ }
+}
+
+
+/*
+ Populate the array of MY_UCA_WEIGHT2 for all possible byte pairs {a,b}
+ as follows:
+
+ Number of characters Number of weights WEIGHT2
+ -------------------- ----------------- ------
+ 2 (two ASCII chars) 0 (both ignorable) {0,0} [IGN]
+ 2 (two ASCII chars) 1 (e.g. Czech "ch") {X,0}
+ 2 (two ASCII chars) 1 (e.g. ignorable + non-ignorable) {X,0}
+ 2 (two ASCII chars) 2 (two ASCII chars, one weigth each) {X,0}
+ 2 (two ASCII chars) 3+ (contraction with a long expansion) {0,0} [E3]
+ 1 (one 2-byte char) 0 (ignorable) {0,0} [IGN]
+ 1 (one 2-byte char) 1 {X,0}
+ 1 (one 2-byte char) 2 (short expansion, e.g. German SZ) {X,Y}
+ 1 (one 2-byte char) 3+ (long expansion) {0,0} [E3]
+ 0 (incomplete 3/4-byte char) {0,0} [INC]
+
+ All byte pairs that depend on the context (e.g. contraction parts)
+ and that were previously marked as such by
+ my_uca_level_booster_2bytes_disable_context_dependent()
+ set WEIGHT2 to {0,0} [CTX].
+
+ After the initialization, the array contains non-zero weights for
+ the most typical simple cases of mapping from 2-bytes to weights,
+ so inside strnncoll*() we can skip equal string prefixes much faster,
+ using a cheaper simpler code.
+*/
+static void
+my_uca_level_booster_weight2_populate(MY_UCA_LEVEL_BOOSTER *booster)
+{
+ size_t i;
+ for (i= 0; i < 0x10000; i++)
+ {
+ MY_UCA_WEIGHT2 *dst= &booster->weight_strings_2bytes_to_1_or_2_weights[i];
+ MY_UCA_2BYTES_ITEM *src= &booster->weight_strings_2bytes[i];
+ if (src->weight[0] && (!src->weight[1] || !src->weight[2]))
+ {
+ /*
+ Simplest mapping:
+ - Two ASCII characters make one or two weights
+ - One 2-byte character makes one or two weights
+ Handled by the simpler loop at the comparison time.
+ */
+ dst->weight[0]= src->weight[0];
+ dst->weight[1]= src->weight[1];
+ }
+ else
+ {
+ /*
+ More complex mapping:
+ - Ignorable - see [IGN] above
+ - More than two weights - see [E3] above
+ - Incomplete (a 3-byte or 4-byte char head) - see [INC] above
+ - Not applicable (context dependent) - see [CTX] above
+ Handled by the full-featured slower loop at the comparison time.
+ */
+ dst->weight[0]= 0;
+ dst->weight[1]= 0;
+ }
+ }
+}
+
+
+static void
+my_uca_level_booster_populate(MY_UCA_LEVEL_BOOSTER *dst,
+ const MY_UCA_WEIGHT_LEVEL *src,
+ CHARSET_INFO *cs)
+{
+ my_uca_level_booster_2bytes_populate_pairs(dst, src, cs);
+ my_uca_level_booster_2bytes_pupulate_ascii2_contractions(dst,
+ &src->contractions);
+ my_uca_level_booster_2bytes_disable_context_dependent(dst,
+ &src->contractions,
+ cs);
+ my_uca_level_booster_weight2_populate(dst);
+}
+
+
+static MY_UCA_LEVEL_BOOSTER *
+my_uca_level_booster_alloc(MY_CHARSET_LOADER *loader)
+{
+ size_t nbytes= sizeof(MY_UCA_LEVEL_BOOSTER);
+ MY_UCA_LEVEL_BOOSTER *res;
+ if (!(res= (MY_UCA_LEVEL_BOOSTER *) (loader->once_alloc)(nbytes)))
+ return NULL;
+ bzero(res, nbytes);
+ return res;
+}
+
+
+static MY_UCA_LEVEL_BOOSTER *
+my_uca_level_booster_new(MY_CHARSET_LOADER *loader,
+ CHARSET_INFO *cs,
+ MY_UCA_WEIGHT_LEVEL *level)
+{
+ MY_UCA_LEVEL_BOOSTER *res;
+ if (!(res= my_uca_level_booster_alloc(loader)))
+ return NULL;
+ my_uca_level_booster_populate(res, level, cs);
+ return res;
+}
+
+
+static size_t
+my_uca_level_booster_equal_prefix_length(const MY_UCA_LEVEL_BOOSTER *booster,
+ const uchar *s, size_t slen,
+ const uchar *t, size_t tlen)
+{
+ const uchar *s0= s;
+ size_t simple_count= MY_MIN(slen, tlen) >> 1;
+ for ( ; simple_count; s+= 2, t+= 2, simple_count--)
+ {
+ const MY_UCA_WEIGHT2 *ws, *wt;
+ ws= my_uca_level_booster_simple_weight2_addr_const(booster, s[0], s[1]);
+ wt= my_uca_level_booster_simple_weight2_addr_const(booster, t[0], t[1]);
+ if (ws->weight[0] &&
+ ws->weight[0] == wt->weight[0] &&
+ ws->weight[1] == wt->weight[1])
+ continue;
+ break;
+ }
+ return s - s0;
+}
+
+
static my_bool
-init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
+init_weight_level(MY_CHARSET_LOADER *loader, CHARSET_INFO *cs,
+ MY_COLL_RULES *rules,
MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
{
MY_COLL_RULE *r, *rlast;
@@ -33736,6 +34202,10 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
memcpy(weights, item->weight, length * sizeof(uint16));
weights[length]= 0;
}
+
+ if (cs->mbminlen == 1)
+ dst->booster= my_uca_level_booster_new(loader, cs, dst);
+
return FALSE;
}
@@ -33879,7 +34349,7 @@ create_tailoring(struct charset_info_st *cs,
cs->coll_name.str, i + 1);
goto ex;
}
- if ((rc= init_weight_level(loader, &rules,
+ if ((rc= init_weight_level(loader, cs, &rules,
&new_uca.level[i], &src_uca->level[i])))
goto ex;
}
diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic
index 32da4c87fdc..5ae1696677d 100644
--- a/strings/ctype-uca.ic
+++ b/strings/ctype-uca.ic
@@ -51,6 +51,38 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
my_wc_t currwc;
const uint16 *cweight;
+#if MY_UCA_ASCII_OPTIMIZE
+ if (scanner->sbeg + 1 < scanner->send)
+ {
+ const MY_UCA_2BYTES_ITEM *ww;
+ ww= my_uca_level_booster_2bytes_item_addr_const(scanner->level->booster,
+ scanner->sbeg[0],
+ scanner->sbeg[1]);
+ if (my_uca_2bytes_item_is_applicable(ww))
+ {
+ /*
+ Byte pairs that make 2-byte head characters in previous
+ context pairs are marked as not applicable for optimization
+ during the collation initialization. So when we come here
+ sbeg[0] and sbeg[1] are:
+ - either two ASCII characters
+ - or one 2-byte character which IS NOT a previous context head
+ Just remember sbeg[1] as the previous character for simplicity.
+ This may erroneously interpret bytes 0x80..0x9F as previous context
+ head characters U+0080..U+009F. However, CLDR does not have any real
+ collations that use these characters as previous context heads.
+ */
+ scanner->page= 0;
+ scanner->code= (int) scanner->sbeg[1];
+ scanner->sbeg+= 2;
+ if ((weight= my_uca_scanner_set_weight(scanner, ww->weight)))
+ return weight;
+ continue; /* Ignorable character */
+ }
+ /* 2 byte optimization is not applicable, go the slow path */
+ }
+#endif
+
/* Get next character */
#if MY_UCA_ASCII_OPTIMIZE
/* Get next ASCII character */
@@ -195,6 +227,15 @@ MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
my_uca_scanner tscanner;
int s_res;
int t_res;
+
+#if MY_UCA_ASCII_OPTIMIZE
+{
+ size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
+ s, slen, t, tlen);
+ s+= prefix, slen-= prefix;
+ t+= prefix, tlen-= prefix;
+}
+#endif
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
@@ -301,6 +342,15 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
my_uca_scanner sscanner, tscanner;
int s_res, t_res;
+#if MY_UCA_ASCII_OPTIMIZE
+{
+ size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
+ s, slen, t, tlen);
+ s+= prefix, slen-= prefix;
+ t+= prefix, tlen-= prefix;
+}
+#endif
+
my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);