5 files changed, 660 insertions, 12 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 1e7f06dce98..811b3b71a17 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -141,6 +141,58 @@ const uint16 *my_uca_contraction2_weight(const MY_CONTRACTIONS *c,
                                          my_wc_t wc1, my_wc_t wc2);
 
 
+typedef struct my_uca_weight2_t
+{
+  uint16 weight[2]; /* One or two weights; {0,0} means "no simple mapping" */
+} MY_UCA_WEIGHT2;
+
+
+/*
+  In DUCET as of Unicode-14.0.0:
+  - All characters in the range U+0000..U+007F (i.e. using one byte in utf8)
+    have not more than two weights on all weight levels.
+  - All characters in the range U+0080..U+07FF (i.e. using two bytes in utf8)
+    have not more than four weights on all weight levels.
+  Therefore the limit of 4 weights should cover all byte pairs
+  (i.e. two ASCII characters or one 2-byte character)
+  that are subject to the "process 2 bytes at a time" optimization.
+  If some collation reorders any character from the mentioned ranges
+  in the way that it produces more weights, such character will not
+  be optimized, but will be correctly processed by the slower mb_wc-based
+  method (1 character at a time).
+*/
+#define MY_UCA_2BYTES_MAX_WEIGHT_SIZE (4+1) /* Including 0 terminator */
+
+typedef struct my_uca_2bytes_item_t
+{
+  uint16 weight[MY_UCA_2BYTES_MAX_WEIGHT_SIZE]; /* 0-terminated weights */
+} MY_UCA_2BYTES_ITEM;
+
+
+typedef struct my_uca_level_booster_t
+{
+  /*
+    A helper array to process 2 bytes at a time during string comparison.
+    It maps all 2-byte sequences that make:
+    - two ASCII characters or
+    - one 2-byte character
+    to their weights. The weight length is limited to
+    MY_UCA_2BYTES_MAX_WEIGHT_SIZE-1 weights.
+    This array is used in the main loop optimization.
+  */
+  MY_UCA_2BYTES_ITEM weight_strings_2bytes[0x10000]; /* Index: a*256+b */
+  /*
+    A helper array to process 2 bytes at a time during string comparison,
+    in an even more efficient way than the above one.
+    The weight size is limited to 2 weights, so it's used for the cases
+    when 2 input bytes produce 1 or 2 weights.
+    This limit makes the code using this array even simpler and faster.
+    This array is used for prefix optimization.
+  */
+  MY_UCA_WEIGHT2 weight_strings_2bytes_to_1_or_2_weights[0x10000];
+} MY_UCA_LEVEL_BOOSTER;
+
+
 typedef struct my_uca_contraction_hash_t
 {
   size_t nitems_alloced;
@@ -157,6 +209,7 @@ typedef struct my_uca_level_info_st
   MY_CONTRACTIONS contractions;
   uint    levelno;
   MY_UCA_CONTRACTION_HASH contraction_hash;
+  MY_UCA_LEVEL_BOOSTER *booster;
 } MY_UCA_WEIGHT_LEVEL;
 
 
diff --git a/strings/ctype-uca-scanner_next.inl b/strings/ctype-uca-scanner_next.inl
index acab31f21ef..b79e0deff1a 100644
--- a/strings/ctype-uca-scanner_next.inl
+++ b/strings/ctype-uca-scanner_next.inl
@@ -78,6 +78,45 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
     my_wc_t currwc= 0;
     const uint16 *cweight;
 
+#if MY_UCA_ASCII_OPTIMIZE && !defined(SCANNER_NEXT_NCHARS)
+    if (scanner->sbeg + 1 < scanner->send)
+    {
+      const MY_UCA_2BYTES_ITEM *ww;
+      ww= my_uca_level_booster_2bytes_item_addr_const(scanner->level->booster,
+                                                      scanner->sbeg[0],
+                                                      scanner->sbeg[1]);
+      if (my_uca_2bytes_item_is_applicable(ww))
+      {
+        /*
+          Byte pairs that make 2-byte head characters in previous
+          context pairs are marked as not applicable for optimization
+          during the collation initialization. So when we come here
+          sbeg[0] and sbeg[1] are:
+          - either two ASCII characters
+          - or one 2-byte character which IS NOT a previous context head
+          Just remember sbeg[1] as the previous character for simplicity.
+          This may erroneously interpret bytes 0x80..0x9F as previous context
+          head characters U+0080..U+009F. However, CLDR does not have any real
+          collations that use these characters as previous context heads.
+        */
+        scanner->page= 0;
+        scanner->code= (int) scanner->sbeg[1];
+        scanner->sbeg+= 2;
+        if ((weight= my_uca_scanner_set_weight(scanner, ww->weight)))
+        {
+          /*
+            TODO: add support for scanner_next_with_nchars and do this:
+            SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
+          */
+          return weight;
+        }
+        continue; /* Ignorable character */
+      }
+      /* 2 byte optimization is not applicable, go the slow path */
+    }
+#endif
+
+
     /* Get next character */
 #if MY_UCA_ASCII_OPTIMIZE
     /* Get next ASCII character */
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 8d8f70903ad..38d81910053 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -6549,7 +6549,8 @@ MY_UCA_INFO my_uca_v400=
         NULL     /*   flags           */
       },
       0,         /* levelno            */
-      {0}        /* contraction_hash   */
+      {0},       /* contraction_hash   */
+      NULL       /* booster            */
     },
     {
       0,
@@ -6561,7 +6562,8 @@ MY_UCA_INFO my_uca_v400=
         NULL
       },
       1,        /* levelno            */
-      {0}       /* contraction_hash   */
+      {0},      /* contraction_hash   */
+      NULL      /* booster            */
     },
     {0}
   },
@@ -30112,7 +30114,8 @@ MY_UCA_INFO my_uca_v520_th=
           NULL               /*   flags */
       },
       0,             /* levelno */
-      {0}            /* contraction_hash   */
+      {0},           /* contraction_hash   */
+      NULL           /* booster            */
     },
     {
       0x10FFFF,      /* maxchar */
@@ -30124,7 +30127,8 @@ MY_UCA_INFO my_uca_v520_th=
           NULL                  /*   flags */
       },
       1,             /* levelno */
-      {0}            /* contraction_hash   */
+      {0},           /* contraction_hash   */
+      NULL           /* booster            */
     },
     {0}
   },
@@ -30164,7 +30168,8 @@ MY_UCA_INFO my_uca_v520=
 	NULL         /*   flags           */
       },
       0,             /* levelno */
-      {0}            /* contraction_hash   */
+      {0},           /* contraction_hash   */
+      NULL           /* booster            */
     },
 
     {
@@ -30177,7 +30182,8 @@ MY_UCA_INFO my_uca_v520=
         NULL       /*   flags */
       },
       1,           /* levelno */
-      {0}          /* contraction_hash   */
+      {0},         /* contraction_hash   */
+      NULL         /* booster            */
     },
 
     {0}
@@ -30221,7 +30227,8 @@ static MY_UCA_INFO my_uca_v1400=
         NULL         /*   flags           */
       },
       0,             /* levelno */
-      {0}            /* contraction_hash   */
+      {0},           /* contraction_hash   */
+      NULL           /* booster            */
     },
 
     {
@@ -30234,7 +30241,8 @@ static MY_UCA_INFO my_uca_v1400=
         NULL         /*   flags */
       },
       1,             /* levelno */
-      {0}            /* contraction_hash   */
+      {0},           /* contraction_hash   */
+      NULL           /* booster            */
     },
 
     {
@@ -30247,7 +30255,8 @@ static MY_UCA_INFO my_uca_v1400=
         NULL         /*   flags */
       },
       2,             /* levelno */
-      {0}            /* contraction_hash   */
+      {0},           /* contraction_hash   */
+      NULL           /* booster            */
     }
 
   },
@@ -33947,8 +33956,522 @@ my_uca_generate_pages(MY_CHARSET_LOADER *loader,
 }
 
 
+static size_t
+my_uca_weight_cpy(uint16 *dst, const uint16 *src)
+{
+  const uint16 *src0= src;
+  for ( ; ; dst++, src++ )
+  {
+    *dst= *src; /* Copy every weight, including the 0 terminator */
+    if (!dst[0])
+      break;
+  }
+  return src - src0; /* Number of weights copied, terminator excluded */
+}
+
+
+/*
+  The value 0xFFFF does not exist in UCA weights.
+  Let's use it to mark byte pairs that have complex
+  mapping.
+*/
+#define MY_UCA_2BYTES_NOT_APPLICABLE 0xFFFF
+
+
+static inline my_bool
+my_uca_2bytes_item_is_applicable(const MY_UCA_2BYTES_ITEM *w2)
+{
+  return w2->weight[1] != MY_UCA_2BYTES_NOT_APPLICABLE; /* Mark is in [1] */
+}
+
+
+static void
+my_uca_2bytes_item_set_not_applicable(MY_UCA_2BYTES_ITEM *dst)
+{
+  dst->weight[0]= 0; /* So that weight2_populate() maps the pair to {0,0} */
+  dst->weight[1]= MY_UCA_2BYTES_NOT_APPLICABLE; /* The mark itself */
+}
+
+
+/* Count the weights of a 0-terminated weight string (terminator excluded) */
+static inline size_t
+my_uca_weight_length(const uint16 *str)
+{
+  uint res;
+  for (res= 0; str[res] ; res++)
+  { }
+  return res;
+}
+
+
+/*
+  Copy a 0-terminated weight string if it fits (terminator included),
+  otherwise mark the byte pair as not applicable for optimization.
+*/
+static void
+my_uca_2bytes_item_weight_cpy(MY_UCA_2BYTES_ITEM *dst, const uint16 *src)
+{
+  size_t wlen= my_uca_weight_length(src);
+  if (wlen + 1 > array_elements(dst->weight)) /* +1 for the 0 terminator */
+    my_uca_2bytes_item_set_not_applicable(dst);
+  else
+    my_uca_weight_cpy(dst->weight, src);
+}
+
+
+/*
+  Concatenate two 0-terminated weight strings (wa first, then wb) if they
+  fit together, otherwise mark the byte pair as not applicable.
+*/
+static void
+my_uca_2bytes_item_weight_cpy2(MY_UCA_2BYTES_ITEM *dst,
+                               const uint16 *wa,
+                               const uint16 *wb)
+{
+  size_t la= my_uca_weight_length(wa);
+  size_t lb= my_uca_weight_length(wb);
+  if (la + lb + 1 > array_elements(dst->weight)) /* +1 for the terminator */
+  {
+    my_uca_2bytes_item_set_not_applicable(dst);
+  }
+  else
+  {
+    my_uca_weight_cpy(dst->weight, wa);
+    my_uca_weight_cpy(dst->weight + la, wb); /* Overwrites wa's terminator */
+  }
+}
+
+
+/*
+  Concatenate page-0 weights of two ASCII characters if they fit together,
+  otherwise mark the byte pair as not applicable for optimization.
+*/
+static void
+my_uca_2bytes_item_set_ascii2(MY_UCA_2BYTES_ITEM *dst,
+                              const MY_UCA_WEIGHT_LEVEL *level,
+                              uchar a, uchar b)
+{
+  const uint16 *wa= level->weights[0] + (uint) a * level->lengths[0];
+  const uint16 *wb= level->weights[0] + (uint) b * level->lengths[0];
+  my_uca_2bytes_item_weight_cpy2(dst, wa, wb);
+}
+
+
+/*
+  Check if two bytes make a well-formed 2-byte character.
+  Copy its weight if it fits.
+  If the two bytes do not make a well-formed 2-byte character,
+  or the weight of a valid 2-byte character is too long, then
+  mark this byte pair as not applicable for optimization.
+*/
+static  void
+my_uca_2bytes_item_set_non_ascii2(MY_UCA_2BYTES_ITEM *dst,
+                                  const MY_UCA_WEIGHT_LEVEL *level,
+                                  CHARSET_INFO *cs,
+                                  uchar a, uchar b)
+{
+  uchar ch[2]= {a, b};
+  my_wc_t wc;
+  int rc= my_ci_mb_wc(cs, &wc, &ch[0], &ch[2]); /* &ch[2] is the end pointer */
+  if (rc == 2)
+  {
+    /* Byte sequence 'ab' makes one valid 2-byte character */
+    uint pageno= wc>>8;
+    const uint16 *w= level->weights[pageno] + (wc & 0xFF) * level->lengths[pageno];
+    my_uca_2bytes_item_weight_cpy(dst, w);
+  }
+  else
+  {
+    my_uca_2bytes_item_set_not_applicable(dst);
+  }
+}
+
+
+static inline MY_UCA_2BYTES_ITEM *
+my_uca_level_booster_2bytes_item_addr(MY_UCA_LEVEL_BOOSTER *booster,
+                                      uchar a, uchar b)
+{
+  size_t w2offs= a * 256 + b; /* "a" is the first byte, "b" the second */
+  return &booster->weight_strings_2bytes[w2offs];
+}
+
+
+static inline const MY_UCA_2BYTES_ITEM *
+my_uca_level_booster_2bytes_item_addr_const(const MY_UCA_LEVEL_BOOSTER *booster,
+                                            uchar a, uchar b)
+{
+  size_t w2offs= a * 256 + b; /* Same indexing as the non-const variant */
+  return &booster->weight_strings_2bytes[w2offs];
+}
+
+
+static inline const MY_UCA_WEIGHT2 *
+my_uca_level_booster_simple_weight2_addr_const(
+                                        const MY_UCA_LEVEL_BOOSTER *booster,
+                                        uchar a, uchar b)
+{
+  uint offs= (uint) a * 256 + b; /* Same indexing as the 2bytes items */
+  return &booster->weight_strings_2bytes_to_1_or_2_weights[offs];
+}
+
+
+static void
+my_uca_level_booster_2bytes_disable2(MY_UCA_LEVEL_BOOSTER *booster,
+                                     uchar a, uchar b)
+{
+  MY_UCA_2BYTES_ITEM *dst= my_uca_level_booster_2bytes_item_addr(booster, a, b);
+  my_uca_2bytes_item_set_not_applicable(dst); /* Overwrites earlier weights */
+}
+
+
+static void
+my_uca_level_booster_2bytes_disable_if_2byte_mb(MY_UCA_LEVEL_BOOSTER *booster,
+                                                CHARSET_INFO *cs,
+                                                my_wc_t wc)
+{
+  uchar tmp[MY_CS_MBMAXLEN];
+  int rc= my_ci_wc_mb(cs, wc, tmp, tmp + sizeof(tmp));
+  if (rc == 2) /* presumably rc is the encoded length in bytes - TODO confirm */
+    my_uca_level_booster_2bytes_disable2(booster, tmp[0], tmp[1]);
+}
+
+
+static inline void
+my_uca_level_booster_2bytes_set_not_applicable_by_tail(
+                                                 MY_UCA_LEVEL_BOOSTER *booster,
+                                                 uchar tail)
+{
+  uint head;
+  for (head= 0; head < 256; head++) /* Every possible first byte */
+    my_uca_level_booster_2bytes_disable2(booster, (uchar) head, tail);
+}
+
+
+/*
+  Mark all byte pairs whose weight depends on the surrounding context
+  because of the given true contraction.
+*/
+static void
+my_uca_level_booster_2bytes_disable_contraction(MY_UCA_LEVEL_BOOSTER *booster,
+                                                const MY_CONTRACTION *c,
+                                                CHARSET_INFO *cs)
+{
+  /* Previous context sequences are handled by a separate routine */
+  DBUG_ASSERT(!c->with_context);
+
+  if (c->ch[0] < 0x80)
+  {
+    /*
+      2-byte pairs that end with an ASCII contraction head.
+      ...xAB...
+      Suppose AB is a contraction where A is an ASCII character.
+      Disable byte pairs xA (for all x=0x00..0xFF).
+    */
+    my_uca_level_booster_2bytes_set_not_applicable_by_tail(booster,
+                                                           (uchar) c->ch[0]);
+
+    /*
+      Disable 2-byte ASCII combinations that start
+      3-character (or longer) contractions.
+    */
+    if (c->ch[1] < 0x80 && c->ch[2] != 0)
+    {
+      /*
+         A 3+ character contraction that starts with two ASCII characters:
+           ...ABx...
+      */
+      my_uca_level_booster_2bytes_disable2(booster,
+                                           (uchar) c->ch[0],
+                                           (uchar) c->ch[1]);
+    }
+  }
+  else
+  {
+    /*
+      Disable 2-byte characters that start contractions:
+        ...[Aa][B]...    MB    +  ASCII
+        ...[Aa][Bb]..    MB    +  MB2
+        ...[Aa][Bbb]..   MB    +  MB3
+        ...[Aa][Bbbb]..  MB    +  MB4
+      The weight of the character [Aa] depends on what goes after it.
+    */
+    my_uca_level_booster_2bytes_disable_if_2byte_mb(booster, cs, c->ch[0]);
+  }
+}
+
+
+/*
+  Mark all byte pairs whose weight depends on the surrounding context
+  because of the given previous context sequence.
+*/
+static void
+my_uca_level_booster_2bytes_disable_previous_context(
+                                                 MY_UCA_LEVEL_BOOSTER *booster,
+                                                 const MY_CONTRACTION *c,
+                                                 CHARSET_INFO *cs)
+{
+  /* True contractions are handled by a separate routine */
+  DBUG_ASSERT(c->with_context);
+
+  if (c->ch[0] < 0x80 && c->ch[1] < 0x80)
+  {
+    DBUG_ASSERT(c->ch[2] == 0);
+    if (c->ch[2] == 0)
+    {
+      /*
+        A previous context pair with exactly two ASCII characters:
+          ...AB...
+        "A" is a look-behind character (the context).
+        "B" is a character that we need to generate a weight for.
+        The underlying code does not support handling these characters
+        in a single shot yet. It works as follows at the moment:
+        - A is scanned separately from B and generates its independent weight.
+        - B is scanned separately on the next step and generates its
+          context dependent weight (by looking behind).
+      */
+      my_uca_level_booster_2bytes_disable2(booster,
+                                           (uchar) c->ch[0],
+                                           (uchar) c->ch[1]);
+    }
+  }
+  else
+  {
+    /*
+      Disable 2-byte characters that start pairs with a previous context:
+        ...[Aa][B]...    MB    +  ASCII
+        ...[Aa][Bb]..    MB    +  MB
+      These characters can be actually scanned in a single shot,
+      but the relevant code in scanner_next() assumes previous context
+      head characters are ASCII only, so it sets the previous
+      character simply as sbeg[1].
+    */
+    my_uca_level_booster_2bytes_disable_if_2byte_mb(booster, cs, c->ch[0]);
+  }
+}
+
+
+/*
+  Set the weight of a 2-byte sequence,
+  or mark the sequence as not applicable for optimization.
+*/
+static void
+my_uca_2bytes_item_set_pair(MY_UCA_2BYTES_ITEM *dst,
+                            const MY_UCA_WEIGHT_LEVEL *level,
+                            CHARSET_INFO *cs,
+                            uchar a, uchar b)
+{
+  if (a < 0x80 && b < 0x80) /* Two ASCII characters */
+    my_uca_2bytes_item_set_ascii2(dst, level, a, b);
+  else /* One 2-byte character, or bytes that do not make one */
+    my_uca_2bytes_item_set_non_ascii2(dst, level, cs, a, b);
+}
+
+
+/*
+  For every byte pair [00..FF][00..FF] set its weight,
+  or mark it as not applicable for optimization.
+*/
+static void
+my_uca_level_booster_2bytes_populate_pairs(MY_UCA_LEVEL_BOOSTER *booster,
+                                           const MY_UCA_WEIGHT_LEVEL *level,
+                                           CHARSET_INFO *cs)
+{
+  uint a, b;
+  for (a= 0; a < 256; a++) /* First byte */
+  {
+    for (b= 0; b < 256; b++) /* Second byte */
+    {
+      MY_UCA_2BYTES_ITEM *dst;
+      dst= my_uca_level_booster_2bytes_item_addr(booster, (uchar) a, (uchar) b);
+      my_uca_2bytes_item_set_pair(dst, level, cs, (uchar) a, (uchar) b);
+    }
+  }
+}
+
+
+/*
+  Populate contractions consisting of exactly two ASCII letters.
+  Only true contractions are handled here so far.
+  Previous context pairs are handled separately.
+*/
+static void
+my_uca_level_booster_2bytes_pupulate_ascii2_contractions(
+                                                 MY_UCA_LEVEL_BOOSTER *booster,
+                                                 const MY_CONTRACTIONS *list)
+{
+  size_t i;
+  for (i= 0; i < list->nitems; i++)
+  {
+    const MY_CONTRACTION *c= &list->item[i];
+    if (c->ch[0] < 0x80 && c->ch[1] < 0x80 && c->ch[2] == 0 &&
+        !c->with_context)
+    {
+      MY_UCA_2BYTES_ITEM *dst;
+      dst= my_uca_level_booster_2bytes_item_addr(booster,
+                                                 (uchar) c->ch[0],
+                                                 (uchar) c->ch[1]);
+      my_uca_2bytes_item_weight_cpy(dst, c->weight); /* Or disable if long */
+    }
+  }
+}
+
+
+/*
+  Mark all byte pairs whose weight depends on the context
+  (because of contractions and previous context sequences)
+  as not applicable for optimization.
+*/
+static void
+my_uca_level_booster_2bytes_disable_context_dependent(
+                                              MY_UCA_LEVEL_BOOSTER *booster,
+                                              const MY_CONTRACTIONS *list,
+                                              CHARSET_INFO *cs)
+{
+  size_t i;
+  for (i= 0; i < list->nitems; i++)
+  {
+    const MY_CONTRACTION *c= &list->item[i];
+    if (c->with_context)
+      my_uca_level_booster_2bytes_disable_previous_context(booster, c, cs);
+    else
+      my_uca_level_booster_2bytes_disable_contraction(booster, c, cs);
+  }
+}
+
+
+/*
+  Populate the array of MY_UCA_WEIGHT2 for all possible byte pairs {a,b}
+  as follows:
+
+  Number of characters        Number of weights                      WEIGHT2
+  --------------------        -----------------                      ------
+  2 (two ASCII chars)         0  (both ignorable)                    {0,0} [IGN]
+  2 (two ASCII chars)         1  (e.g. Czech "ch")                   {X,0}
+  2 (two ASCII chars)         1  (e.g. ignorable + non-ignorable)    {X,0}
+  2 (two ASCII chars)         2  (two ASCII chars, one weight each)  {X,Y}
+  2 (two ASCII chars)         3+ (contraction with a long expansion) {0,0} [E3]
+  1 (one 2-byte char)         0  (ignorable)                         {0,0} [IGN]
+  1 (one 2-byte char)         1                                      {X,0}
+  1 (one 2-byte char)         2  (short expansion, e.g. German SZ)   {X,Y}
+  1 (one 2-byte char)         3+ (long expansion)                    {0,0} [E3]
+  0 (incomplete 3/4-byte char)                                       {0,0} [INC]
+
+  All byte pairs that depend on the context (e.g. contraction parts)
+  and that were previously marked as such by
+  my_uca_level_booster_2bytes_disable_context_dependent()
+  set WEIGHT2 to {0,0} [CTX].
+
+  After the initialization, the array contains non-zero weights for
+  the most typical simple cases of mapping from 2-bytes to weights,
+  so inside strnncoll*() we can skip equal string prefixes much faster,
+  using a cheaper simpler code.
+*/
+static void
+my_uca_level_booster_weight2_populate(MY_UCA_LEVEL_BOOSTER *booster)
+{
+  size_t i;
+  for (i= 0; i < 0x10000; i++)
+  {
+    MY_UCA_WEIGHT2 *dst= &booster->weight_strings_2bytes_to_1_or_2_weights[i];
+    MY_UCA_2BYTES_ITEM *src= &booster->weight_strings_2bytes[i];
+    if (src->weight[0] && (!src->weight[1] || !src->weight[2]))
+    {
+      /*
+        Simplest mapping:
+        - Two ASCII characters make one or two weights
+        - One 2-byte character makes one or two weights
+        Handled by the simpler loop at the comparison time.
+      */
+      dst->weight[0]= src->weight[0];
+      dst->weight[1]= src->weight[1];
+    }
+    else
+    {
+      /*
+        More complex mapping:
+        - Ignorable                                 - see [IGN] above
+        - More than two weights                     - see [E3]  above
+        - Incomplete (a 3-byte or 4-byte char head) - see [INC] above
+        - Not applicable (context dependent)        - see [CTX] above
+        Handled by the full-featured slower loop at the comparison time.
+      */
+      dst->weight[0]= 0;
+      dst->weight[1]= 0;
+    }
+  }
+}
+
+
+static void
+my_uca_level_booster_populate(MY_UCA_LEVEL_BOOSTER *dst,
+                              const MY_UCA_WEIGHT_LEVEL *src,
+                              CHARSET_INFO *cs)
+{
+  my_uca_level_booster_2bytes_populate_pairs(dst, src, cs);
+  my_uca_level_booster_2bytes_pupulate_ascii2_contractions(dst,
+                                                           &src->contractions);
+  my_uca_level_booster_2bytes_disable_context_dependent(dst,
+                                                        &src->contractions,
+                                                        cs);
+  my_uca_level_booster_weight2_populate(dst); /* Reads the final 2bytes items */
+}
+
+
+static MY_UCA_LEVEL_BOOSTER *
+my_uca_level_booster_alloc(MY_CHARSET_LOADER *loader)
+{
+  size_t nbytes= sizeof(MY_UCA_LEVEL_BOOSTER);
+  MY_UCA_LEVEL_BOOSTER *res;
+  if (!(res= (MY_UCA_LEVEL_BOOSTER *) (loader->once_alloc)(nbytes)))
+    return NULL; /* Allocation failed */
+  bzero(res, nbytes);
+  return res;
+}
+
+
+static MY_UCA_LEVEL_BOOSTER *
+my_uca_level_booster_new(MY_CHARSET_LOADER *loader,
+                         CHARSET_INFO *cs,
+                         MY_UCA_WEIGHT_LEVEL *level)
+{
+  MY_UCA_LEVEL_BOOSTER *res;
+  if (!(res= my_uca_level_booster_alloc(loader)))
+    return NULL; /* Allocation failed: the caller gets no booster */
+  my_uca_level_booster_populate(res, level, cs);
+  return res;
+}
+
+
+/*
+  Skip the simple equal prefix of two strings using
+  "One or two bytes produce one or two weights" optimization.
+  Return the prefix length in bytes (always an even number).
+*/
+static size_t
+my_uca_level_booster_equal_prefix_length(const MY_UCA_LEVEL_BOOSTER *booster,
+                                         const uchar *s, size_t slen,
+                                         const uchar *t, size_t tlen)
+{
+  const uchar *s0= s;
+  size_t simple_count= MY_MIN(slen, tlen) >> 1; /* Whole byte pairs only */
+  for ( ; simple_count; s+= 2, t+= 2, simple_count--)
+  {
+    const MY_UCA_WEIGHT2 *ws, *wt;
+    ws= my_uca_level_booster_simple_weight2_addr_const(booster, s[0], s[1]);
+    wt= my_uca_level_booster_simple_weight2_addr_const(booster, t[0], t[1]);
+    if (ws->weight[0] && /* {0,0} means "not a simple pair": stop here */
+        ws->weight[0] == wt->weight[0] &&
+        ws->weight[1] == wt->weight[1])
+      continue;
+    break;
+  }
+  return s - s0;
+}
+
+
 static my_bool
-init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
+init_weight_level(MY_CHARSET_LOADER *loader, CHARSET_INFO *cs,
+                  MY_COLL_RULES *rules,
                   MY_UCA_WEIGHT_LEVEL *dst, const MY_UCA_WEIGHT_LEVEL *src)
 {
   MY_COLL_RULE *r, *rlast;
@@ -34055,6 +34578,9 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
     }
   }
 
+  if (cs->mbminlen == 1 &&
+      !(dst->booster= my_uca_level_booster_new(loader, cs, dst)))
+    return TRUE; /* Booster allocation failed: do not leave it NULL */
   return FALSE;
 }
 
@@ -34151,7 +34677,7 @@ my_uca_init_levels(MY_CHARSET_LOADER *loader, MY_UCA_INFO *dst,
                   cs->coll_name.str, i + 1);
       return TRUE;
     }
-    if (init_weight_level(loader, rules,
+    if (init_weight_level(loader, cs, rules,
                           &dst->level[i], &src->level[i]))
       return TRUE;
   }
diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic
index f0855355a92..f3d543be1b1 100644
--- a/strings/ctype-uca.ic
+++ b/strings/ctype-uca.ic
@@ -95,6 +95,15 @@ MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
   my_uca_scanner tscanner;
   int s_res;
   int t_res;
+
+#if MY_UCA_ASCII_OPTIMIZE
+{
+  size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
+                                                          s, slen, t, tlen);
+  s+= prefix, slen-= prefix;
+  t+= prefix, tlen-= prefix;
+}
+#endif
   
   my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
   my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
@@ -204,6 +213,15 @@ MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
   my_uca_scanner sscanner, tscanner;
   int s_res, t_res;
 
+#if MY_UCA_ASCII_OPTIMIZE
+{
+  size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
+                                                          s, slen, t, tlen);
+  s+= prefix, slen-= prefix;
+  t+= prefix, tlen-= prefix;
+}
+#endif
+
   my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
   my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
 
@@ -432,6 +450,18 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
   size_t s_nchars_left= nchars;
   size_t t_nchars_left= nchars;
 
+/*
+TODO: strnncollsp_nchars_onelevel
+#if MY_UCA_ASCII_OPTIMIZE
+{
+  size_t prefix= my_uca_level_booster_equal_prefix_length(level->booster,
+                                                          s, slen, t, tlen);
+  s+= prefix, slen-= prefix;
+  t+= prefix, tlen-= prefix;
+}
+#endif
+*/
+
   my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
   my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
 
diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c
index 7532244b0a2..9636634fb8e 100644
--- a/unittest/strings/strings-t.c
+++ b/unittest/strings/strings-t.c
@@ -1341,7 +1341,7 @@ strnncollsp_char_one(CHARSET_INFO *cs, const STRNNCOLLSP_CHAR_PARAM *p)
   str2hex(ahex, sizeof(ahex), p->a.str, p->a.length);
   str2hex(bhex, sizeof(bhex), p->b.str, p->b.length);
   diag("%-25s %-12s %-12s %3d %7d %7d%s",
-       cs->cs_name.str, ahex, bhex, (int) p->nchars, p->res, res,
+       cs->coll_name.str, ahex, bhex, (int) p->nchars, p->res, res,
        eqres(res, p->res) ? "" : " FAILED");
   if (!eqres(res, p->res))
   {