11 files changed, 2260 insertions, 1414 deletions
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 9efd7242118..8368e33cc1d 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -31158,17 +31158,6 @@ typedef struct my_uca_scanner_st
   CHARSET_INFO *cs;
 } my_uca_scanner;
 
-/*
-  Charset dependent scanner part, to optimize
-  some character sets.
-*/
-typedef struct my_uca_scanner_handler_st 
-{
-  void (*init)(my_uca_scanner *scanner, CHARSET_INFO *cs,
-               const MY_UCA_WEIGHT_LEVEL *level,
-               const uchar *str, size_t length);
-  int (*next)(my_uca_scanner *scanner);
-} my_uca_scanner_handler;
 
 static const uint16 nochar[]= {0,0};
 
@@ -31421,6 +31410,28 @@ my_uca_can_be_previous_context_tail(const MY_CONTRACTIONS *list, my_wc_t wc)
 
 
 /**
+  Check if a character needs previous/next context handling:
+  - can be a previous context tail
+  - can be a contraction start
+
+  @param level    Pointer to the UCA weight level data
+  @param wc       Code point
+
+  @return
+  @retval   FALSE - does not need context handling
+  @retval   TRUE  - needs context handling
+*/
+
+static inline my_bool
+my_uca_needs_context_handling(const MY_UCA_WEIGHT_LEVEL *level, my_wc_t wc)
+{
+  return level->contractions.nitems > 0 &&
+         level->contractions.flags[wc & MY_UCA_CNT_FLAG_MASK] &
+         (MY_UCA_PREVIOUS_CONTEXT_TAIL | MY_UCA_CNT_HEAD);
+}
+
+
+/**
   Compare two wide character strings, wide analog to strncmp().
 
   @param a      Pointer to the first string
@@ -31554,6 +31565,60 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
   return NULL;
 }
 
+
+/*
+  Find a context dependent weight of a character.
+  @param scanner - UCA weight scanner. The caller should set
+                   its members "page" and "code" to the previous character
+                   (or to zeros if there is no previous character).
+  @param wc      - an array of wide characters which has at least
+                   MY_UCA_MAX_CONTRACTION elements, where wc[0] is set
+                   to the current character (whose weight is being resolved).
+                   The values of wc[i>0] are not important, but if wc[0]
+                   appears to be a known contraction head, the function
+                   will collect further contraction parts into wc[i>0].
+                   If wc[0] and the previous character make a previous context
+                   pair, then wc[1] is set to the previous character.
+
+  @retval          NULL if could not find any contextual weights for wc[0]
+  @retval          non null pointer to a zero-terminated weight string otherwise
+*/
+static inline uint16 *
+my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
+{
+  uint16 *cweight;
+  DBUG_ASSERT(scanner->level->contractions.nitems);
+  /*
+    If we have scanned a character which can have previous context,
+    and there were some more characters already before,
+    then reconstruct codepoint of the previous character
+    from "page" and "code" into wc[1], and verify that {wc[1], wc[0]}
+    together form a real previous context pair.
+    Note, we support only 2-character long sequences with previous
+    context at the moment. CLDR does not have longer sequences.
+  */
+  if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
+                                          wc[0]) &&
+      scanner->wbeg != nochar &&     /* if not the very first character */
+      my_uca_can_be_previous_context_head(&scanner->level->contractions,
+                                          (wc[1]= ((scanner->page << 8) +
+                                                    scanner->code))) &&
+      (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
+  {
+    scanner->page= scanner->code= 0; /* Clear for the next character */
+    return cweight;
+  }
+  else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
+                                          wc[0]))
+  {
+    /* Check if wc[0] starts a contraction */
+    if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
+      return cweight;
+  }
+  return NULL;
+}
+
+
 /****************************************************************/
 
 /**
@@ -31675,223 +31740,6 @@ my_uca_scanner_init_any(my_uca_scanner *scanner,
   scanner->cs= cs;
 }
 
-static int my_uca_scanner_next_any(my_uca_scanner *scanner)
-{
-  /* 
-    Check if the weights for the previous character have been
-    already fully scanned. If yes, then get the next character and 
-    initialize wbeg and wlength to its weight string.
-  */
-
-  if (scanner->wbeg[0])      /* More weights left from the previous step: */
-    return *scanner->wbeg++; /* return the next weight from expansion     */
-
-  do
-  {
-    const uint16 *wpage;
-    my_wc_t wc[MY_UCA_MAX_CONTRACTION];
-    int mblen;
-
-    /* Get next character */
-    if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, wc,
-                                          scanner->sbeg,
-                                          scanner->send)) <= 0))
-    {
-      if (scanner->sbeg >= scanner->send)
-        return -1; /* No more bytes, end of line reached */
-      /*
-        There are some more bytes left. Non-positive mb_len means that
-        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
-      */
-      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
-      {
-        /* For safety purposes don't go beyond the string range. */
-        scanner->sbeg= scanner->send;
-      }
-      /*
-        Treat every complete or incomplete mbminlen unit as a weight which is
-        greater than weight for any possible normal character.
-        0xFFFF is greater than any possible weight in the UCA weight table.
-      */
-      return 0xFFFF;
-    }
-
-    scanner->sbeg+= mblen;
-    if (wc[0] > scanner->level->maxchar)
-    {
-      /* Return 0xFFFD as weight for all characters outside BMP */
-      scanner->wbeg= nochar;
-      return 0xFFFD;
-    }
-
-    if (my_uca_have_contractions_quick(scanner->level))
-    {
-      uint16 *cweight;
-      /*
-        If we have scanned a character which can have previous context,
-        and there were some more characters already before,
-        then reconstruct codepoint of the previous character
-        from "page" and "code" into w[1], and verify that {wc[1], wc[0]}
-        together form a real previous context pair.
-        Note, we support only 2-character long sequences with previous
-        context at the moment. CLDR does not have longer sequences.
-      */
-      if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
-                                              wc[0]) &&
-          scanner->wbeg != nochar &&     /* if not the very first character */
-          my_uca_can_be_previous_context_head(&scanner->level->contractions,
-                                              (wc[1]= ((scanner->page << 8) +
-                                                        scanner->code))) &&
-          (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
-      {
-        scanner->page= scanner->code= 0; /* Clear for the next character */
-        return *cweight;
-      }
-      else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
-                                              wc[0]))
-      {
-        /* Check if w[0] starts a contraction */
-        if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
-          return *cweight;
-      }
-    }
-
-    /* Process single character */
-    scanner->page= wc[0] >> 8;
-    scanner->code= wc[0] & 0xFF;
-
-    /* If weight page for w[0] does not exist, then calculate algoritmically */
-    if (!(wpage= scanner->level->weights[scanner->page]))
-      return my_uca_scanner_next_implicit(scanner);
-
-    /* Calculate pointer to w[0]'s weight, using page and offset */
-    scanner->wbeg= wpage +
-                   scanner->code * scanner->level->lengths[scanner->page];
-  } while (!scanner->wbeg[0]); /* Skip ignorable characters */
-
-  return *scanner->wbeg++;
-}
-
-
-static my_uca_scanner_handler my_any_uca_scanner_handler=
-{
-  my_uca_scanner_init_any,
-  my_uca_scanner_next_any
-};
-
-/*
-  Compares two strings according to the collation
-
-  SYNOPSIS:
-    my_strnncoll_uca()
-    cs		Character set information
-    s		First string
-    slen	First string length
-    t		Second string
-    tlen	Seconf string length
-    level	DUCETweight level
-  
-  NOTES:
-    Initializes two weight scanners and gets weights
-    corresponding to two strings in a loop. If weights are not
-    the same at some step then returns their difference.
-    
-    In the while() comparison these situations are possible:
-    1. (s_res>0) and (t_res>0) and (s_res == t_res)
-       Weights are the same so far, continue comparison
-    2. (s_res>0) and (t_res>0) and (s_res!=t_res)
-       A difference has been found, return.
-    3. (s_res>0) and (t_res<0)
-       We have reached the end of the second string, or found
-       an illegal multibyte sequence in the second string.
-       Return a positive number, i.e. the first string is bigger.
-    4. (s_res<0) and (t_res>0)   
-       We have reached the end of the first string, or found
-       an illegal multibyte sequence in the first string.
-       Return a negative number, i.e. the second string is bigger.
-    5. (s_res<0) and (t_res<0)
-       Both scanners returned -1. It means we have riched
-       the end-of-string of illegal-sequence in both strings
-       at the same time. Return 0, strings are equal.
-    
-  RETURN
-    Difference between two strings, according to the collation:
-    0               - means strings are equal
-    negative number - means the first string is smaller
-    positive number - means the first string is bigger
-*/
-
-static int my_strnncoll_uca_onelevel(CHARSET_INFO *cs, 
-                                     my_uca_scanner_handler *scanner_handler,
-                                     const MY_UCA_WEIGHT_LEVEL *level,
-                                     const uchar *s, size_t slen,
-                                     const uchar *t, size_t tlen,
-                                     my_bool t_is_prefix)
-{
-  my_uca_scanner sscanner;
-  my_uca_scanner tscanner;
-  int s_res;
-  int t_res;
-  
-  scanner_handler->init(&sscanner, cs, level, s, slen);
-  scanner_handler->init(&tscanner, cs, level, t, tlen);
-  
-  do
-  {
-    s_res= scanner_handler->next(&sscanner);
-    t_res= scanner_handler->next(&tscanner);
-  } while ( s_res == t_res && s_res >0);
-  
-  return  (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
-}
-
-static int my_strnncoll_uca(CHARSET_INFO *cs, 
-                            my_uca_scanner_handler *scanner_handler,
-                            const uchar *s, size_t slen,
-                            const uchar *t, size_t tlen,
-                            my_bool t_is_prefix)
-{
-  return my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
-                                   s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncoll_uca_multilevel(CHARSET_INFO *cs, 
-                                       my_uca_scanner_handler *scanner_handler,
-                                       const uchar *s, size_t slen,
-                                       const uchar *t, size_t tlen,
-                                       my_bool t_is_prefix)
-{
-  uint num_level= cs->levels_for_order;
-  uint i;
-  for (i= 0; i != num_level; i++)
-  {
-    int ret= my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[i],
-                                       s, slen, t, tlen, t_is_prefix);
-    if (ret)
-       return ret;
-  }
-  return 0;
-}
-
-
-static int
-my_strnncollsp_generic_uca_nopad_multilevel(CHARSET_INFO *cs,
-                                            const uchar *s, size_t slen,
-                                            const uchar *t, size_t tlen)
-{
-  uint num_level= cs->levels_for_order;
-  uint i;
-  for (i= 0; i != num_level; i++)
-  {
-    int ret= my_strnncoll_uca_onelevel(cs, &my_any_uca_scanner_handler,
-                                       &cs->uca->level[i],
-                                       s, slen, t, tlen, FALSE);
-    if (ret)
-       return ret;
-  }
-  return 0;
-}
-
 
 static inline int
 my_space_weight(const MY_UCA_WEIGHT_LEVEL *level)
@@ -31924,258 +31772,6 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc)
 }
 
 
-/*
-  Compares two strings according to the collation,
-  ignoring trailing spaces.
-
-  SYNOPSIS:
-    my_strnncollsp_uca()
-    cs		Character set information
-    s		First string
-    slen	First string length
-    t		Second string
-    tlen	Seconf string length
-    level	DUCETweight level
-  
-  NOTES:
-    Works exactly the same with my_strnncoll_uca(),
-    but ignores trailing spaces.
-
-    In the while() comparison these situations are possible:
-    1. (s_res>0) and (t_res>0) and (s_res == t_res)
-       Weights are the same so far, continue comparison
-    2. (s_res>0) and (t_res>0) and (s_res!=t_res)
-       A difference has been found, return.
-    3. (s_res>0) and (t_res<0)
-       We have reached the end of the second string, or found
-       an illegal multibyte sequence in the second string.
-       Compare the first string to an infinite array of
-       space characters until difference is found, or until
-       the end of the first string.
-    4. (s_res<0) and (t_res>0)   
-       We have reached the end of the first string, or found
-       an illegal multibyte sequence in the first string.
-       Compare the second string to an infinite array of
-       space characters until difference is found or until
-       the end of the second steing.
-    5. (s_res<0) and (t_res<0)
-       Both scanners returned -1. It means we have riched
-       the end-of-string of illegal-sequence in both strings
-       at the same time. Return 0, strings are equal.
-  
-  RETURN
-    Difference between two strings, according to the collation:
-    0               - means strings are equal
-    negative number - means the first string is smaller
-    positive number - means the first string is bigger
-*/
-
-static int my_strnncollsp_uca_onelevel(CHARSET_INFO *cs, 
-                                       my_uca_scanner_handler *scanner_handler,
-                                       const MY_UCA_WEIGHT_LEVEL *level,
-                                       const uchar *s, size_t slen,
-                                       const uchar *t, size_t tlen)
-{
-  my_uca_scanner sscanner, tscanner;
-  int s_res, t_res;
-  
-  scanner_handler->init(&sscanner, cs, level, s, slen);
-  scanner_handler->init(&tscanner, cs, level, t, tlen);
-  
-  do
-  {
-    s_res= scanner_handler->next(&sscanner);
-    t_res= scanner_handler->next(&tscanner);
-  } while ( s_res == t_res && s_res >0);
-
-  if (s_res > 0 && t_res < 0)
-  { 
-    /* Calculate weight for SPACE character */
-    t_res= my_space_weight(level);
-      
-    /* compare the first string to spaces */
-    do
-    {
-      if (s_res != t_res)
-        return (s_res - t_res);
-      s_res= scanner_handler->next(&sscanner);
-    } while (s_res > 0);
-    return 0;
-  }
-    
-  if (s_res < 0 && t_res > 0)
-  {
-    /* Calculate weight for SPACE character */
-    s_res= my_space_weight(level);
-      
-    /* compare the second string to spaces */
-    do
-    {
-      if (s_res != t_res)
-        return (s_res - t_res);
-      t_res= scanner_handler->next(&tscanner);
-    } while (t_res > 0);
-    return 0;
-  }
-  
-  return ( s_res - t_res );
-}
-
-static int my_strnncollsp_uca(CHARSET_INFO *cs, 
-                              my_uca_scanner_handler *scanner_handler,
-                              const uchar *s, size_t slen,
-                              const uchar *t, size_t tlen)
-{
-  return my_strnncollsp_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
-                                     s, slen, t, tlen);
-}
-
-static int my_strnncollsp_uca_multilevel(CHARSET_INFO *cs, 
-                                       my_uca_scanner_handler *scanner_handler,
-                                       const uchar *s, size_t slen,
-                                       const uchar *t, size_t tlen)
-{
-  uint num_level= cs->levels_for_order;
-  uint i;
-  for (i= 0; i != num_level; i++)
-  {
-    int ret= my_strnncollsp_uca_onelevel(cs, scanner_handler,
-                                         &cs->uca->level[i], s, slen, t, tlen);
-    if (ret)
-      return ret;
-  }
-  return 0;
-}
-
-/*
-  Calculates hash value for the given string,
-  according to the collation, and ignoring trailing spaces.
-  
-  SYNOPSIS:
-    my_hash_sort_uca()
-    cs		Character set information
-    s		String
-    slen	String's length
-    n1		First hash parameter
-    n2		Second hash parameter
-  
-  NOTES:
-    Scans consequently weights and updates
-    hash parameters n1 and n2. In a case insensitive collation,
-    upper and lower case of the same letter will return the same
-    weight sequence, and thus will produce the same hash values
-    in n1 and n2.
-
-    This functions is used for one-level and for multi-level collations.
-    We intentionally use only primary level in multi-level collations.
-    This helps to have PARTITION BY KEY put primarily equal records
-    into the same partition. E.g. in utf8_thai_520_ci records that differ
-    only in tone marks go into the same partition.
-
-  RETURN
-    N/A
-*/
-
-static void my_hash_sort_uca(CHARSET_INFO *cs,
-                             my_uca_scanner_handler *scanner_handler,
-                             const uchar *s, size_t slen,
-                             ulong *nr1, ulong *nr2)
-{
-  int   s_res;
-  my_uca_scanner scanner;
-  int space_weight= my_space_weight(&cs->uca->level[0]);
-  register ulong m1= *nr1, m2= *nr2;
-
-  scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
-  
-  while ((s_res= scanner_handler->next(&scanner)) >0)
-  {
-    if (s_res == space_weight)
-    {
-      /* Combine all spaces to be able to skip end spaces */
-      uint count= 0;
-      do
-      {
-        count++;
-        if ((s_res= scanner_handler->next(&scanner)) <= 0)
-        {
-          /* Skip strings at end of string */
-          goto end;
-        }
-      }
-      while (s_res == space_weight);
-
-      /* Add back that has for the space characters */
-      do
-      {
-        /*
-          We can't use MY_HASH_ADD_16() here as we, because of a misstake
-          in the original code, where we added the 16 byte variable the
-          opposite way.  Changing this would cause old partitioned tables
-          to fail.
-        */
-        MY_HASH_ADD(m1, m2, space_weight >> 8);
-        MY_HASH_ADD(m1, m2, space_weight & 0xFF);
-      }
-      while (--count != 0);
-
-    }
-    /* See comment above why we can't use MY_HASH_ADD_16() */
-    MY_HASH_ADD(m1, m2, s_res >> 8);
-    MY_HASH_ADD(m1, m2, s_res & 0xFF);
-  }
-end:
-  *nr1= m1;
-  *nr2= m2;
-}
-
-
-static void my_hash_sort_uca_nopad(CHARSET_INFO *cs,
-                                   my_uca_scanner_handler *scanner_handler,
-                                   const uchar *s, size_t slen,
-                                   ulong *nr1, ulong *nr2)
-{
-  int   s_res;
-  my_uca_scanner scanner;
-  register ulong m1= *nr1, m2= *nr2;
-
-  scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
-
-  while ((s_res= scanner_handler->next(&scanner)) >0)
-  {
-    /* See comment above why we can't use MY_HASH_ADD_16() */
-    MY_HASH_ADD(m1, m2, s_res >> 8);
-    MY_HASH_ADD(m1, m2, s_res & 0xFF);
-  }
-  *nr1= m1;
-  *nr2= m2;
-}
-
-
-static uchar *
-my_strnxfrm_uca_onelevel_internal(CHARSET_INFO *cs,
-                                  my_uca_scanner_handler *scanner_handler,
-                                  MY_UCA_WEIGHT_LEVEL *level,
-                                  uchar *dst, uchar *de, uint *nweights,
-                                  const uchar *src, size_t srclen)
-{
-  my_uca_scanner scanner;
-  int s_res;
-
-  DBUG_ASSERT(src || !srclen);
-
-  scanner_handler->init(&scanner, cs, level, src, srclen);
-  for (; dst < de && *nweights &&
-         (s_res= scanner_handler->next(&scanner)) > 0 ; (*nweights)--)
-  {
-    *dst++= s_res >> 8;
-    if (dst < de)
-      *dst++= s_res & 0xFF;
-  }
-  return dst;
-}
-
-
 static uchar *
 my_strnxfrm_uca_padn(uchar *dst, uchar *de, uint nweights, int weight)
 {
@@ -32202,27 +31798,6 @@ my_strnxfrm_uca_pad(uchar *dst, uchar *de, int weight)
 }
 
 
-static uchar *
-my_strnxfrm_uca_onelevel(CHARSET_INFO *cs,
-                         my_uca_scanner_handler *scanner_handler,
-                         MY_UCA_WEIGHT_LEVEL *level,
-                         uchar *dst, uchar *de, uint nweights,
-                         const uchar *src, size_t srclen, uint flags)
-{
-  uchar *d0= dst;
-
-  dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
-                                         dst, de, &nweights,
-                                         src, srclen);
-  DBUG_ASSERT(dst <= de);
-  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
-    dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
-  DBUG_ASSERT(dst <= de);
-  my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
-  return dst;
-}
-
-
 /*
   Return the minimum possible weight on a level.
 */
@@ -32233,136 +31808,6 @@ static uint min_weight_on_level(MY_UCA_WEIGHT_LEVEL *level)
 }
 
 
-static uchar *
-my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
-                              my_uca_scanner_handler *scanner_handler,
-                              MY_UCA_WEIGHT_LEVEL *level,
-                              uchar *dst, uchar *de, uint nweights,
-                              const uchar *src, size_t srclen, uint flags)
-{
-  uchar *d0= dst;
-
-  dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
-                                         dst, de, &nweights,
-                                         src, srclen);
-  DBUG_ASSERT(dst <= de);
-  /*  Pad with the minimum possible weight on this level */
-  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
-    dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
-  DBUG_ASSERT(dst <= de);
-  my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
-  return dst;
-}
-
-
-/*
-  For the given string creates its "binary image", suitable
-  to be used in binary comparison, i.e. in memcmp(). 
-  
-  SYNOPSIS:
-    my_strnxfrm_uca()
-    cs		Character set information
-    dst		Where to write the image
-    dstlen	Space available for the image, in bytes
-    src		The source string
-    srclen	Length of the source string, in bytes
-  
-  NOTES:
-    In a loop, scans weights from the source string and writes
-    them into the binary image. In a case insensitive collation,
-    upper and lower cases of the same letter will produce the
-    same image subsequences. When we have reached the end-of-string
-    or found an illegal multibyte sequence, the loop stops.
-
-    It is impossible to restore the original string using its
-    binary image. 
-    
-    Binary images are used for bulk comparison purposes,
-    e.g. in ORDER BY, when it is more efficient to create
-    a binary image and use it instead of weight scanner
-    for the original strings for every comparison.
-  
-  RETURN
-    Number of bytes that have been written into the binary image.
-*/
-
-
-static size_t
-my_strnxfrm_uca(CHARSET_INFO *cs, 
-                my_uca_scanner_handler *scanner_handler,
-                uchar *dst, size_t dstlen, uint nweights,
-                const uchar *src, size_t srclen, uint flags)
-{
-  uchar *d0= dst;
-  uchar *de= dst + dstlen;
-
-  dst= my_strnxfrm_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
-                                dst, de, nweights, src, srclen, flags);
-  /*
-    This can probably be changed to memset(dst, 0, de - dst),
-    like my_strnxfrm_uca_multilevel() does.
-  */
-  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
-    dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0]));
-  return dst - d0;
-}
-
-
-static size_t
-my_strnxfrm_uca_nopad(CHARSET_INFO *cs,
-                      my_uca_scanner_handler *scanner_handler,
-                      uchar *dst, size_t dstlen, uint nweights,
-                      const uchar *src, size_t srclen, uint flags)
-{
-  uchar *d0= dst;
-  uchar *de= dst + dstlen;
-
-  dst= my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler, &cs->uca->level[0],
-                                      dst, de, nweights, src, srclen, flags);
-  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
-  {
-    memset(dst, 0, de - dst);
-    dst= de;
-  }
-  return dst - d0;
-}
-
-
-static size_t
-my_strnxfrm_uca_multilevel(CHARSET_INFO *cs, 
-                           my_uca_scanner_handler *scanner_handler,
-                           uchar *dst, size_t dstlen, uint nweights,
-                           const uchar *src, size_t srclen, uint flags)
-{
-  uint num_level= cs->levels_for_order;
-  uchar *d0= dst;
-  uchar *de= dst + dstlen;
-  uint current_level;
-
-  for (current_level= 0; current_level != num_level; current_level++)
-  {
-    if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
-        (flags & (MY_STRXFRM_LEVEL1 << current_level)))
-      dst= cs->state & MY_CS_NOPAD ?
-           my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler,
-                                          &cs->uca->level[current_level],
-                                          dst, de, nweights,
-                                          src, srclen, flags) :
-           my_strnxfrm_uca_onelevel(cs, scanner_handler,
-                                    &cs->uca->level[current_level],
-                                    dst, de, nweights,
-                                    src, srclen, flags);
-  }
-
-  if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
-  {
-    memset(dst, 0, de - dst);
-    dst= de;
-  }
-
-  return dst - d0;
-}
-
 /*
   This function compares if two characters are the same.
   The sign +1 or -1 does not matter. The only
@@ -32568,6 +32013,23 @@ int my_wildcmp_uca(CHARSET_INFO *cs,
 
 
 /*
+  Tests if an optimized "no contraction" handler can be used for the given
+  collation, i.e. none of its cs->levels_for_order levels has contractions.
+*/
+static my_bool
+my_uca_collation_can_optimize_no_contractions(CHARSET_INFO *cs)
+{
+  uint i;
+  for (i= 0; i < cs->levels_for_order ; i++)
+  {
+    if (my_uca_have_contractions_quick(&cs->uca->level[i]))
+      return FALSE;
+  }
+  return TRUE;
+}
+
+
+/*
   Collation language is implemented according to
   subset of ICU Collation Customization (tailorings):
   http://icu.sourceforge.net/userguide/Collate_Customization.html
@@ -34250,8 +33712,74 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
 }
 
 
-MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel;
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel;
+static my_bool
+create_tailoring(struct charset_info_st *cs,
+                          MY_CHARSET_LOADER *loader);
+
+static my_bool
+my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
+{
+  cs->pad_char= ' ';
+  cs->ctype= my_charset_utf8_unicode_ci.ctype;
+  if (!cs->caseinfo)
+    cs->caseinfo= &my_unicase_default;
+  return create_tailoring(cs, loader);
+}
+
+
+static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
+{
+  /* UCA uses 2 bytes per weight */
+  return (len + cs->mbmaxlen - 1) / cs->mbmaxlen * cs->strxfrm_multiply * 2;
+}
+
+static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
+{
+  return my_strnxfrmlen_any_uca(cs, len) * cs->levels_for_order;
+}
+
+
+/*
+  This structure is used at the collation initialization time, to switch
+  from a full-featured collation handler to a "no contraction" collation
+  handler if the collation is known not to have any contractions.
+*/
+typedef struct
+{
+  MY_COLLATION_HANDLER *pad;
+  MY_COLLATION_HANDLER *nopad;
+  MY_COLLATION_HANDLER *multilevel_pad;
+  MY_COLLATION_HANDLER *multilevel_nopad;
+} MY_COLLATION_HANDLER_PACKAGE;
+
+
+static void my_uca_handler_map(struct charset_info_st *cs,
+                               const MY_COLLATION_HANDLER_PACKAGE *from,
+                               const MY_COLLATION_HANDLER_PACKAGE *to)
+{
+  if (cs->coll == from->pad)                   cs->coll= to->pad;
+  else if (cs->coll == from->nopad)            cs->coll= to->nopad;
+  else if (cs->coll == from->multilevel_pad)   cs->coll= to->multilevel_pad;
+  else if (cs->coll == from->multilevel_nopad) cs->coll= to->multilevel_nopad;
+}
+
+
+/*
+  Define generic collation handlers for multi-level collations with tailoring:
+
+    my_uca_collation_handler_nopad_multilevel_generic
+    my_uca_collation_handler_multilevel_generic
+
+  TODO: Use faster character-set specific versions of MY_COLLATION_HANDLER
+  instead of generic.
+*/
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _generic
+#define MY_MB_WC(scanner, wc, beg, end) (scanner->cs->cset->mb_wc(scanner->cs, wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0
+#define MY_UCA_COMPILE_CONTRACTIONS 1
+#define MY_UCA_COLL_INIT my_coll_init_uca
+#include "ctype-uca.ic"
 
 
 /*
@@ -34336,8 +33864,8 @@ create_tailoring(struct charset_info_st *cs,
   cs->uca[0]= new_uca;
   if (cs->levels_for_order > 1)
     cs->coll= (cs->state & MY_CS_NOPAD) ?
-               &my_collation_generic_uca_nopad_handler_multilevel :
-               &my_collation_any_uca_handler_multilevel;
+               &my_uca_collation_handler_nopad_multilevel_generic :
+               &my_uca_collation_handler_multilevel_generic;
 
 ex:
   (loader->free)(rules.rule);
@@ -34346,235 +33874,17 @@ ex:
   return rc;
 }
 
-/*
-  Universal CHARSET_INFO compatible wrappers
-  for the above internal functions.
-  Should work for any character set.
-*/
-
-static my_bool
-my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
-{
-  cs->pad_char= ' ';
-  cs->ctype= my_charset_utf8_unicode_ci.ctype;
-  if (!cs->caseinfo)
-    cs->caseinfo= &my_unicase_default;
-  return create_tailoring(cs, loader);
-}
-
-
-static int my_strnncoll_any_uca(CHARSET_INFO *cs,
-                                const uchar *s, size_t slen,
-                                const uchar *t, size_t tlen,
-                                my_bool t_is_prefix)
-{
-  return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
-                          s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncoll_any_uca_multilevel(CHARSET_INFO *cs,
-                                           const uchar *s, size_t slen,
-                                           const uchar *t, size_t tlen,
-                                           my_bool t_is_prefix)
-{
-  return my_strnncoll_uca_multilevel(cs, &my_any_uca_scanner_handler,
-                                     s, slen, t, tlen, t_is_prefix);
-}
-
-static int my_strnncollsp_any_uca(CHARSET_INFO *cs,
-                                  const uchar *s, size_t slen,
-                                  const uchar *t, size_t tlen)
-{
-  return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
-}   
-
-
-static int my_strnncollsp_generic_uca_nopad(CHARSET_INFO *cs,
-                                            const uchar *s, size_t slen,
-                                            const uchar *t, size_t tlen)
-{
-  return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
-                          s, slen, t, tlen, FALSE);
-}
-
-
-static int my_strnncollsp_any_uca_multilevel(CHARSET_INFO *cs,
-                                      const uchar *s, size_t slen,
-                                      const uchar *t, size_t tlen)
-{
-  return my_strnncollsp_uca_multilevel(cs, &my_any_uca_scanner_handler,
-                                       s, slen, t, tlen);
-}
-
-static void my_hash_sort_any_uca(CHARSET_INFO *cs,
-                                 const uchar *s, size_t slen,
-                                 ulong *n1, ulong *n2)
-{
-  my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static void my_hash_sort_generic_uca_nopad(CHARSET_INFO *cs,
-                                           const uchar *s, size_t slen,
-                                           ulong *n1, ulong *n2)
-{
-  my_hash_sort_uca_nopad(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
-}
-
-static size_t my_strnxfrm_any_uca(CHARSET_INFO *cs, 
-                                  uchar *dst, size_t dstlen, uint nweights,
-                                  const uchar *src, size_t srclen, uint flags)
-{
-  return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
-                         dst, dstlen, nweights, src, srclen, flags);
-}
-
-static size_t my_strnxfrm_generic_uca_nopad(CHARSET_INFO *cs,
-                                            uchar *dst, size_t dstlen,
-                                            uint nweights,
-                                            const uchar *src, size_t srclen,
-                                            uint flags)
-{
-  return my_strnxfrm_uca_nopad(cs, &my_any_uca_scanner_handler,
-                               dst, dstlen, nweights, src, srclen, flags);
-}
-
-static size_t my_strnxfrm_any_uca_multilevel(CHARSET_INFO *cs, 
-                                             uchar *dst, size_t dstlen,
-                                             uint nweights, const uchar *src,
-                                             size_t srclen, uint flags)
-{
-  return my_strnxfrm_uca_multilevel(cs, &my_any_uca_scanner_handler,
-                                    dst, dstlen, nweights, src, srclen,
-                                    flags);
-}
-
-static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
-{
-  /* UCA uses 2 bytes per weight */
-  return (len + cs->mbmaxlen - 1) / cs->mbmaxlen * cs->strxfrm_multiply * 2;
-}
-
-static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
-{
-  return my_strnxfrmlen_any_uca(cs, len) * cs->levels_for_order;
-}
-
-
-/* NO PAD handler for character sets with mbminlen==1 */
-MY_COLLATION_HANDLER my_collation_mb_uca_nopad_handler =
-{
-    my_coll_init_uca,
-    my_strnncoll_any_uca,
-    my_strnncollsp_generic_uca_nopad,
-    my_strnxfrm_generic_uca_nopad,
-    my_strnxfrmlen_any_uca,
-    my_like_range_mb,
-    my_wildcmp_uca,
-    NULL,
-    my_instr_mb,
-    my_hash_sort_generic_uca_nopad,
-    my_propagate_complex
-};
-
-
-/* NO PAD handler for character sets with mbminlen>=1 */
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler =
-{
-    my_coll_init_uca,
-    my_strnncoll_any_uca,
-    my_strnncollsp_generic_uca_nopad,
-    my_strnxfrm_generic_uca_nopad,
-    my_strnxfrmlen_any_uca,
-    my_like_range_generic,
-    my_wildcmp_uca,
-    NULL,
-    my_instr_mb,
-    my_hash_sort_generic_uca_nopad,
-    my_propagate_complex
-};
-
-
-MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
-{
-    my_coll_init_uca,
-    my_strnncoll_any_uca_multilevel,
-    my_strnncollsp_any_uca_multilevel,
-    my_strnxfrm_any_uca_multilevel,
-    my_strnxfrmlen_any_uca_multilevel,
-    my_like_range_generic,
-    my_wildcmp_uca,
-    NULL,
-    my_instr_mb,
-    my_hash_sort_any_uca,
-    my_propagate_complex
-};
-
-
-MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel =
-{
-    my_coll_init_uca,
-    my_strnncoll_any_uca_multilevel,
-    my_strnncollsp_generic_uca_nopad_multilevel,
-    my_strnxfrm_any_uca_multilevel,
-    my_strnxfrmlen_any_uca_multilevel,
-    my_like_range_generic,
-    my_wildcmp_uca,
-    NULL,
-    my_instr_mb,
-    my_hash_sort_generic_uca_nopad,
-    my_propagate_complex
-};
-
 
 #ifdef HAVE_CHARSET_ucs2
-/*
-  UCS2 optimized CHARSET_INFO compatible wrappers.
-*/
-static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
-                                 const uchar *s, size_t slen,
-                                 const uchar *t, size_t tlen,
-                                 my_bool t_is_prefix)
-{
-  return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
-                          s, slen, t, tlen, t_is_prefix);
-}
 
-static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
-                                   const uchar *s, size_t slen,
-                                   const uchar *t, size_t tlen)
-{
-  return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler, s, slen, t, tlen);
-}   
-
-static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
-                                  const uchar *s, size_t slen,
-                                  ulong *n1, ulong *n2)
-{
-  my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2); 
-}
-
-static size_t my_strnxfrm_ucs2_uca(CHARSET_INFO *cs, 
-                                   uchar *dst, size_t dstlen, uint nweights,
-                                   const uchar *src, size_t srclen, uint flags)
-{
-  return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
-                         dst, dstlen, nweights, src, srclen, flags);
-}
-
-MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
-{
-  my_coll_init_uca,	/* init */
-  my_strnncoll_ucs2_uca,
-  my_strnncollsp_ucs2_uca,
-  my_strnxfrm_ucs2_uca,
-  my_strnxfrmlen_any_uca,
-  my_like_range_generic,
-  my_wildcmp_uca,
-  NULL,
-  my_instr_mb,
-  my_hash_sort_ucs2_uca,
-  my_propagate_complex
-};
+#include "ctype-ucs2.h"  /* presumably provides my_mb_wc_ucs2_quick() */
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _ucs2  /* my_uca_<x>_ucs2 */
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_ucs2_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0        /* presumably: no ASCII fast path */
+#define MY_UCA_COMPILE_CONTRACTIONS 1  /* presumably: keep contraction code */
+#define MY_UCA_COLL_INIT my_coll_init_uca  /* presumably the init callback */
+#include "ctype-uca.ic"  /* presumably #undefs the macros set above */
 
 
 #define MY_CS_UCS2_UCA_FLAGS (MY_CS_COMMON_UCA_FLAGS|MY_CS_NONASCII)
@@ -34609,7 +33919,7 @@ struct charset_info_st my_charset_ucs2_unicode_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
@@ -34641,7 +33951,7 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_latvian_uca_ci=
@@ -34673,7 +33983,7 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_romanian_uca_ci=
@@ -34705,7 +34015,7 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
@@ -34737,7 +34047,7 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_polish_uca_ci=
@@ -34769,7 +34079,7 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_estonian_uca_ci=
@@ -34801,7 +34111,7 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_spanish_uca_ci=
@@ -34833,7 +34143,7 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_swedish_uca_ci=
@@ -34865,7 +34175,7 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_turkish_uca_ci=
@@ -34897,7 +34207,7 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_czech_uca_ci=
@@ -34929,7 +34239,7 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -34962,7 +34272,7 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
@@ -34994,7 +34304,7 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_slovak_uca_ci=
@@ -35026,7 +34336,7 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
@@ -35058,7 +34368,7 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35091,7 +34401,7 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35124,7 +34434,7 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35157,7 +34467,7 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35190,7 +34500,7 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
@@ -35222,7 +34532,7 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35256,7 +34566,7 @@ struct charset_info_st my_charset_ucs2_german2_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
@@ -35288,7 +34598,7 @@ struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35321,7 +34631,7 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35354,7 +34664,7 @@ struct charset_info_st my_charset_ucs2_myanmar_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35387,7 +34697,7 @@ struct charset_info_st my_charset_ucs2_thai_520_w2=
     0,                   /* escape_with_backslash_is_dangerous */
     2,                   /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_any_uca_handler_multilevel
+    &my_uca_collation_handler_multilevel_ucs2
 };
 
 struct charset_info_st my_charset_ucs2_unicode_520_ci=
@@ -35419,7 +34729,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35452,7 +34762,7 @@ struct charset_info_st my_charset_ucs2_vietnamese_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_ucs2_handler,
-    &my_collation_ucs2_uca_handler
+    &my_uca_collation_handler_ucs2
 };
 
 
@@ -35485,7 +34795,7 @@ struct charset_info_st my_charset_ucs2_unicode_nopad_ci=
     0,                         /* escape_with_backslash_is_dangerous */
     1,                         /* levels_for_order */
     &my_charset_ucs2_handler,
-    &my_collation_generic_uca_nopad_handler
+    &my_uca_collation_handler_nopad_ucs2
 };
 
 
@@ -35518,7 +34828,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
     0,                          /* escape_with_backslash_is_dangerous */
     1,                          /* levels_for_order */
     &my_charset_ucs2_handler,
-    &my_collation_generic_uca_nopad_handler
+    &my_uca_collation_handler_nopad_ucs2
 };
 
 
@@ -35526,20 +34836,38 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
 
 
 #ifdef HAVE_CHARSET_utf8
-MY_COLLATION_HANDLER my_collation_any_uca_handler =
+
+static my_bool  /* forward declaration: MY_UCA_COLL_INIT names it below */
+my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
+
+#include "ctype-utf8.h"  /* presumably provides my_mb_wc_utf8mb3_quick() */
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _utf8mb3  /* my_uca_<x>_utf8mb3 */
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1        /* presumably: enable ASCII fast path */
+#define MY_UCA_COMPILE_CONTRACTIONS 1  /* presumably: keep contraction code */
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb3
+#include "ctype-uca.ic"  /* template body (not shown); uses the macros above */
+
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _no_contractions_utf8mb3
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1        /* presumably: enable ASCII fast path */
+#define MY_UCA_COMPILE_CONTRACTIONS 0  /* presumably: omit contraction code */
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb3  /* same init as full set */
+#include "ctype-uca.ic"  /* second expansion, differing in the macros above */
+
+
+static my_bool  /* UCA init, then optional switch to no-contraction handlers */
+my_uca_coll_init_utf8mb3(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
 {
-    my_coll_init_uca,	/* init */
-    my_strnncoll_any_uca,
-    my_strnncollsp_any_uca,
-    my_strnxfrm_any_uca,
-    my_strnxfrmlen_any_uca,
-    my_like_range_mb,
-    my_wildcmp_uca,
-    NULL,
-    my_instr_mb,
-    my_hash_sort_any_uca,
-    my_propagate_complex
-};
+  if (my_coll_init_uca(cs, loader))
+    return TRUE;  /* common init returned non-zero: report it upward */
+  if (my_uca_collation_can_optimize_no_contractions(cs))
+    my_uca_handler_map(cs, &my_uca_package_utf8mb3,  /* from: full set */
+                       &my_uca_package_no_contractions_utf8mb3);  /* to */
+  return FALSE;
+}
 
 
 /* 
@@ -35602,7 +34930,7 @@ struct charset_info_st my_charset_utf8_unicode_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 
@@ -35635,7 +34963,7 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_latvian_uca_ci=
@@ -35667,7 +34995,7 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_romanian_uca_ci=
@@ -35699,7 +35027,7 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_slovenian_uca_ci=
@@ -35731,7 +35059,7 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_polish_uca_ci=
@@ -35763,7 +35091,7 @@ struct charset_info_st my_charset_utf8_polish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_estonian_uca_ci=
@@ -35795,7 +35123,7 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_spanish_uca_ci=
@@ -35827,7 +35155,7 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_swedish_uca_ci=
@@ -35859,7 +35187,7 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_turkish_uca_ci=
@@ -35891,7 +35219,7 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_czech_uca_ci=
@@ -35923,7 +35251,7 @@ struct charset_info_st my_charset_utf8_czech_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 
@@ -35956,7 +35284,7 @@ struct charset_info_st my_charset_utf8_danish_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
@@ -35988,7 +35316,7 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_slovak_uca_ci=
@@ -36020,7 +35348,7 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_spanish2_uca_ci=
@@ -36052,7 +35380,7 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_roman_uca_ci=
@@ -36084,7 +35412,7 @@ struct charset_info_st my_charset_utf8_roman_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_persian_uca_ci=
@@ -36116,7 +35444,7 @@ struct charset_info_st my_charset_utf8_persian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_esperanto_uca_ci=
@@ -36148,7 +35476,7 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_hungarian_uca_ci=
@@ -36180,7 +35508,7 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_sinhala_uca_ci=
@@ -36212,7 +35540,7 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 
@@ -36245,7 +35573,7 @@ struct charset_info_st my_charset_utf8_german2_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
@@ -36277,7 +35605,7 @@ struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 
@@ -36310,7 +35638,7 @@ struct charset_info_st my_charset_utf8_croatian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 
@@ -36343,7 +35671,7 @@ struct charset_info_st my_charset_utf8_myanmar_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 
@@ -36376,7 +35704,7 @@ struct charset_info_st my_charset_utf8_unicode_520_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_thai_520_w2=
@@ -36408,7 +35736,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2=
     0,                   /* escape_with_backslash_is_dangerous */
     2,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler_multilevel
+    &my_uca_collation_handler_multilevel_utf8mb3
 };
 
 struct charset_info_st my_charset_utf8_vietnamese_ci=
@@ -36440,7 +35768,7 @@ struct charset_info_st my_charset_utf8_vietnamese_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb3
 };
 
 
@@ -36473,7 +35801,7 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci=
     0,                             /* escape_with_backslash_is_dangerous */
     1,                             /* levels_for_order */
     &my_charset_utf8_handler,
-    &my_collation_mb_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf8mb3
 };
 
 
@@ -36506,7 +35834,7 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
     0,                                  /* escape_with_backslash_is_dangerous */
     1,                                  /* levels_for_order */
     &my_charset_utf8_handler,
-    &my_collation_mb_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf8mb3
 };
 
 #endif /* HAVE_CHARSET_utf8 */
@@ -36514,6 +35842,39 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
 
 #ifdef HAVE_CHARSET_utf8mb4
 
+static my_bool  /* forward declaration: MY_UCA_COLL_INIT names it below */
+my_uca_coll_init_utf8mb4(struct charset_info_st *cs, MY_CHARSET_LOADER *loader);
+
+/* NOTE(review): is my_mb_wc_utf8mb4_quick visible if utf8 is disabled? */
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _utf8mb4  /* my_uca_<x>_utf8mb4 */
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1        /* presumably: enable ASCII fast path */
+#define MY_UCA_COMPILE_CONTRACTIONS 1  /* presumably: keep contraction code */
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb4
+#include "ctype-uca.ic"  /* template body (not shown); uses the macros above */
+
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _no_contractions_utf8mb4
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_mb
+#define MY_UCA_ASCII_OPTIMIZE 1        /* presumably: enable ASCII fast path */
+#define MY_UCA_COMPILE_CONTRACTIONS 0  /* presumably: omit contraction code */
+#define MY_UCA_COLL_INIT my_uca_coll_init_utf8mb4  /* same init as full set */
+#include "ctype-uca.ic"  /* second expansion, differing in the macros above */
+
+
+static my_bool  /* UCA init, then optional switch to no-contraction handlers */
+my_uca_coll_init_utf8mb4(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
+{
+  if (my_coll_init_uca(cs, loader))
+    return TRUE;  /* common init returned non-zero: report it upward */
+  if (my_uca_collation_can_optimize_no_contractions(cs))
+    my_uca_handler_map(cs, &my_uca_package_utf8mb4,  /* from: full set */
+                       &my_uca_package_no_contractions_utf8mb4);  /* to */
+  return FALSE;
+}
+
+
 extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler;
 
 #define MY_CS_UTF8MB4_UCA_FLAGS  (MY_CS_COMMON_UCA_FLAGS|MY_CS_UNICODE_SUPPLEMENT)
@@ -36548,7 +35909,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 
@@ -36581,7 +35942,7 @@ struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
@@ -36613,7 +35974,7 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
@@ -36645,7 +36006,7 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
@@ -36677,7 +36038,7 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
@@ -36709,7 +36070,7 @@ struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
@@ -36741,7 +36102,7 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
@@ -36773,7 +36134,7 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
@@ -36805,7 +36166,7 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
@@ -36837,7 +36198,7 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
@@ -36869,7 +36230,7 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 
@@ -36902,7 +36263,7 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
@@ -36934,7 +36295,7 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
@@ -36966,7 +36327,7 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
@@ -36998,7 +36359,7 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
@@ -37030,7 +36391,7 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
@@ -37062,7 +36423,7 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
@@ -37094,7 +36455,7 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
@@ -37126,7 +36487,7 @@ struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
@@ -37158,7 +36519,7 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
@@ -37190,7 +36551,7 @@ struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
@@ -37222,7 +36583,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 
@@ -37255,7 +36616,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 
@@ -37288,7 +36649,7 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_thai_520_w2=
@@ -37320,7 +36681,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2=
     0,                   /* escape_with_backslash_is_dangerous */
     2,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler_multilevel
+    &my_uca_collation_handler_multilevel_utf8mb4
 };
 
 struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
@@ -37352,7 +36713,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 
@@ -37385,7 +36746,7 @@ struct charset_info_st my_charset_utf8mb4_vietnamese_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf8mb4_handler,
-    &my_collation_any_uca_handler
+    &my_uca_collation_handler_utf8mb4
 };
 
 
@@ -37418,7 +36779,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci=
     0,                              /* escape_with_backslash_is_dangerous */
     1,                              /* levels_for_order */
     &my_charset_utf8mb4_handler,
-    &my_collation_mb_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf8mb4
 };
 
 
@@ -37451,7 +36812,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
     0,                              /* escape_with_backslash_is_dangerous */
     1,                              /* levels_for_order */
     &my_charset_utf8mb4_handler,
-    &my_collation_mb_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf8mb4
 };
 
 
@@ -37460,20 +36821,14 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
 
 #ifdef HAVE_CHARSET_utf32
 
-MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
-{
-    my_coll_init_uca,        /* init */
-    my_strnncoll_any_uca,
-    my_strnncollsp_any_uca,
-    my_strnxfrm_any_uca,
-    my_strnxfrmlen_any_uca,
-    my_like_range_generic,
-    my_wildcmp_uca,
-    NULL,
-    my_instr_mb,
-    my_hash_sort_any_uca,
-    my_propagate_complex
-};
+#include "ctype-utf32.h"  /* presumably declares my_mb_wc_utf32_quick */
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _utf32  /* name suffix */
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf32_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0        /* fast path reads one byte per char */
+#define MY_UCA_COMPILE_CONTRACTIONS 1  /* keep contraction handling */
+#define MY_UCA_COLL_INIT my_coll_init_uca  /* shared init used directly */
+#include "ctype-uca.ic"                /* instantiate for utf32 */
 
 
 extern MY_CHARSET_HANDLER my_charset_utf32_handler;
@@ -37510,7 +36865,7 @@ struct charset_info_st my_charset_utf32_unicode_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 
@@ -37543,7 +36898,7 @@ struct charset_info_st my_charset_utf32_icelandic_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_latvian_uca_ci=
@@ -37575,7 +36930,7 @@ struct charset_info_st my_charset_utf32_latvian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_romanian_uca_ci=
@@ -37607,7 +36962,7 @@ struct charset_info_st my_charset_utf32_romanian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_slovenian_uca_ci=
@@ -37639,7 +36994,7 @@ struct charset_info_st my_charset_utf32_slovenian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_polish_uca_ci=
@@ -37671,7 +37026,7 @@ struct charset_info_st my_charset_utf32_polish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_estonian_uca_ci=
@@ -37703,7 +37058,7 @@ struct charset_info_st my_charset_utf32_estonian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_spanish_uca_ci=
@@ -37735,7 +37090,7 @@ struct charset_info_st my_charset_utf32_spanish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_swedish_uca_ci=
@@ -37767,7 +37122,7 @@ struct charset_info_st my_charset_utf32_swedish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_turkish_uca_ci=
@@ -37799,7 +37154,7 @@ struct charset_info_st my_charset_utf32_turkish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_czech_uca_ci=
@@ -37831,7 +37186,7 @@ struct charset_info_st my_charset_utf32_czech_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 
@@ -37864,7 +37219,7 @@ struct charset_info_st my_charset_utf32_danish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
@@ -37896,7 +37251,7 @@ struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_slovak_uca_ci=
@@ -37928,7 +37283,7 @@ struct charset_info_st my_charset_utf32_slovak_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_spanish2_uca_ci=
@@ -37960,7 +37315,7 @@ struct charset_info_st my_charset_utf32_spanish2_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_roman_uca_ci=
@@ -37992,7 +37347,7 @@ struct charset_info_st my_charset_utf32_roman_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_persian_uca_ci=
@@ -38024,7 +37379,7 @@ struct charset_info_st my_charset_utf32_persian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_esperanto_uca_ci=
@@ -38056,7 +37411,7 @@ struct charset_info_st my_charset_utf32_esperanto_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_hungarian_uca_ci=
@@ -38088,7 +37443,7 @@ struct charset_info_st my_charset_utf32_hungarian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_sinhala_uca_ci=
@@ -38120,7 +37475,7 @@ struct charset_info_st my_charset_utf32_sinhala_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_german2_uca_ci=
@@ -38152,7 +37507,7 @@ struct charset_info_st my_charset_utf32_german2_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
@@ -38184,7 +37539,7 @@ struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 struct charset_info_st my_charset_utf32_croatian_uca_ci=
@@ -38216,7 +37571,7 @@ struct charset_info_st my_charset_utf32_croatian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 
@@ -38249,7 +37604,7 @@ struct charset_info_st my_charset_utf32_myanmar_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 
@@ -38282,7 +37637,7 @@ struct charset_info_st my_charset_utf32_thai_520_w2=
     0,                  /* escape_with_backslash_is_dangerous */
     2,                  /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_any_uca_handler_multilevel
+    &my_uca_collation_handler_multilevel_utf32
 };
 
 
@@ -38315,7 +37670,7 @@ struct charset_info_st my_charset_utf32_unicode_520_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 
@@ -38348,7 +37703,7 @@ struct charset_info_st my_charset_utf32_vietnamese_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf32_handler,
-    &my_collation_utf32_uca_handler
+    &my_uca_collation_handler_utf32
 };
 
 
@@ -38381,7 +37736,7 @@ struct charset_info_st my_charset_utf32_unicode_nopad_ci=
     0,                           /* escape_with_backslash_is_dangerous */
     1,                           /* levels_for_order */
     &my_charset_utf32_handler,
-    &my_collation_generic_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf32
 };
 
 
@@ -38414,7 +37769,7 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
     0,                           /* escape_with_backslash_is_dangerous */
     1,                           /* levels_for_order */
     &my_charset_utf32_handler,
-    &my_collation_generic_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf32
 };
 
 
@@ -38424,21 +37779,14 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
 
 #ifdef HAVE_CHARSET_utf16
 
-
-MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
-{
-    my_coll_init_uca,        /* init */
-    my_strnncoll_any_uca,
-    my_strnncollsp_any_uca,
-    my_strnxfrm_any_uca,
-    my_strnxfrmlen_any_uca,
-    my_like_range_generic,
-    my_wildcmp_uca,
-    NULL,
-    my_instr_mb,
-    my_hash_sort_any_uca,
-    my_propagate_complex
-};
+#include "ctype-utf16.h"  /* presumably declares my_mb_wc_utf16_quick */
+#define MY_FUNCTION_NAME(x)   my_uca_ ## x ## _utf16  /* name suffix */
+#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf16_quick(wc, beg, end))
+#define MY_LIKE_RANGE my_like_range_generic
+#define MY_UCA_ASCII_OPTIMIZE 0        /* fast path reads one byte per char */
+#define MY_UCA_COMPILE_CONTRACTIONS 1  /* keep contraction handling */
+#define MY_UCA_COLL_INIT my_coll_init_uca  /* shared init used directly */
+#include "ctype-uca.ic"                /* instantiate for utf16 */
 
 
 extern MY_CHARSET_HANDLER my_charset_utf16_handler;
@@ -38475,7 +37823,7 @@ struct charset_info_st my_charset_utf16_unicode_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -38508,7 +37856,7 @@ struct charset_info_st my_charset_utf16_icelandic_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_latvian_uca_ci=
@@ -38540,7 +37888,7 @@ struct charset_info_st my_charset_utf16_latvian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_romanian_uca_ci=
@@ -38572,7 +37920,7 @@ struct charset_info_st my_charset_utf16_romanian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_slovenian_uca_ci=
@@ -38604,7 +37952,7 @@ struct charset_info_st my_charset_utf16_slovenian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_polish_uca_ci=
@@ -38636,7 +37984,7 @@ struct charset_info_st my_charset_utf16_polish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_estonian_uca_ci=
@@ -38668,7 +38016,7 @@ struct charset_info_st my_charset_utf16_estonian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_spanish_uca_ci=
@@ -38700,7 +38048,7 @@ struct charset_info_st my_charset_utf16_spanish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_swedish_uca_ci=
@@ -38732,7 +38080,7 @@ struct charset_info_st my_charset_utf16_swedish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_turkish_uca_ci=
@@ -38764,7 +38112,7 @@ struct charset_info_st my_charset_utf16_turkish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_czech_uca_ci=
@@ -38796,7 +38144,7 @@ struct charset_info_st my_charset_utf16_czech_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -38829,7 +38177,7 @@ struct charset_info_st my_charset_utf16_danish_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
@@ -38861,7 +38209,7 @@ struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_slovak_uca_ci=
@@ -38893,7 +38241,7 @@ struct charset_info_st my_charset_utf16_slovak_uca_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_spanish2_uca_ci=
@@ -38925,7 +38273,7 @@ struct charset_info_st my_charset_utf16_spanish2_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_roman_uca_ci=
@@ -38957,7 +38305,7 @@ struct charset_info_st my_charset_utf16_roman_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_persian_uca_ci=
@@ -38989,7 +38337,7 @@ struct charset_info_st my_charset_utf16_persian_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_esperanto_uca_ci=
@@ -39021,7 +38369,7 @@ struct charset_info_st my_charset_utf16_esperanto_uca_ci=
     0,                  /* escape_with_backslash_is_dangerous */
     1,                  /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_hungarian_uca_ci=
@@ -39053,7 +38401,7 @@ struct charset_info_st my_charset_utf16_hungarian_uca_ci=
     0,                 /* escape_with_backslash_is_dangerous */
     1,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_sinhala_uca_ci=
@@ -39085,7 +38433,7 @@ struct charset_info_st my_charset_utf16_sinhala_uca_ci=
     0,                 /* escape_with_backslash_is_dangerous */
     1,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 struct charset_info_st my_charset_utf16_german2_uca_ci=
@@ -39117,7 +38465,7 @@ struct charset_info_st my_charset_utf16_german2_uca_ci=
     0,                 /* escape_with_backslash_is_dangerous */
     1,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -39150,7 +38498,7 @@ struct charset_info_st my_charset_utf16_croatian_mysql561_uca_ci=
     0,                 /* escape_with_backslash_is_dangerous */
     1,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -39183,7 +38531,7 @@ struct charset_info_st my_charset_utf16_croatian_uca_ci=
     0,                 /* escape_with_backslash_is_dangerous */
     1,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -39216,7 +38564,7 @@ struct charset_info_st my_charset_utf16_myanmar_uca_ci=
     0,                 /* escape_with_backslash_is_dangerous */
     1,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -39249,7 +38597,7 @@ struct charset_info_st my_charset_utf16_thai_520_w2=
     0,                 /* escape_with_backslash_is_dangerous */
     2,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_any_uca_handler_multilevel
+    &my_uca_collation_handler_multilevel_utf16
 };
 
 
@@ -39282,7 +38630,7 @@ struct charset_info_st my_charset_utf16_unicode_520_ci=
     0,                   /* escape_with_backslash_is_dangerous */
     1,                   /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -39315,7 +38663,7 @@ struct charset_info_st my_charset_utf16_vietnamese_ci=
     0,                 /* escape_with_backslash_is_dangerous */
     1,                 /* levels_for_order   */
     &my_charset_utf16_handler,
-    &my_collation_utf16_uca_handler
+    &my_uca_collation_handler_utf16
 };
 
 
@@ -39348,7 +38696,7 @@ struct charset_info_st my_charset_utf16_unicode_nopad_ci=
     0,                           /* escape_with_backslash_is_dangerous */
     1,                           /* levels_for_order */
     &my_charset_utf16_handler,
-    &my_collation_generic_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf16
 };
 
 
@@ -39381,7 +38729,7 @@ struct charset_info_st my_charset_utf16_unicode_520_nopad_ci=
     0,                           /* escape_with_backslash_is_dangerous */
     1,                           /* levels_for_order */
     &my_charset_utf16_handler,
-    &my_collation_generic_uca_nopad_handler
+    &my_uca_collation_handler_nopad_utf16
 };
 
 
diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic
new file mode 100644
index 00000000000..70c10199e3e
--- /dev/null
+++ b/strings/ctype-uca.ic
@@ -0,0 +1,839 @@
+/*
+  Copyright (c) 2018 MariaDB Corporation
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+
+#ifndef MY_FUNCTION_NAME
+#error MY_FUNCTION_NAME is not defined
+#endif
+#ifndef MY_MB_WC
+#error MY_MB_WC is not defined
+#endif
+#ifndef MY_LIKE_RANGE
+#error MY_LIKE_RANGE is not defined
+#endif
+#ifndef MY_UCA_ASCII_OPTIMIZE
+#error MY_UCA_ASCII_OPTIMIZE is not defined
+#endif
+#ifndef MY_UCA_COMPILE_CONTRACTIONS
+#error MY_UCA_COMPILE_CONTRACTIONS is not defined
+#endif
+#ifndef MY_UCA_COLL_INIT
+#error MY_UCA_COLL_INIT is not defined
+#endif
+
+
+static inline int
+MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
+{
+  /*
+    Check if the weights for the previous character have been
+    already fully scanned. If yes, then get the next character and
+    initialize wbeg and wlength to its weight string.
+  */
+
+  if (scanner->wbeg[0])      /* More weights left from the previous step: */
+    return *scanner->wbeg++; /* return the next weight from expansion     */
+
+  do
+  {
+    const uint16 *wpage;
+    my_wc_t wc[MY_UCA_MAX_CONTRACTION];
+    int mblen;
+
+    /* Get next character */
+#if MY_UCA_ASCII_OPTIMIZE
+    /* Get next ASCII character */
+    if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
+    {
+      wc[0]= scanner->sbeg[0];
+      scanner->sbeg+= 1;
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+      if (my_uca_needs_context_handling(scanner->level, wc[0]))
+      {
+        uint16 *cweight= my_uca_context_weight_find(scanner, wc);
+        if (cweight)
+          return *cweight;
+      }
+#endif
+
+      scanner->page= 0;
+      scanner->code= (int) wc[0];
+      scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
+      if (scanner->wbeg[0])
+        return *scanner->wbeg++;
+      continue;
+    }
+    else
+#endif
+    /* Get next MB character */
+    if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
+                                       scanner->send)) <= 0))
+    {
+      if (scanner->sbeg >= scanner->send)
+        return -1; /* No more bytes, end of line reached */
+      /*
+        There are some more bytes left. Non-positive mblen means that
+        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
+      */
+      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
+      {
+        /* For safety purposes don't go beyond the string range. */
+        scanner->sbeg= scanner->send;
+      }
+      /*
+        Treat every complete or incomplete mbminlen unit as a weight which is
+        greater than weight for any possible normal character.
+        0xFFFF is greater than any possible weight in the UCA weight table.
+      */
+      return 0xFFFF;
+    }
+
+    scanner->sbeg+= mblen;
+    if (wc[0] > scanner->level->maxchar)
+    {
+      /* Return 0xFFFD as weight for all characters outside BMP */
+      scanner->wbeg= nochar;
+      return 0xFFFD;
+    }
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+    if (my_uca_needs_context_handling(scanner->level, wc[0]))
+    {
+      uint16 *cweight= my_uca_context_weight_find(scanner, wc);
+      if (cweight)
+        return *cweight;
+    }
+#endif
+
+    /* Process single character */
+    scanner->page= wc[0] >> 8;
+    scanner->code= wc[0] & 0xFF;
+
+    /* If weight page for wc[0] does not exist, then calculate algorithmically */
+    if (!(wpage= scanner->level->weights[scanner->page]))
+      return my_uca_scanner_next_implicit(scanner);
+
+    /* Calculate pointer to wc[0]'s weight, using page and offset */
+    scanner->wbeg= wpage +
+                   scanner->code * scanner->level->lengths[scanner->page];
+  } while (!scanner->wbeg[0]); /* Skip ignorable characters */
+
+  return *scanner->wbeg++;
+}
+
+
+
+/*
+  Compares two strings according to the collation
+
+  SYNOPSIS:
+    strnncoll_onelevel()
+    cs		Character set information
+    level       Weight level (0 primary, 1 secondary, 2 tertiary, etc)
+    s		First string
+    slen	First string length
+    t		Second string
+    tlen	Second string length
+    t_is_prefix	If TRUE, reaching the end of t counts as equality (returns 0)
+  
+  NOTES:
+    Initializes two weight scanners and gets weights
+    corresponding to two strings in a loop. If weights are not
+    the same at some step then returns their difference.
+    
+    In the while() comparison these situations are possible:
+    1. (s_res>0) and (t_res>0) and (s_res == t_res)
+       Weights are the same so far, continue comparison
+    2. (s_res>0) and (t_res>0) and (s_res!=t_res)
+       A difference has been found, return.
+    3. (s_res>0) and (t_res<0)
+       We have reached the end of the second string, or found
+       an illegal multibyte sequence in the second string.
+       Return a positive number, i.e. the first string is bigger.
+    4. (s_res<0) and (t_res>0)   
+       We have reached the end of the first string, or found
+       an illegal multibyte sequence in the first string.
+       Return a negative number, i.e. the second string is bigger.
+    5. (s_res<0) and (t_res<0)
+       Both scanners returned -1. It means we have reached
+       the end-of-string or illegal-sequence in both strings
+       at the same time. Return 0, strings are equal.
+    
+  RETURN
+    Difference between two strings, according to the collation:
+    0               - means strings are equal
+    negative number - means the first string is smaller
+    positive number - means the first string is bigger
+*/
+
+static int
+MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs, 
+                                     const MY_UCA_WEIGHT_LEVEL *level,
+                                     const uchar *s, size_t slen,
+                                     const uchar *t, size_t tlen,
+                                     my_bool t_is_prefix)
+{
+  my_uca_scanner sscanner;
+  my_uca_scanner tscanner;
+  int s_res;
+  int t_res;
+  
+  my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+  my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+  
+  do
+  {
+    s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+    t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+  } while ( s_res == t_res && s_res >0);
+  
+  return  (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
+}
+
+
+/*
+  One-level, PAD SPACE.
+*/
+static int
+MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs,
+                            const uchar *s, size_t slen,
+                            const uchar *t, size_t tlen,
+                            my_bool t_is_prefix)
+{
+  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0],
+                                              s, slen, t, tlen, t_is_prefix);
+}
+
+
+/*
+  Multi-level, PAD SPACE.
+*/
+static int
+MY_FUNCTION_NAME(strnncoll_multilevel)(CHARSET_INFO *cs,
+                                       const uchar *s, size_t slen,
+                                       const uchar *t, size_t tlen,
+                                       my_bool t_is_prefix)
+{
+  uint i, num_level= cs->levels_for_order;
+  for (i= 0; i != num_level; i++)
+  {
+    int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
+                                                  s, slen, t, tlen,
+                                                  t_is_prefix);
+    if (ret)
+       return ret;
+  }
+  return 0;
+}
+
+
+/*
+  Compares two strings according to the collation,
+  ignoring trailing spaces.
+
+  SYNOPSIS:
+    strnncollsp_onelevel()
+    cs		Character set information
+    level       UCA weight level
+    s		First string
+    slen	First string length
+    t		Second string
+    tlen	Second string length
+    level	DUCET weight level
+
+  NOTES:
+    Works exactly the same as strnncoll_onelevel(),
+    but ignores trailing spaces.
+
+    In the while() comparison these situations are possible:
+    1. (s_res>0) and (t_res>0) and (s_res == t_res)
+       Weights are the same so far, continue comparison
+    2. (s_res>0) and (t_res>0) and (s_res!=t_res)
+       A difference has been found, return.
+    3. (s_res>0) and (t_res<0)
+       We have reached the end of the second string, or found
+       an illegal multibyte sequence in the second string.
+       Compare the first string to an infinite array of
+       space characters until difference is found, or until
+       the end of the first string.
+    4. (s_res<0) and (t_res>0)
+       We have reached the end of the first string, or found
+       an illegal multibyte sequence in the first string.
+       Compare the second string to an infinite array of
+       space characters until difference is found or until
+       the end of the second string.
+    5. (s_res<0) and (t_res<0)
+       Both scanners returned -1. It means we have reached
+       the end-of-string or illegal-sequence in both strings
+       at the same time. Return 0, strings are equal.
+
+  RETURN
+    Difference between two strings, according to the collation:
+    0               - means strings are equal
+    negative number - means the first string is smaller
+    positive number - means the first string is bigger
+*/
+
+static int
+MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
+                                       const MY_UCA_WEIGHT_LEVEL *level,
+                                       const uchar *s, size_t slen,
+                                       const uchar *t, size_t tlen)
+{
+  my_uca_scanner sscanner, tscanner;
+  int s_res, t_res;
+
+  my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+  my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+
+  do
+  {
+    s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+    t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+  } while ( s_res == t_res && s_res >0);
+
+  if (s_res > 0 && t_res < 0)
+  {
+    /* Calculate weight for SPACE character */
+    t_res= my_space_weight(level);
+
+    /* compare the first string to spaces */
+    do
+    {
+      if (s_res != t_res)
+        return (s_res - t_res);
+      s_res= MY_FUNCTION_NAME(scanner_next)(&sscanner);
+    } while (s_res > 0);
+    return 0;
+  }
+
+  if (s_res < 0 && t_res > 0)
+  {
+    /* Calculate weight for SPACE character */
+    s_res= my_space_weight(level);
+
+    /* compare the second string to spaces */
+    do
+    {
+      if (s_res != t_res)
+        return (s_res - t_res);
+      t_res= MY_FUNCTION_NAME(scanner_next)(&tscanner);
+    } while (t_res > 0);
+    return 0;
+  }
+
+  return ( s_res - t_res );
+}
+
+
+/*
+  One-level, PAD SPACE
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs,
+                              const uchar *s, size_t slen,
+                              const uchar *t, size_t tlen)
+{
+  return MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[0],
+                                                s, slen, t, tlen);
+}
+
+
+/*
+  One-level, NO PAD
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nopad)(CHARSET_INFO *cs,
+                                    const uchar *s, size_t slen,
+                                    const uchar *t, size_t tlen)
+{
+  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[0],
+                                              s, slen, t, tlen, FALSE);
+}
+
+
+/*
+  Multi-level, PAD SPACE
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_multilevel)(CHARSET_INFO *cs,
+                                         const uchar *s, size_t slen,
+                                         const uchar *t, size_t tlen)
+{
+
+  uint i, num_level= cs->levels_for_order;
+  for (i= 0; i != num_level; i++)
+  {
+    int ret= MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[i],
+                                                    s, slen, t, tlen);
+    if (ret)
+      return ret;
+  }
+  return 0;
+}
+
+
+/*
+  Multi-level, NO PAD
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
+                                               const uchar *s, size_t slen,
+                                               const uchar *t, size_t tlen)
+{
+  uint num_level= cs->levels_for_order;
+  uint i;
+  for (i= 0; i != num_level; i++)
+  {
+    int ret= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[i],
+                                                  s, slen, t, tlen, FALSE);
+    if (ret)
+       return ret;
+  }
+  return 0;
+}
+
+
+
+/*
+  Calculates hash value for the given string,
+  according to the collation, and ignoring trailing spaces.
+
+  SYNOPSIS:
+    hash_sort()
+    cs		Character set information
+    s		String
+    slen	String's length
+    nr1		First hash parameter
+    nr2		Second hash parameter
+
+  NOTES:
+    Scans weights consecutively and updates
+    hash parameters nr1 and nr2. In a case insensitive collation,
+    upper and lower case of the same letter will return the same
+    weight sequence, and thus will produce the same hash values
+    in nr1 and nr2.
+
+    This function is used for one-level and for multi-level collations.
+    We intentionally use only primary level in multi-level collations.
+    This helps to have PARTITION BY KEY put primarily equal records
+    into the same partition. E.g. in utf8_thai_520_ci records that differ
+    only in tone marks go into the same partition.
+
+  RETURN
+    N/A
+*/
+
+static void
+MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
+                            const uchar *s, size_t slen,
+                            ulong *nr1, ulong *nr2)
+{
+  int   s_res;
+  my_uca_scanner scanner;
+  int space_weight= my_space_weight(&cs->uca->level[0]);
+  register ulong m1= *nr1, m2= *nr2;
+
+  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
+
+  while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
+  {
+    if (s_res == space_weight)
+    {
+      /* Combine all spaces to be able to skip end spaces */
+      uint count= 0;
+      do
+      {
+        count++;
+        if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0)
+        {
+          /* Skip spaces at end of string */
+          goto end;
+        }
+      }
+      while (s_res == space_weight);
+
+      /* Add back the hash for the space characters */
+      do
+      {
+        /*
+          We can't use MY_HASH_ADD_16() here because of a mistake
+          in the original code, where we added the 16-bit value the
+          opposite way.  Changing this would cause old partitioned tables
+          to fail.
+        */
+        MY_HASH_ADD(m1, m2, space_weight >> 8);
+        MY_HASH_ADD(m1, m2, space_weight & 0xFF);
+      }
+      while (--count != 0);
+
+    }
+    /* See comment above why we can't use MY_HASH_ADD_16() */
+    MY_HASH_ADD(m1, m2, s_res >> 8);
+    MY_HASH_ADD(m1, m2, s_res & 0xFF);
+  }
+end:
+  *nr1= m1;
+  *nr2= m2;
+}
+
+
+static void
+MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
+                                  const uchar *s, size_t slen,
+                                  ulong *nr1, ulong *nr2)
+{
+  int   s_res;
+  my_uca_scanner scanner;
+  register ulong m1= *nr1, m2= *nr2;
+
+  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
+
+  while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
+  {
+    /* See comment above why we can't use MY_HASH_ADD_16() */
+    MY_HASH_ADD(m1, m2, s_res >> 8);
+    MY_HASH_ADD(m1, m2, s_res & 0xFF);
+  }
+  *nr1= m1;
+  *nr2= m2;
+}
+
+
+
+/*
+  For the given string creates its "binary image", suitable
+  to be used in binary comparison, i.e. in memcmp(). 
+  
+  SYNOPSIS:
+    my_strnxfrm_uca()
+    cs		Character set information
+    dst		Where to write the image
+    dstlen	Space available for the image, in bytes
+    src		The source string
+    srclen	Length of the source string, in bytes
+  
+  NOTES:
+    In a loop, scans weights from the source string and writes
+    them into the binary image. In a case insensitive collation,
+    upper and lower cases of the same letter will produce the
+    same image subsequences. When we have reached the end-of-string
+    or found an illegal multibyte sequence, the loop stops.
+
+    It is impossible to restore the original string using its
+    binary image. 
+    
+    Binary images are used for bulk comparison purposes,
+    e.g. in ORDER BY, when it is more efficient to create
+    a binary image and use it instead of weight scanner
+    for the original strings for every comparison.
+  
+  RETURN
+    Number of bytes that have been written into the binary image.
+*/
+
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
+                                             MY_UCA_WEIGHT_LEVEL *level,
+                                             uchar *dst, uchar *de,
+                                             uint *nweights,
+                                             const uchar *src, size_t srclen)
+{
+  my_uca_scanner scanner;
+  int s_res;
+
+  DBUG_ASSERT(src || !srclen);
+
+#if MY_UCA_ASCII_OPTIMIZE && !MY_UCA_COMPILE_CONTRACTIONS
+ /*
+    Fast path for the ASCII range with no contractions.
+  */
+  {
+    const uchar *de2= de - 1; /* Last position where 2 bytes fit */
+    const uint16 *weights0= level->weights[0];
+    uint lengths0= level->lengths[0];
+    for ( ; ; src++, srclen--)
+    {
+      const uint16 *weight;
+      if (!srclen || !*nweights)
+        return dst;         /* Done */
+      if (*src > 0x7F)
+        break;              /* Non-ASCII */
+
+      weight= weights0 + (((uint) *src) * lengths0);
+      if (!(s_res= *weight))
+        continue;           /* Ignorable */
+      if (weight[1])        /* Expansion (e.g. in a user defined collation) */
+        break;
+
+      /* Here we have a character with exactly one 2-byte UCA weight */
+      if (dst < de2)        /* Most typical case is when both bytes fit */
+      {
+        *dst++= s_res >> 8;
+        *dst++= s_res & 0xFF;
+        (*nweights)--;
+        continue;
+      }
+      if (dst >= de)        /* No space left in "dst" */
+        return dst;
+      *dst++= s_res >> 8;   /* There is space only for one byte */
+      (*nweights)--;
+      return dst;
+    }
+  }
+#endif
+
+  my_uca_scanner_init_any(&scanner, cs, level, src, srclen);
+  for (; dst < de && *nweights &&
+         (s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) > 0 ; (*nweights)--)
+  {
+    *dst++= s_res >> 8;
+    if (dst < de)
+      *dst++= s_res & 0xFF;
+  }
+  return dst;
+}
+
+
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_onelevel)(CHARSET_INFO *cs,
+                                    MY_UCA_WEIGHT_LEVEL *level,
+                                    uchar *dst, uchar *de, uint nweights,
+                                    const uchar *src, size_t srclen, uint flags)
+{
+  uchar *d0= dst;
+  dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
+                                                    dst, de, &nweights,
+                                                    src, srclen);
+  DBUG_ASSERT(dst <= de);
+  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+    dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
+  DBUG_ASSERT(dst <= de);
+  my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
+  return dst;
+}
+
+
+
+static uchar *
+MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(CHARSET_INFO *cs,
+                                          MY_UCA_WEIGHT_LEVEL *level,
+                                          uchar *dst, uchar *de, uint nweights,
+                                          const uchar *src, size_t srclen,
+                                          uint flags)
+{
+  uchar *d0= dst;
+  dst= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
+                                                    dst, de, &nweights,
+                                                    src, srclen);
+  DBUG_ASSERT(dst <= de);
+  /*  Pad with the minimum possible weight on this level */
+  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+    dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
+  DBUG_ASSERT(dst <= de);
+  my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
+  return dst;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+                           uchar *dst, size_t dstlen, uint nweights,
+                           const uchar *src, size_t srclen, uint flags)
+{
+  uchar *d0= dst;
+  uchar *de= dst + dstlen;
+
+  /*
+    There are two ways to handle trailing spaces for PAD SPACE collations:
+    1. Keep trailing spaces as they are, so have strnxfrm_onelevel() scan
+       spaces as normal characters. This will call scanner_next() for every
+       trailing space and calculate its weight using UCA weights.
+    2. Strip trailing spaces before calling strnxfrm_onelevel(), as it will
+       append weights for implicit spaces anyway, up to the desired key size.
+       This will effectively generate exactly the same sortable key result.
+    The latter is much faster.
+  */
+
+  if (flags & MY_STRXFRM_PAD_WITH_SPACE)
+    srclen= cs->cset->lengthsp(cs, (const char*) src, srclen);
+  dst= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, &cs->uca->level[0],
+                                           dst, de, nweights,
+                                           src, srclen, flags);
+  /*
+    This can probably be changed to memset(dst, 0, de - dst),
+    like my_strnxfrm_uca_multilevel() does.
+  */
+  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+    dst= my_strnxfrm_uca_pad(dst, de, my_space_weight(&cs->uca->level[0]));
+  return dst - d0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
+                                 uchar *dst, size_t dstlen,
+                                 uint nweights,
+                                 const uchar *src, size_t srclen,
+                                 uint flags)
+{
+  uchar *d0= dst;
+  uchar *de= dst + dstlen;
+
+  dst= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, &cs->uca->level[0],
+                                                 dst, de, nweights,
+                                                 src, srclen, flags);
+  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+  {
+    memset(dst, 0, de - dst);
+    dst= de;
+  }
+  return dst - d0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_multilevel)(CHARSET_INFO *cs, 
+                                      uchar *dst, size_t dstlen,
+                                      uint nweights,
+                                      const uchar *src, size_t srclen,
+                                      uint flags)
+{
+  uint num_level= cs->levels_for_order;
+  uchar *d0= dst;
+  uchar *de= dst + dstlen;
+  uint current_level;
+
+  for (current_level= 0; current_level != num_level; current_level++)
+  {
+    if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
+        (flags & (MY_STRXFRM_LEVEL1 << current_level)))
+      dst= cs->state & MY_CS_NOPAD ?
+           MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs,
+                                          &cs->uca->level[current_level],
+                                          dst, de, nweights,
+                                          src, srclen, flags) :
+           MY_FUNCTION_NAME(strnxfrm_onelevel)(cs,
+                                    &cs->uca->level[current_level],
+                                    dst, de, nweights,
+                                    src, srclen, flags);
+  }
+
+  if (dst < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
+  {
+    memset(dst, 0, de - dst);
+    dst= de;
+  }
+
+  return dst - d0;
+}
+
+
+/*
+  One-level, PAD SPACE
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
+{
+  MY_UCA_COLL_INIT,
+  MY_FUNCTION_NAME(strnncoll),
+  MY_FUNCTION_NAME(strnncollsp),
+  MY_FUNCTION_NAME(strnxfrm),
+  my_strnxfrmlen_any_uca,
+  MY_LIKE_RANGE,
+  my_wildcmp_uca,
+  NULL,                                /* strcasecmp() */
+  my_instr_mb,
+  MY_FUNCTION_NAME(hash_sort),
+  my_propagate_complex
+};
+
+
+/*
+  One-level, NO PAD
+  For character sets with mbminlen==1 use MY_LIKE_RANGE=my_like_range_mb
+  For character sets with mbminlen>=2 use MY_LIKE_RANGE=my_like_range_generic
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
+{
+  MY_UCA_COLL_INIT,
+  MY_FUNCTION_NAME(strnncoll),
+  MY_FUNCTION_NAME(strnncollsp_nopad),
+  MY_FUNCTION_NAME(strnxfrm_nopad),
+  my_strnxfrmlen_any_uca,
+  MY_LIKE_RANGE,    /* my_like_range_mb or my_like_range_generic */
+  my_wildcmp_uca,
+  NULL,                                /* strcasecmp() */
+  my_instr_mb,
+  MY_FUNCTION_NAME(hash_sort_nopad),
+  my_propagate_complex
+};
+
+
+/*
+  Multi-level, PAD SPACE
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
+{
+  MY_UCA_COLL_INIT,
+  MY_FUNCTION_NAME(strnncoll_multilevel),
+  MY_FUNCTION_NAME(strnncollsp_multilevel),
+  MY_FUNCTION_NAME(strnxfrm_multilevel),
+  my_strnxfrmlen_any_uca_multilevel,
+  MY_LIKE_RANGE,
+  my_wildcmp_uca,
+  NULL,                                /* strcasecmp() */
+  my_instr_mb,
+  MY_FUNCTION_NAME(hash_sort),
+  my_propagate_complex
+};
+
+
+/*
+  Multi-level, NO PAD
+*/
+MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
+{
+  MY_UCA_COLL_INIT,
+  MY_FUNCTION_NAME(strnncoll_multilevel),
+  MY_FUNCTION_NAME(strnncollsp_nopad_multilevel),
+  MY_FUNCTION_NAME(strnxfrm_multilevel),
+  my_strnxfrmlen_any_uca_multilevel,
+  MY_LIKE_RANGE,
+  my_wildcmp_uca,
+  NULL,                                /* strcasecmp() */
+  my_instr_mb,
+  MY_FUNCTION_NAME(hash_sort),
+  my_propagate_complex
+};
+
+
+MY_COLLATION_HANDLER_PACKAGE MY_FUNCTION_NAME(package)=
+{
+  &MY_FUNCTION_NAME(collation_handler),
+  &MY_FUNCTION_NAME(collation_handler_nopad),
+  &MY_FUNCTION_NAME(collation_handler_multilevel),
+  &MY_FUNCTION_NAME(collation_handler_nopad_multilevel)
+};
+
+
+#undef MY_FUNCTION_NAME
+#undef MY_MB_WC
+#undef MY_LIKE_RANGE
+#undef MY_UCA_ASCII_OPTIMIZE
+#undef MY_UCA_COMPILE_CONTRACTIONS
+#undef MY_UCA_COLL_INIT
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 7596b7f2168..28e7def3ddf 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -23,6 +23,8 @@
 #include <my_sys.h>
 #include <stdarg.h>
 
+#include "ctype-unidata.h"
+
 
 #if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
 #define HAVE_CHARSET_mb2
@@ -1184,35 +1186,7 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
   but the JSON functions needed my_utf16_uni()
   so the #ifdef was moved lower.
 */
-
-
-/*
-  D800..DB7F - Non-provate surrogate high (896 pages)
-  DB80..DBFF - Private surrogate high     (128 pages)
-  DC00..DFFF - Surrogate low              (1024 codes in a page)
-*/
-#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
-#define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
-#define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
-#define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF
-
-#define MY_UTF16_HIGH_HEAD(x)      ((((uchar) (x)) & 0xFC) == 0xD8)
-#define MY_UTF16_LOW_HEAD(x)       ((((uchar) (x)) & 0xFC) == 0xDC)
-/* Test if a byte is a leading byte of a high or low surrogate head: */
-#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
-/* Test if a Unicode code point is a high or low surrogate head */
-#define MY_UTF16_SURROGATE(x)      (((x) & 0xF800) == 0xD800)
-
-#define MY_UTF16_WC2(a, b)         ((a << 8) + b)
-
-/*
-  a= 110110??  (<< 18)
-  b= ????????  (<< 10)
-  c= 110111??  (<<  8)
-  d= ????????  (<<  0)
-*/
-#define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
-                                  ((c & 3) << 8) + d + 0x10000)
+#include "ctype-utf16.h"
 
 #define IS_MB2_CHAR(b0,b1)       (!MY_UTF16_SURROGATE_HEAD(b0))
 #define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
@@ -1220,10 +1194,17 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
 static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
 {
   my_wc_t wc= MY_UTF16_WC2(b0, b1);
-  MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
+  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
   return (int) (page ? page[wc & 0xFF].sort : wc);
 }
 #define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf16_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           0
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            my_unicase_default_page00
+#define UNICASE_PAGES            my_unicase_default_pages
 #define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
 #define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b0,b1)
 #define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
@@ -1261,32 +1242,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
 my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
              my_wc_t *pwc, const uchar *s, const uchar *e)
 {
-  if (s + 2 > e)
-    return MY_CS_TOOSMALL2;
-  
-  /*
-    High bytes: 0xD[89AB] = B'110110??'
-    Low bytes:  0xD[CDEF] = B'110111??'
-    Surrogate mask:  0xFC = B'11111100'
-  */
-
-  if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
-  {
-    if (s + 4 > e)
-      return MY_CS_TOOSMALL4;
-
-    if (!MY_UTF16_LOW_HEAD(s[2]))  /* Broken surrigate pair */
-      return MY_CS_ILSEQ;
-
-    *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
-    return 4;
-  }
-
-  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
-    return MY_CS_ILSEQ;
-
-  *pwc= MY_UTF16_WC2(s[0], s[1]);
-  return 2;
+  return my_mb_wc_utf16_quick(pwc, s, e);
 }
 
 
@@ -1546,7 +1502,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16_general_ci,
   my_strnncollsp_utf16_general_ci,
-  my_strnxfrm_unicode,
+  my_strnxfrm_utf16_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
   my_wildcmp_utf16_ci,
@@ -1578,7 +1534,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16_general_ci,
   my_strnncollsp_utf16_general_nopad_ci,
-  my_strnxfrm_unicode_nopad,
+  my_strnxfrm_nopad_utf16_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
   my_wildcmp_utf16_ci,
@@ -1775,6 +1731,13 @@ struct charset_info_st my_charset_utf16_nopad_bin=
 #define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3))
 
 #define MY_FUNCTION_NAME(x)      my_ ## x ## _utf16le_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e)  (cs->cset->mb_wc(cs, pwc, s, e))
+#define OPTIMIZE_ASCII           0
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            my_unicase_default_page00
+#define UNICASE_PAGES            my_unicase_default_pages
 #define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
 #define WEIGHT_MB2(b0,b1)        my_weight_mb2_utf16mb2_general_ci(b1,b0)
 #define WEIGHT_MB4(b0,b1,b2,b3)  MY_CS_REPLACEMENT_CHARACTER
@@ -1879,7 +1842,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16le_general_ci,
   my_strnncollsp_utf16le_general_ci,
-  my_strnxfrm_unicode,
+  my_strnxfrm_utf16le_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
   my_wildcmp_utf16_ci,
@@ -1911,7 +1874,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16le_general_ci,
   my_strnncollsp_utf16le_general_nopad_ci,
-  my_strnxfrm_unicode_nopad,
+  my_strnxfrm_nopad_utf16le_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
   my_wildcmp_utf16_ci,
@@ -2109,6 +2072,8 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
 
 #ifdef HAVE_CHARSET_utf32
 
+#include "ctype-utf32.h"
+
 /*
   Check is b0 and b1 start a valid UTF32 four-byte sequence.
   Don't accept characters greater than U+10FFFF.
@@ -2117,8 +2082,6 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
 
 #define IS_MB4_CHAR(b0,b1,b2,b3)   (IS_UTF32_MBHEAD4(b0,b1))
 
-#define MY_UTF32_WC4(b0,b1,b2,b3)  ((((my_wc_t)b0) << 24) + (b1 << 16) + \
-                                                (b2 << 8) + (b3))
 
 static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
                                              uchar b2, uchar b3)
@@ -2126,12 +2089,19 @@ static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
   my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3);
   if (wc <= 0xFFFF)
   {
-    MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
+    MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
     return (int) (page ? page[wc & 0xFF].sort : wc);
   }
   return MY_CS_REPLACEMENT_CHARACTER;
 }
 #define MY_FUNCTION_NAME(x)      my_ ## x ## _utf32_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf32_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           0
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            my_unicase_default_page00
+#define UNICASE_PAGES            my_unicase_default_pages
 #define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
 #define WEIGHT_MB4(b0,b1,b2,b3)  my_weight_utf32_general_ci(b0, b1, b2, b3)
 #include "strcoll.ic"
@@ -2161,10 +2131,7 @@ static int
 my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
              my_wc_t *pwc, const uchar *s, const uchar *e)
 {
-  if (s + 4 > e)
-    return MY_CS_TOOSMALL4;
-  *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
-  return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
+  return my_mb_wc_utf32_quick(pwc, s, e);
 }
 
 
@@ -2698,7 +2665,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
   NULL, /* init */
   my_strnncoll_utf32_general_ci,
   my_strnncollsp_utf32_general_ci,
-  my_strnxfrm_unicode,
+  my_strnxfrm_utf32_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
   my_wildcmp_utf32_ci,
@@ -2730,7 +2697,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
   NULL, /* init */
   my_strnncoll_utf32_general_ci,
   my_strnncollsp_utf32_general_nopad_ci,
-  my_strnxfrm_unicode_nopad,
+  my_strnxfrm_nopad_utf32_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
   my_wildcmp_utf32_ci,
@@ -2928,6 +2895,8 @@ struct charset_info_st my_charset_utf32_nopad_bin=
 
 #ifdef HAVE_CHARSET_ucs2
 
+#include "ctype-ucs2.h"
+
 static const uchar ctype_ucs2[] = {
     0,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
@@ -2995,20 +2964,30 @@ static const uchar to_upper_ucs2[] = {
 static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
 {
   my_wc_t wc= UCS2_CODE(b0, b1);
-  MY_UNICASE_CHARACTER *page= my_unicase_default.page[wc >> 8];
+  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
   return (int) (page ? page[wc & 0xFF].sort : wc);
 }
 
 
-#define MY_FUNCTION_NAME(x)    my_ ## x ## _ucs2_general_ci
-#define WEIGHT_ILSEQ(x)        (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB2(b0,b1)      my_weight_mb2_ucs2_general_ci(b0,b1)
+#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_ucs2_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           0
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            my_unicase_default_page00
+#define UNICASE_PAGES            my_unicase_default_pages
+#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1)        my_weight_mb2_ucs2_general_ci(b0,b1)
 #include "strcoll.ic"
 
 
-#define MY_FUNCTION_NAME(x)    my_ ## x ## _ucs2_bin
-#define WEIGHT_ILSEQ(x)        (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB2(b0,b1)      UCS2_CODE(b0,b1)
+#define MY_FUNCTION_NAME(x)      my_ ## x ## _ucs2_bin
+#define DEFINE_STRNXFRM_UNICODE_BIN2
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_ucs2_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           0
+#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1)        UCS2_CODE(b0,b1)
 #include "strcoll.ic"
 
 
@@ -3037,11 +3016,7 @@ my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
 static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
 		       my_wc_t * pwc, const uchar *s, const uchar *e)
 {
-  if (s+2 > e) /* Need 2 characters */
-    return MY_CS_TOOSMALL2;
-  
-  *pwc= ((uchar)s[0]) * 256  + ((uchar)s[1]);
-  return 2;
+  return my_mb_wc_ucs2_quick(pwc, s, e);
 }
 
 static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
@@ -3280,7 +3255,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_general_ci,
     my_strnncollsp_ucs2_general_ci,
-    my_strnxfrm_unicode,
+    my_strnxfrm_ucs2_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
     my_wildcmp_ucs2_ci,
@@ -3296,7 +3271,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_bin,
     my_strnncollsp_ucs2_bin,
-    my_strnxfrm_unicode,
+    my_strnxfrm_ucs2_bin,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
     my_wildcmp_ucs2_bin,
@@ -3312,7 +3287,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_general_ci,
     my_strnncollsp_ucs2_general_nopad_ci,
-    my_strnxfrm_unicode_nopad,
+    my_strnxfrm_nopad_ucs2_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
     my_wildcmp_ucs2_ci,
@@ -3328,7 +3303,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_bin,
     my_strnncollsp_ucs2_nopad_bin,
-    my_strnxfrm_unicode_nopad,
+    my_strnxfrm_nopad_ucs2_bin,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
     my_wildcmp_ucs2_bin,
diff --git a/strings/ctype-ucs2.h b/strings/ctype-ucs2.h
new file mode 100644
index 00000000000..c989324172d
--- /dev/null
+++ b/strings/ctype-ucs2.h
@@ -0,0 +1,32 @@
+/*
+  Copyright (c) 2018 MariaDB Corporation
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UCS2_H
+#define _CTYPE_UCS2_H
+
+/* Decode one big-endian UCS2 character at s: returns 2 or MY_CS_TOOSMALL2 */
+static inline int
+my_mb_wc_ucs2_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+  if (s+2 > e) /* Need 2 bytes */
+    return MY_CS_TOOSMALL2;
+  *pwc= ((uchar)s[0]) * 256  + ((uchar)s[1]); /* s[0] is the high byte */
+  return 2;
+}
+
+
+#endif /* _CTYPE_UCS2_H */
diff --git a/strings/ctype-unidata.h b/strings/ctype-unidata.h
new file mode 100644
index 00000000000..6712f5e1d79
--- /dev/null
+++ b/strings/ctype-unidata.h
@@ -0,0 +1,31 @@
+#ifndef CTYPE_UNIDATA_H_INCLUDED
+#define CTYPE_UNIDATA_H_INCLUDED
+/*
+  Copyright (c) 2018 MariaDB Corporation
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#define MY_UNICASE_INFO_DEFAULT_MAXCHAR 0xFFFF
+extern MY_UNICASE_CHARACTER my_unicase_default_page00[256];
+extern MY_UNICASE_CHARACTER *my_unicase_default_pages[256];
+
+size_t my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights);
+size_t my_strxfrm_pad_unicode(uchar *str, uchar *strend);
+
+/* Store wc big-endian: high byte unconditionally, low byte only if dst < de */
+#define PUT_WC_BE2_HAVE_1BYTE(dst, de, wc) \
+  do { *(dst)++= (uchar) ((wc) >> 8); if ((dst) < (de)) *(dst)++= (uchar) ((wc) & 0xFF); } while(0)
+
+#endif /* CTYPE_UNIDATA_H_INCLUDED */
diff --git a/strings/ctype-utf16.h b/strings/ctype-utf16.h
new file mode 100644
index 00000000000..d4cf4664f97
--- /dev/null
+++ b/strings/ctype-utf16.h
@@ -0,0 +1,80 @@
+/*
+  Copyright (c) 2018 MariaDB Corporation
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF16_H
+#define _CTYPE_UTF16_H
+
+/*
+  D800..DB7F - Non-private surrogate high (896 pages)
+  DB80..DBFF - Private surrogate high     (128 pages)
+  DC00..DFFF - Surrogate low              (1024 codes in a page)
+*/
+#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
+#define MY_UTF16_SURROGATE_HIGH_LAST  0xDBFF
+#define MY_UTF16_SURROGATE_LOW_FIRST  0xDC00
+#define MY_UTF16_SURROGATE_LOW_LAST   0xDFFF
+
+#define MY_UTF16_HIGH_HEAD(x)      ((((uchar) (x)) & 0xFC) == 0xD8)
+#define MY_UTF16_LOW_HEAD(x)       ((((uchar) (x)) & 0xFC) == 0xDC)
+/* Test if a byte is a leading byte of a high or low surrogate head: */
+#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
+/* Test if a Unicode code point is a high or low surrogate head */
+#define MY_UTF16_SURROGATE(x)      (((x) & 0xF800) == 0xD800)
+/* Combine two bytes (a = high byte, b = low byte) into one 16-bit value */
+#define MY_UTF16_WC2(a, b)         (((a) << 8) + (b))
+
+/*
+  Combine the 4 bytes of a surrogate pair into a code point >= 0x10000:
+  a= 110110??  (<< 18)      b= ????????  (<< 10)
+  c= 110111??  (<<  8)      d= ????????  (<<  0)
+  Every argument is parenthesized, so expressions can be passed safely.
+*/
+#define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
+                                  (((c) & 3) << 8) + (d) + 0x10000)
+/* Decode one big-endian UTF-16 character: 2, 4, MY_CS_TOOSMALL2/4 or MY_CS_ILSEQ */
+static inline int
+my_mb_wc_utf16_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+  if (s + 2 > e)
+    return MY_CS_TOOSMALL2;
+
+  /*
+    Leading byte of a high surrogate: 0xD[89AB] = B'110110??'
+    Leading byte of a low surrogate:  0xD[CDEF] = B'110111??'
+    Surrogate mask:                        0xFC = B'11111100'
+  */
+
+  if (MY_UTF16_HIGH_HEAD(*s)) /* High surrogate: a 4-byte character */
+  {
+    if (s + 4 > e)
+      return MY_CS_TOOSMALL4;
+
+    if (!MY_UTF16_LOW_HEAD(s[2]))  /* Broken surrogate pair */
+      return MY_CS_ILSEQ;
+
+    *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
+    return 4;
+  }
+
+  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
+    return MY_CS_ILSEQ;
+
+  *pwc= MY_UTF16_WC2(s[0], s[1]);
+  return 2;
+}
+
+#endif /* _CTYPE_UTF16_H */
diff --git a/strings/ctype-utf32.h b/strings/ctype-utf32.h
new file mode 100644
index 00000000000..e295dc6d081
--- /dev/null
+++ b/strings/ctype-utf32.h
@@ -0,0 +1,33 @@
+/*
+  Copyright (c) 2018 MariaDB Corporation
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF32_H
+#define _CTYPE_UTF32_H
+/* Combine 4 bytes (b0 = most significant) into a code point; args parenthesized */
+#define MY_UTF32_WC4(b0,b1,b2,b3)  ((((my_wc_t)(b0)) << 24) + ((b1) << 16) + \
+                                                ((b2) << 8) + (b3))
+/* Decode one big-endian UTF-32 character: 4, MY_CS_TOOSMALL4 or MY_CS_ILSEQ */
+static inline int
+my_mb_wc_utf32_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+  if (s + 4 > e) /* Need 4 bytes */
+    return MY_CS_TOOSMALL4;
+  *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]); /* Stored even if rejected below */
+  return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;   /* Reject values above U+10FFFF */
+}
+
+#endif /* _CTYPE_UTF32_H */
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 4ef376dccc8..4ddb086b734 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -26,78 +26,10 @@
 #define EILSEQ ENOENT
 #endif
 
-/* Detect special bytes and sequences */
-#define IS_CONTINUATION_BYTE(c)   (((uchar) (c) ^ 0x80) < 0x40)
 
-/*
-  Check MB2 character assuming that b0 is alredy known to be >= 0xC2.
-  Use this macro if the caller already checked b0 for:
-  - an MB1 character
-  - an unused gap between MB1 and MB2HEAD
-*/
-#define IS_UTF8MB2_STEP2(b0,b1)     (((uchar) (b0) < 0xE0) && \
-                                     IS_CONTINUATION_BYTE((uchar) b1))
+#include "ctype-utf8.h"
+#include "ctype-unidata.h"
 
-/*
-  Check MB3 character assuming that b0 is already known to be
-  in the valid MB3HEAD range [0xE0..0xEF].
-*/
-#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
-                                    IS_CONTINUATION_BYTE(b2) && \
-                                    ((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
-
-/*
-  Check MB3 character assuming that b0 is already known to be >= 0xE0,
-  but is not checked for the high end 0xF0 yet.
-  Use this macro if the caller already checked b0 for:
-  - an MB1 character
-  - an unused gap between MB1 and MB2HEAD
-  - an MB2HEAD
-*/
-#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
-                                    IS_UTF8MB3_STEP2(b0,b1,b2))
-
-/*
-  UTF-8 quick four-byte mask:
-  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-  Encoding allows to encode U+00010000..U+001FFFFF
-
-  The maximum character defined in the Unicode standard is U+0010FFFF.
-  Higher characters U+00110000..U+001FFFFF are not used.
-
-  11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
-  11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
-
-  Valid codes:
-  [F0][90..BF][80..BF][80..BF]
-  [F1][80..BF][80..BF][80..BF]
-  [F2][80..BF][80..BF][80..BF]
-  [F3][80..BF][80..BF][80..BF]
-  [F4][80..8F][80..BF][80..BF]
-*/
-
-/*
-  Check MB4 character assuming that b0 is already
-  known to be in the range [0xF0..0xF4]
-*/
-#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
-                                       IS_CONTINUATION_BYTE(b2) && \
-                                       IS_CONTINUATION_BYTE(b3) && \
-                                       (b0 >= 0xf1 || b1 >= 0x90) && \
-                                       (b0 <= 0xf3 || b1 <= 0x8F))
-#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
-                                       IS_UTF8MB4_STEP2(b0,b1,b2,b3))
-
-/* Convert individual bytes to Unicode code points */
-#define UTF8MB2_CODE(b0,b1)       (((my_wc_t) ((uchar) b0 & 0x1f) << 6)  |\
-                                   ((my_wc_t) ((uchar) b1 ^ 0x80)))
-#define UTF8MB3_CODE(b0,b1,b2)    (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
-                                   ((my_wc_t) ((uchar) b1 ^ 0x80) << 6)  |\
-                                   ((my_wc_t) ((uchar) b2 ^ 0x80)))
-#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
-                                   ((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
-                                   ((my_wc_t) ((uchar) b2 ^ 0x80) << 6)  |\
-                                    (my_wc_t) ((uchar) b3 ^ 0x80))
 
 /* Definitions for strcoll.ic */
 #define IS_MB1_CHAR(x)              ((uchar) (x) < 0x80)
@@ -180,7 +112,7 @@ int my_valid_mbcharlen_utf8mb3(const uchar *s, const uchar *e)
 
 #include "my_uctype.h"
 
-static MY_UNICASE_CHARACTER plane00[]={
+MY_UNICASE_CHARACTER my_unicase_default_page00[]={
   {0x0000,0x0000,0x0000},  {0x0001,0x0001,0x0001},
   {0x0002,0x0002,0x0002},  {0x0003,0x0003,0x0003},
   {0x0004,0x0004,0x0004},  {0x0005,0x0005,0x0005},
@@ -313,7 +245,7 @@ static MY_UNICASE_CHARACTER plane00[]={
 
 
 /*
-  Almost similar to plane00, but maps sorting order
+  Almost similar to my_unicase_default_page00, but maps sorting order
   for U+00DF to 0x00DF instead of 0x0053.
 */
 static MY_UNICASE_CHARACTER plane00_mysql500[]={
@@ -1759,9 +1691,10 @@ static MY_UNICASE_CHARACTER planeFF[]={
 };
 
 
-static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
+MY_UNICASE_CHARACTER *my_unicase_default_pages[256]=
 {
- plane00, plane01, plane02, plane03, plane04, plane05,    NULL,    NULL,
+    my_unicase_default_page00,
+             plane01, plane02, plane03, plane04, plane05, NULL,    NULL,
     NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
     NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,    NULL,
     NULL,    NULL,    NULL,    NULL,    NULL,    NULL, plane1E, plane1F,
@@ -1798,8 +1731,8 @@ static MY_UNICASE_CHARACTER *my_unicase_pages_default[256]=
 
 MY_UNICASE_INFO my_unicase_default=
 {
-  0xFFFF,
-  my_unicase_pages_default
+  MY_UNICASE_INFO_DEFAULT_MAXCHAR,
+  my_unicase_default_pages
 };
 
 
@@ -4646,7 +4579,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs,
   @return Result length
 */
 
-static size_t
+size_t
 my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights)
 {
   uchar *str0;
@@ -4675,7 +4608,7 @@ my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights)
   @return Result length
 */
 
-static size_t
+size_t
 my_strxfrm_pad_unicode(uchar *str, uchar *strend)
 {
   uchar *str0= str;
@@ -4690,95 +4623,6 @@ my_strxfrm_pad_unicode(uchar *str, uchar *strend)
 }
 
 
-size_t my_strnxfrm_unicode_internal(CHARSET_INFO *cs,
-                                    uchar *dst, uchar *de, uint *nweights,
-                                    const uchar *src, const uchar *se)
-{
-  my_wc_t UNINIT_VAR(wc);
-  int res;
-  uchar *dst0= dst;
-  MY_UNICASE_INFO *uni_plane= (cs->state & MY_CS_BINSORT) ?
-                               NULL : cs->caseinfo;
-
-  DBUG_ASSERT(src || !se);
-
-  for (; dst < de && *nweights; (*nweights)--)
-  {
-    if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0)
-      break;
-    src+= res;
-
-    if (uni_plane)
-      my_tosort_unicode(uni_plane, &wc, cs->state);
-
-    *dst++= (uchar) (wc >> 8);
-    if (dst < de)
-      *dst++= (uchar) (wc & 0xFF);
-  }
-  return dst - dst0;
-}
-
-
-/*
-  Store sorting weights using 2 bytes per character.
-
-  This function is shared between
-  - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
-    which support BMP only (U+0000..U+FFFF).
-  - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
-    which map all supplementary characters to weight 0xFFFD.
-*/
-size_t
-my_strnxfrm_unicode(CHARSET_INFO *cs,
-                    uchar *dst, size_t dstlen, uint nweights,
-                    const uchar *src, size_t srclen, uint flags)
-{
-  uchar *dst0= dst;
-  uchar *de= dst + dstlen;
-  dst+= my_strnxfrm_unicode_internal(cs, dst, de, &nweights,
-                                         src, src + srclen);
-  DBUG_ASSERT(dst <= de); /* Safety */
-
-  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
-    dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
-
-  my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
-
-  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
-    dst+= my_strxfrm_pad_unicode(dst, de);
-  return dst - dst0;
-}
-
-
-size_t
-my_strnxfrm_unicode_nopad(CHARSET_INFO *cs,
-                          uchar *dst, size_t dstlen, uint nweights,
-                          const uchar *src, size_t srclen, uint flags)
-{
-  uchar *dst0= dst;
-  uchar *de= dst + dstlen;
-  dst+= my_strnxfrm_unicode_internal(cs, dst, de, &nweights,
-                                         src, src + srclen);
-  DBUG_ASSERT(dst <= de); /* Safety */
-
-  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
-  {
-    size_t len= de - dst;
-    set_if_smaller(len, nweights * 2);
-    memset(dst, 0x00, len);
-    dst+= len;
-  }
-
-  my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
-
-  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
-  {
-    memset(dst, 0x00, de - dst);
-    dst= de;
-  }
-  return dst - dst0;
-}
-
 /*
   For BMP-only collations that use 2 bytes per weight.
 */
@@ -4977,42 +4821,7 @@ static const uchar to_upper_utf8[] = {
 static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
                        my_wc_t * pwc, const uchar *s, const uchar *e)
 {
-  uchar c;
-
-  if (s >= e)
-    return MY_CS_TOOSMALL;
-
-  c= s[0];
-  if (c < 0x80)
-  {
-    *pwc = c;
-    return 1;
-  }
-  else if (c < 0xc2)
-    return MY_CS_ILSEQ;
-  else if (c < 0xe0)
-  {
-    if (s+2 > e) /* We need 2 characters */
-      return MY_CS_TOOSMALL2;
-
-    if (!(IS_CONTINUATION_BYTE(s[1])))
-      return MY_CS_ILSEQ;
-
-    *pwc= UTF8MB2_CODE(c, s[1]);
-    return 2;
-  }
-  else if (c < 0xf0)
-  {
-    if (s+3 > e) /* We need 3 characters */
-      return MY_CS_TOOSMALL3;
-
-    if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
-      return MY_CS_ILSEQ;
-
-    *pwc= UTF8MB3_CODE(c, s[1], s[2]);
-    return 3;
-  }
-  return MY_CS_ILSEQ;
+  return my_mb_wc_utf8mb3_quick(pwc, s, e);
 }
 
 
@@ -5308,7 +5117,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
         It represents a single byte character.
         Convert it into weight according to collation.
       */
-      s_wc= plane00[(uchar) s[0]].tolower;
+      s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
       s++;
     }
     else
@@ -5350,7 +5159,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
     if ((uchar) t[0] < 128)
     {
       /* Convert single byte character into weight */
-      t_wc= plane00[(uchar) t[0]].tolower;
+      t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
       t++;
     }
     else
@@ -5413,14 +5222,14 @@ int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
 
 static inline int my_weight_mb1_utf8_general_ci(uchar b)
 {
-  return (int) plane00[b & 0xFF].sort;
+  return (int) my_unicase_default_page00[b & 0xFF].sort;
 }
 
 
 static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1)
 {
   my_wc_t wc= UTF8MB2_CODE(b0, b1);
-  MY_UNICASE_CHARACTER *page= my_unicase_pages_default[wc >> 8];
+  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
   return (int) (page ? page[wc & 0xFF].sort : wc);
 }
 
@@ -5428,16 +5237,23 @@ static inline int my_weight_mb2_utf8_general_ci(uchar b0, uchar b1)
 static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2)
 {
   my_wc_t wc= UTF8MB3_CODE(b0, b1, b2);
-  MY_UNICASE_CHARACTER *page= my_unicase_pages_default[wc >> 8];
+  MY_UNICASE_CHARACTER *page= my_unicase_default_pages[wc >> 8];
   return (int) (page ? page[wc & 0xFF].sort : wc);
 }
 
 
-#define MY_FUNCTION_NAME(x)    my_ ## x ## _utf8_general_ci
-#define WEIGHT_ILSEQ(x)        (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB1(x)          my_weight_mb1_utf8_general_ci(x)
-#define WEIGHT_MB2(x,y)        my_weight_mb2_utf8_general_ci(x,y)
-#define WEIGHT_MB3(x,y,z)      my_weight_mb3_utf8_general_ci(x,y,z)
+#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf8_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf8mb3_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           1
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            my_unicase_default_page00
+#define UNICASE_PAGES            my_unicase_default_pages
+#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x)            my_weight_mb1_utf8_general_ci(x)
+#define WEIGHT_MB2(x,y)          my_weight_mb2_utf8_general_ci(x,y)
+#define WEIGHT_MB3(x,y,z)        my_weight_mb3_utf8_general_ci(x,y,z)
 #include "strcoll.ic"
 
 
@@ -5473,19 +5289,28 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
 }
 
 
-#define MY_FUNCTION_NAME(x)    my_ ## x ## _utf8_general_mysql500_ci
-#define WEIGHT_ILSEQ(x)        (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB1(x)          my_weight_mb1_utf8_general_mysql500_ci(x)
-#define WEIGHT_MB2(x,y)        my_weight_mb2_utf8_general_mysql500_ci(x,y)
-#define WEIGHT_MB3(x,y,z)      my_weight_mb3_utf8_general_mysql500_ci(x,y,z)
+#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf8_general_mysql500_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf8mb3_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           1
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            plane00_mysql500
+#define UNICASE_PAGES            my_unicase_pages_mysql500
+#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x)            my_weight_mb1_utf8_general_mysql500_ci(x)
+#define WEIGHT_MB2(x,y)          my_weight_mb2_utf8_general_mysql500_ci(x,y)
+#define WEIGHT_MB3(x,y,z)        my_weight_mb3_utf8_general_mysql500_ci(x,y,z)
 #include "strcoll.ic"
 
 
-#define MY_FUNCTION_NAME(x)    my_ ## x ## _utf8_bin
-#define WEIGHT_ILSEQ(x)        (0xFF0000 + (uchar) (x))
-#define WEIGHT_MB1(x)          ((int) (uchar) (x))
-#define WEIGHT_MB2(x,y)        ((int) UTF8MB2_CODE(x,y))
-#define WEIGHT_MB3(x,y,z)      ((int) UTF8MB3_CODE(x,y,z))
+#define MY_FUNCTION_NAME(x)      my_ ## x ## _utf8_bin
+#define DEFINE_STRNXFRM_UNICODE_BIN2
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf8mb3_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           1
+#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x)            ((int) (uchar) (x))
+#define WEIGHT_MB2(x,y)          ((int) UTF8MB2_CODE(x,y))
+#define WEIGHT_MB3(x,y,z)        ((int) UTF8MB3_CODE(x,y,z))
 #include "strcoll.ic"
 
 
@@ -5534,7 +5359,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
     NULL,               /* init */
     my_strnncoll_utf8_general_ci,
     my_strnncollsp_utf8_general_ci,
-    my_strnxfrm_unicode,
+    my_strnxfrm_utf8_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
     my_wildcmp_utf8,
@@ -5550,7 +5375,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
     NULL,               /* init */
     my_strnncoll_utf8_general_mysql500_ci,
     my_strnncollsp_utf8_general_mysql500_ci,
-    my_strnxfrm_unicode,
+    my_strnxfrm_utf8_general_mysql500_ci,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
     my_wildcmp_utf8,
@@ -5566,7 +5391,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
     NULL,		/* init */
     my_strnncoll_utf8_bin,
     my_strnncollsp_utf8_bin,
-    my_strnxfrm_unicode,
+    my_strnxfrm_utf8_bin,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
     my_wildcmp_mb_bin,
@@ -5582,7 +5407,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
   NULL,               /* init */
   my_strnncoll_utf8_general_ci,
   my_strnncollsp_utf8_general_nopad_ci,
-  my_strnxfrm_unicode_nopad,
+  my_strnxfrm_nopad_utf8_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
   my_wildcmp_utf8,
@@ -5598,7 +5423,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler =
   NULL,		/* init */
   my_strnncoll_utf8_bin,
   my_strnncollsp_utf8_nopad_bin,
-  my_strnxfrm_unicode_nopad,
+  my_strnxfrm_nopad_utf8_bin,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
   my_wildcmp_mb_bin,
@@ -5927,7 +5752,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler =
     NULL,		/* init */
     my_strnncoll_utf8_cs,
     my_strnncollsp_utf8_cs,
-    my_strnxfrm_unicode,
+    my_strnxfrm_utf8_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_simple,
     my_wildcmp_mb,
@@ -7212,13 +7037,30 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
 #undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
 /* my_well_formed_char_length_filename */
 
+#define MY_FUNCTION_NAME(x)      my_ ## x ## _filename
+#define DEFINE_STRNNCOLL         0
+#define DEFINE_STRNXFRM_UNICODE
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_filename(cs, pwc, s, e)
+#define OPTIMIZE_ASCII           0
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            my_unicase_default_page00
+#define UNICASE_PAGES            my_unicase_default_pages
+
+/*
+#define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB1(x)            my_weight_mb1_utf8_general_ci(x)
+#define WEIGHT_MB2(x,y)          my_weight_mb2_utf8_general_ci(x,y)
+#define WEIGHT_MB3(x,y,z)        my_weight_mb3_utf8_general_ci(x,y,z)
+*/
+#include "strcoll.ic"
+
 
 static MY_COLLATION_HANDLER my_collation_filename_handler =
 {
     NULL,               /* init */
     my_strnncoll_simple,
     my_strnncollsp_simple,
-    my_strnxfrm_unicode,
+    my_strnxfrm_filename,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
     my_wildcmp_utf8,
@@ -7375,52 +7217,7 @@ static int
 my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
                  my_wc_t * pwc, const uchar *s, const uchar *e)
 {
-  uchar c;
-
-  if (s >= e)
-    return MY_CS_TOOSMALL;
-
-  c= s[0];
-  if (c < 0x80)
-  {
-    *pwc= c;
-    return 1;
-  }
-  else if (c < 0xc2)
-    return MY_CS_ILSEQ;
-  else if (c < 0xe0)
-  {
-    if (s + 2 > e) /* We need 2 characters */
-      return MY_CS_TOOSMALL2;
-
-    if (!(IS_CONTINUATION_BYTE(s[1])))
-      return MY_CS_ILSEQ;
-
-    *pwc= UTF8MB2_CODE(c, s[1]);
-    return 2;
-  }
-  else if (c < 0xf0)
-  {
-    if (s + 3 > e) /* We need 3 characters */
-      return MY_CS_TOOSMALL3;
-
-    if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
-      return MY_CS_ILSEQ;
-
-    *pwc= UTF8MB3_CODE(c, s[1], s[2]);
-    return 3;
-  }
-  else if (c < 0xf5)
-  {
-    if (s + 4 > e) /* We need 4 characters */
-      return MY_CS_TOOSMALL4;
-
-    if (!IS_UTF8MB4_STEP2(c, s[1], s[2], s[3]))
-      return MY_CS_ILSEQ;
-    *pwc= UTF8MB4_CODE(c, s[1], s[2], s[3]);
-    return 4;
-  }
-  return MY_CS_ILSEQ;
+  return my_mb_wc_utf8mb4_quick(pwc, s, e);
 }
 
 
@@ -7752,7 +7549,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
         It represents a single byte character.
         Convert it into weight according to collation.
       */
-      s_wc= plane00[(uchar) s[0]].tolower;
+      s_wc= my_unicase_default_page00[(uchar) s[0]].tolower;
       s++;
     }
     else
@@ -7776,7 +7573,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
     if ((uchar) t[0] < 128)
     {
       /* Convert single byte character into weight */
-      t_wc= plane00[(uchar) t[0]].tolower;
+      t_wc= my_unicase_default_page00[(uchar) t[0]].tolower;
       t++;
     }
     else
@@ -7847,6 +7644,13 @@ my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
 
 
 #define MY_FUNCTION_NAME(x)      my_ ## x ## _utf8mb4_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e)  my_mb_wc_utf8mb4_quick(pwc, s, e)
+#define OPTIMIZE_ASCII           1
+#define UNICASE_MAXCHAR          MY_UNICASE_INFO_DEFAULT_MAXCHAR
+#define UNICASE_PAGE0            my_unicase_default_page00
+#define UNICASE_PAGES            my_unicase_default_pages
 #define IS_MB4_CHAR(b0,b1,b2,b3) IS_UTF8MB4_STEP3(b0,b1,b2,b3)
 #define WEIGHT_ILSEQ(x)          (0xFF0000 + (uchar) (x))
 #define WEIGHT_MB1(b0)           my_weight_mb1_utf8_general_ci(b0)
@@ -7897,7 +7701,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
   NULL,               /* init */
   my_strnncoll_utf8mb4_general_ci,
   my_strnncollsp_utf8mb4_general_ci,
-  my_strnxfrm_unicode,
+  my_strnxfrm_utf8mb4_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
   my_wildcmp_utf8mb4,
@@ -7929,7 +7733,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler=
   NULL,               /* init */
   my_strnncoll_utf8mb4_general_ci,
   my_strnncollsp_utf8mb4_general_nopad_ci,
-  my_strnxfrm_unicode_nopad,
+  my_strnxfrm_nopad_utf8mb4_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
   my_wildcmp_utf8mb4,
diff --git a/strings/ctype-utf8.h b/strings/ctype-utf8.h
new file mode 100644
index 00000000000..9a44c1658f2
--- /dev/null
+++ b/strings/ctype-utf8.h
@@ -0,0 +1,190 @@
+/*
+  Copyright (c) 2018 MariaDB Corporation
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef _CTYPE_UTF8_H
+#define _CTYPE_UTF8_H
+
+/* Detect special bytes and sequences */
+#define IS_CONTINUATION_BYTE(c)   (((uchar) (c) ^ 0x80) < 0x40)
+
+/*
+  Check MB2 character assuming that b0 is already known to be >= 0xC2.
+  Use this macro if the caller already checked b0 for:
+  - an MB1 character
+  - an unused gap between MB1 and MB2HEAD
+*/
+#define IS_UTF8MB2_STEP2(b0,b1)     (((uchar) (b0) < 0xE0) && \
+                                     IS_CONTINUATION_BYTE((uchar) b1))
+
+/*
+  Check MB3 character assuming that b0 is already known to be
+  in the valid MB3HEAD range [0xE0..0xEF].
+*/
+#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
+                                    IS_CONTINUATION_BYTE(b2) && \
+                                    ((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
+
+/*
+  Check MB3 character assuming that b0 is already known to be >= 0xE0,
+  but is not checked for the high end 0xF0 yet.
+  Use this macro if the caller already checked b0 for:
+  - an MB1 character
+  - an unused gap between MB1 and MB2HEAD
+  - an MB2HEAD
+*/
+#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
+                                    IS_UTF8MB3_STEP2(b0,b1,b2))
+
+/*
+  UTF-8 quick four-byte mask:
+  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+  Encoding allows to encode U+00010000..U+001FFFFF
+
+  The maximum character defined in the Unicode standard is U+0010FFFF.
+  Higher characters U+00110000..U+001FFFFF are not used.
+
+  11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
+  11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
+
+  Valid codes:
+  [F0][90..BF][80..BF][80..BF]
+  [F1][80..BF][80..BF][80..BF]
+  [F2][80..BF][80..BF][80..BF]
+  [F3][80..BF][80..BF][80..BF]
+  [F4][80..8F][80..BF][80..BF]
+*/
+
+/*
+  Check MB4 character assuming that b0 is already
+  known to be in the range [0xF0..0xF4]
+*/
+#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
+                                       IS_CONTINUATION_BYTE(b2) && \
+                                       IS_CONTINUATION_BYTE(b3) && \
+                                       (b0 >= 0xf1 || b1 >= 0x90) && \
+                                       (b0 <= 0xf3 || b1 <= 0x8F))
+#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
+                                       IS_UTF8MB4_STEP2(b0,b1,b2,b3))
+
+/* Convert individual bytes to Unicode code points */
+#define UTF8MB2_CODE(b0,b1)       (((my_wc_t) ((uchar) b0 & 0x1f) << 6)  |\
+                                   ((my_wc_t) ((uchar) b1 ^ 0x80)))
+#define UTF8MB3_CODE(b0,b1,b2)    (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
+                                   ((my_wc_t) ((uchar) b1 ^ 0x80) << 6)  |\
+                                   ((my_wc_t) ((uchar) b2 ^ 0x80)))
+#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
+                                   ((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
+                                   ((my_wc_t) ((uchar) b2 ^ 0x80) << 6)  |\
+                                    (my_wc_t) ((uchar) b3 ^ 0x80))
+
+static inline int
+my_mb_wc_utf8mb3_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+  uchar c;
+
+  if (s >= e)
+    return MY_CS_TOOSMALL;
+
+  c= s[0];
+  if (c < 0x80)
+  {
+    *pwc = c;
+    return 1;
+  }
+  else if (c < 0xc2)
+    return MY_CS_ILSEQ;
+  else if (c < 0xe0)
+  {
+    if (s+2 > e) /* We need 2 characters */
+      return MY_CS_TOOSMALL2;
+
+    if (!(IS_CONTINUATION_BYTE(s[1])))
+      return MY_CS_ILSEQ;
+
+    *pwc= UTF8MB2_CODE(c, s[1]);
+    return 2;
+  }
+  else if (c < 0xf0)
+  {
+    if (s+3 > e) /* We need 3 characters */
+      return MY_CS_TOOSMALL3;
+
+    if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
+      return MY_CS_ILSEQ;
+
+    *pwc= UTF8MB3_CODE(c, s[1], s[2]);
+    return 3;
+  }
+  return MY_CS_ILSEQ;
+}
+
+
+#ifdef HAVE_CHARSET_utf8mb4
+static inline int
+my_mb_wc_utf8mb4_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
+{
+  uchar c;
+
+  if (s >= e)
+    return MY_CS_TOOSMALL;
+
+  c= s[0];
+  if (c < 0x80)
+  {
+    *pwc= c;
+    return 1;
+  }
+  else if (c < 0xc2)
+    return MY_CS_ILSEQ;
+  else if (c < 0xe0)
+  {
+    if (s + 2 > e) /* We need 2 characters */
+      return MY_CS_TOOSMALL2;
+
+    if (!(IS_CONTINUATION_BYTE(s[1])))
+      return MY_CS_ILSEQ;
+
+    *pwc= UTF8MB2_CODE(c, s[1]);
+    return 2;
+  }
+  else if (c < 0xf0)
+  {
+    if (s + 3 > e) /* We need 3 characters */
+      return MY_CS_TOOSMALL3;
+
+    if (!IS_UTF8MB3_STEP2(c, s[1], s[2]))
+      return MY_CS_ILSEQ;
+
+    *pwc= UTF8MB3_CODE(c, s[1], s[2]);
+    return 3;
+  }
+  else if (c < 0xf5)
+  {
+    if (s + 4 > e) /* We need 4 characters */
+      return MY_CS_TOOSMALL4;
+
+    if (!IS_UTF8MB4_STEP2(c, s[1], s[2], s[3]))
+      return MY_CS_ILSEQ;
+    *pwc= UTF8MB4_CODE(c, s[1], s[2], s[3]);
+    return 4;
+  }
+  return MY_CS_ILSEQ;
+}
+#endif /* HAVE_CHARSET_utf8mb4*/
+
+
+#endif /* _CTYPE_UTF8_H */
diff --git a/strings/json_lib.c b/strings/json_lib.c
index 24c79cb9044..3763ac4ed54 100644
--- a/strings/json_lib.c
+++ b/strings/json_lib.c
@@ -1845,3 +1845,252 @@ int json_path_compare(const json_path_t *a, const json_path_t *b,
   return json_path_parts_compare(a->steps+1, a->last_step,
                                  b->steps+1, b->last_step, vt);
 }
+
+
+static enum json_types smart_read_value(json_engine_t *je,
+                                        const char **value, int *value_len)
+{
+  if (json_read_value(je))
+    goto err_return;
+
+  *value= (char *) je->value;
+
+  if (json_value_scalar(je))
+    *value_len= je->value_len;
+  else
+  {
+    if (json_skip_level(je))
+      goto err_return;
+
+    *value_len= (int) ((char *) je->s.c_str - *value);
+  }
+
+  return je->value_type;
+
+err_return:
+  return JSV_BAD_JSON;
+}
+
+
+enum json_types json_type(const char *js, const char *js_end,
+                          const char **value, int *value_len)
+{
+  json_engine_t je;
+
+  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+                  (const uchar *) js_end);
+
+  return smart_read_value(&je, value, value_len);
+}
+
+
+enum json_types json_get_array_item(const char *js, const char *js_end,
+                                    int n_item,
+                                    const char **value, int *value_len)
+{
+  json_engine_t je;
+  int c_item= 0;
+
+  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+                  (const uchar *) js_end);
+
+  if (json_read_value(&je) ||
+      je.value_type != JSON_VALUE_ARRAY)
+    goto err_return;
+
+  while (!json_scan_next(&je))
+  {
+    switch (je.state)
+    {
+    case JST_VALUE:
+      if (c_item == n_item)
+        return smart_read_value(&je, value, value_len);
+
+      if (json_skip_key(&je))
+        goto err_return;
+
+      c_item++;
+      break;
+
+    case JST_ARRAY_END:
+      *value= (const char *) (je.s.c_str - je.sav_c_len);
+      *value_len= c_item;
+      return JSV_NOTHING;
+    }
+  }
+
+err_return:
+  return JSV_BAD_JSON;
+}
+
+
+/** Simple json lookup for a value by the key.
+
+  Expects JSON object.
+  Only scans the 'first level' of the object, not
+  the nested structures.
+
+  @param js          [in]       json object to search in
+  @param js_end      [in]       end of json string
+  @param key         [in]       key to search for, NUL-terminated
+                                (its end is found with strlen())
+  @param value       [out]      pointer into js (value or closing })
+  @param value_len   [out]      length of the value found or number of keys
+
+  @retval the type of the key value
+  @retval JSV_BAD_JSON - syntax error found reading JSON.
+                         or not JSON object.
+  @retval JSV_NOTHING - no such key found.
+*/
+enum json_types json_get_object_key(const char *js, const char *js_end,
+                                    const char *key,
+                                    const char **value, int *value_len)
+{
+  const char *key_end= key + strlen(key);
+  json_engine_t je;
+  json_string_t key_name;
+  int n_keys= 0;
+
+  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
+
+  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+                  (const uchar *) js_end);
+
+  if (json_read_value(&je) ||
+      je.value_type != JSON_VALUE_OBJECT)
+    goto err_return;
+
+  while (!json_scan_next(&je))
+  {
+    switch (je.state)
+    {
+    case JST_KEY:
+      n_keys++;
+      json_string_set_str(&key_name, (const uchar *) key,
+                          (const uchar *) key_end);
+      if (json_key_matches(&je, &key_name))
+        return smart_read_value(&je, value, value_len);
+
+      if (json_skip_key(&je))
+        goto err_return;
+
+      break;
+
+    case JST_OBJ_END:
+      *value= (const char *) (je.s.c_str - je.sav_c_len);
+      *value_len= n_keys;
+      return JSV_NOTHING;
+    }
+  }
+
+err_return:
+  return JSV_BAD_JSON;
+}
+
+
+enum json_types json_get_object_nkey(const char *js,const char *js_end, int nkey,
+                       const char **keyname, const char **keyname_end,
+                       const char **value, int *value_len)
+{
+  return JSV_NOTHING;
+}
+
+
+/** Check if json is valid (well-formed)
+
+  @retval 1 - json is well-formed (scan finished with je.s.error == 0)
+  @retval 0 - json is invalid
+*/
+int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs)
+{
+  json_engine_t je;
+  json_scan_start(&je, cs, (const uchar *) js, (const uchar *) js + js_len);
+  while (json_scan_next(&je) == 0) /* no-op */ ;
+  return je.s.error == 0;
+}
+
+
+/*
+  Expects a JSON object as the js argument, and the key name.
+  Looks for this key in the object and returns
+  the location of all the text related to it.
+  The text includes the comma, separating this key.
+
+  comma_pos - the hint where the comma is. It is important
+       if you plan to replace the key rather than just cut.
+    1  - comma is on the left
+    2  - comma is on the right.
+    0  - no comma at all (the object has just this single key)
+ 
+  Returns 0 (or 1 on error); if no such key is found, *key_start is NULL.
+*/
+int json_locate_key(const char *js, const char *js_end,
+                    const char *kname,
+                    const char **key_start, const char **key_end,
+                    int *comma_pos)
+{
+  const char *kname_end= kname + strlen(kname);
+  json_engine_t je;
+  json_string_t key_name;
+  int t_next, c_len, match_result;
+
+  json_string_set_cs(&key_name, &my_charset_utf8mb4_bin);
+
+  json_scan_start(&je, &my_charset_utf8mb4_bin,(const uchar *) js,
+                  (const uchar *) js_end);
+
+  if (json_read_value(&je) ||
+      je.value_type != JSON_VALUE_OBJECT)
+    goto err_return;
+
+  *key_start= (const char *) je.s.c_str;
+  *comma_pos= 0;
+
+  while (!json_scan_next(&je))
+  {
+    switch (je.state)
+    {
+    case JST_KEY:
+      json_string_set_str(&key_name, (const uchar *) kname,
+                          (const uchar *) kname_end);
+      match_result= json_key_matches(&je, &key_name);
+      if (json_skip_key(&je))
+        goto err_return;
+      get_first_nonspace(&je.s, &t_next, &c_len);
+      je.s.c_str-= c_len;
+
+      if (match_result)
+      {
+        *key_end= (const char *) je.s.c_str;
+
+        if (*comma_pos == 1)
+          return 0;
+
+        DBUG_ASSERT(*comma_pos == 0);
+
+        if (t_next == C_COMMA)
+        {
+          *key_end+= c_len;
+          *comma_pos= 2;
+        }
+        else if (t_next == C_RCURB)
+          *comma_pos= 0;
+        else
+          goto err_return;
+        return 0;
+      }
+
+      *key_start= (const char *) je.s.c_str;
+      *comma_pos= 1;
+      break;
+
+    case JST_OBJ_END:
+      *key_start= NULL;
+      return 0;
+    }
+  }
+
+err_return:
+  return 1;
+
+}
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index c647a5ef57e..9dfccb9018c 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -15,11 +15,18 @@
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
-
 #ifndef MY_FUNCTION_NAME
 #error MY_FUNCTION_NAME is not defined
 #endif
 
+/*
+  Define strnncoll() and strnncollsp() by default,
+  unless "#define DEFINE_STRNNCOLL 0" is specified.
+*/
+#ifndef DEFINE_STRNNCOLL
+#define DEFINE_STRNNCOLL 1
+#endif
+
 
 /*
   The weight for automatically padded spaces when comparing strings with
@@ -54,6 +61,8 @@
 #endif
 
 
+#if DEFINE_STRNNCOLL
+
 /**
   Scan a valid character, or a bad byte, or an auto-padded space
   from a string and calculate the weight of the scanned sequence.
@@ -278,6 +287,8 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
 }
 #endif
 
+#endif /* DEFINE_STRNNCOLL */
+
 
 #ifdef DEFINE_STRNXFRM
 #ifndef WEIGHT_MB2_FRM
@@ -322,11 +333,261 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
 #endif /* DEFINE_STRNXFRM */
 
 
+#if defined(DEFINE_STRNXFRM_UNICODE) || defined(DEFINE_STRNXFRM_UNICODE_NOPAD)
+
+/*
+  Store sorting weights using 2 bytes per character.
+
+  This function is shared between
+  - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
+    which support BMP only (U+0000..U+FFFF).
+  - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
+    which map all supplementary characters to weight 0xFFFD.
+*/
+
+#ifndef MY_MB_WC
+#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef OPTIMIZE_ASCII
+#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef UNICASE_MAXCHAR
+#error UNICASE_MAXCHAR must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef UNICASE_PAGE0
+#error UNICASE_PAGE0 must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+#ifndef UNICASE_PAGES
+#error UNICASE_PAGES must be defined for DEFINE_STRNXFRM_UNICODE
+#endif
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs,
+                                    uchar *dst, uchar *de,
+                                    uint *nweights,
+                                    const uchar *src, const uchar *se)
+{
+  my_wc_t UNINIT_VAR(wc);
+  uchar *dst0= dst;
+
+  DBUG_ASSERT(src || !se);
+  DBUG_ASSERT((cs->state & MY_CS_LOWER_SORT) == 0);
+  DBUG_ASSERT(0x7F <= UNICASE_MAXCHAR);
+
+  for (; dst < de && *nweights; (*nweights)--)
+  {
+    int res;
+#if OPTIMIZE_ASCII
+    if (src >= se)
+      break;
+    if (src[0] <= 0x7F)
+    {
+      wc= UNICASE_PAGE0[*src++].sort;
+      PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+      continue;
+    }
+#endif
+    if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
+      break;
+    src+= res;
+    if (wc <= UNICASE_MAXCHAR)
+    {
+      MY_UNICASE_CHARACTER *page;
+      if ((page= UNICASE_PAGES[wc >> 8]))
+        wc= page[wc & 0xFF].sort;
+    }
+    else
+      wc= MY_CS_REPLACEMENT_CHARACTER;
+    PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+  }
+  return dst - dst0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+                           uchar *dst, size_t dstlen, uint nweights,
+                           const uchar *src, size_t srclen, uint flags)
+{
+  uchar *dst0= dst;
+  uchar *de= dst + dstlen;
+  dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+                                            src, src + srclen);
+  DBUG_ASSERT(dst <= de); /* Safety */
+
+  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+    dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
+
+  my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+    dst+= my_strxfrm_pad_unicode(dst, de);
+  return dst - dst0;
+}
+
+
+#ifdef DEFINE_STRNXFRM_UNICODE_NOPAD
+static size_t
+MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
+                                 uchar *dst, size_t dstlen,
+                                 uint nweights,
+                                 const uchar *src, size_t srclen, uint flags)
+{
+  uchar *dst0= dst;
+  uchar *de= dst + dstlen;
+  dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+                                            src, src + srclen);
+  DBUG_ASSERT(dst <= de); /* Safety */
+
+  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+  {
+    size_t len= de - dst;
+    set_if_smaller(len, nweights * 2);
+    memset(dst, 0x00, len);
+    dst+= len;
+  }
+
+  my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+  {
+    memset(dst, 0x00, de - dst);
+    dst= de;
+  }
+  return dst - dst0;
+}
+#endif
+
+#endif /* DEFINE_STRNXFRM_UNICODE || DEFINE_STRNXFRM_UNICODE_NOPAD */
+
+
+
+#ifdef DEFINE_STRNXFRM_UNICODE_BIN2
+
+/*
+  Store sorting weights using 2 bytes per character.
+
+  These functions are shared between
+  - utf8mb3_general_ci, utf8_bin, ucs2_general_ci, ucs2_bin
+    which support BMP only (U+0000..U+FFFF).
+  - utf8mb4_general_ci, utf16_general_ci, utf32_general_ci,
+    which map all supplementary characters to weight 0xFFFD.
+*/
+
+#ifndef MY_MB_WC
+#error MY_MB_WC must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
+#endif
+
+#ifndef OPTIMIZE_ASCII
+#error OPTIMIZE_ASCII must be defined for DEFINE_STRNXFRM_UNICODE_BIN2
+#endif
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_internal)(CHARSET_INFO *cs,
+                                    uchar *dst, uchar *de,
+                                    uint *nweights,
+                                    const uchar *src,
+                                    const uchar *se)
+{
+  my_wc_t UNINIT_VAR(wc);
+  uchar *dst0= dst;
+
+  DBUG_ASSERT(src || !se);
+
+  for (; dst < de && *nweights; (*nweights)--)
+  {
+    int res;
+#if OPTIMIZE_ASCII
+    if (src >= se)
+      break;
+    if (src[0] <= 0x7F)
+    {
+      wc= *src++;
+      PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+      continue;
+    }
+#endif
+    if ((res= MY_MB_WC(cs, &wc, src, se)) <= 0)
+      break;
+    src+= res;
+    if (wc > 0xFFFF)
+      wc= MY_CS_REPLACEMENT_CHARACTER;
+    PUT_WC_BE2_HAVE_1BYTE(dst, de, wc);
+  }
+  return dst - dst0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
+                           uchar *dst, size_t dstlen, uint nweights,
+                           const uchar *src, size_t srclen, uint flags)
+{
+  uchar *dst0= dst;
+  uchar *de= dst + dstlen;
+  dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+                                            src, src + srclen);
+  DBUG_ASSERT(dst <= de); /* Safety */
+
+  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+    dst+= my_strxfrm_pad_nweights_unicode(dst, de, nweights);
+
+  my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+    dst+= my_strxfrm_pad_unicode(dst, de);
+  return dst - dst0;
+}
+
+
+static size_t
+MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
+                                 uchar *dst, size_t dstlen, uint nweights,
+                                 const uchar *src, size_t srclen, uint flags)
+{
+  uchar *dst0= dst;
+  uchar *de= dst + dstlen;
+  dst+= MY_FUNCTION_NAME(strnxfrm_internal)(cs, dst, de, &nweights,
+                                            src, src + srclen);
+  DBUG_ASSERT(dst <= de); /* Safety */
+
+  if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
+  {
+    size_t len= de - dst;
+    set_if_smaller(len, nweights * 2);
+    memset(dst, 0x00, len);
+    dst+= len;
+  }
+
+  my_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
+
+  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
+  {
+    memset(dst, 0x00, de - dst);
+    dst= de;
+  }
+  return dst - dst0;
+}
+
+#endif /* DEFINE_STRNXFRM_UNICODE_BIN2 */
+
+
 /*
   We usually include this file at least two times from the same source file,
   for the _ci and the _bin collations. Prepare for the second inclusion.
 */
 #undef MY_FUNCTION_NAME
+#undef MY_MB_WC
+#undef OPTIMIZE_ASCII
+#undef UNICASE_MAXCHAR
+#undef UNICASE_PAGE0
+#undef UNICASE_PAGES
 #undef WEIGHT_ILSEQ
 #undef WEIGHT_MB1
 #undef WEIGHT_MB2
@@ -335,4 +596,8 @@ MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
 #undef WEIGHT_PAD_SPACE
 #undef WEIGHT_MB2_FRM
 #undef DEFINE_STRNXFRM
+#undef DEFINE_STRNXFRM_UNICODE
+#undef DEFINE_STRNXFRM_UNICODE_NOPAD
+#undef DEFINE_STRNXFRM_UNICODE_BIN2
+#undef DEFINE_STRNNCOLL
 #undef DEFINE_STRNNCOLLSP_NOPAD