MDEV-25904 New collation functions to compare InnoDB style trimmed NO PAD strings
branch: bb-10.4-bar-MDEV-25904

author: Alexander Barkov <bar@mariadb.com> 2021-09-29 15:13:57 +0400
committer: Alexander Barkov <bar@mariadb.com> 2022-01-21 12:16:07 +0400
commit: b915f79e4e004fde4f6ac8f341afee980e11792b (patch)
tree: 2568032d75c7af9a72c6669b306fda4418b5ed20
parent: db574173d19731f1e5dc75d325f72398afac8d59 (diff)
download: mariadb-git-bb-10.4-bar-MDEV-25904.tar.gz
25 files changed, 1150 insertions, 144 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 0f6e6a11666..187c8710929 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -330,6 +330,60 @@ struct my_collation_handler_st
 		       const uchar *, size_t, const uchar *, size_t, my_bool);
   int     (*strnncollsp)(CHARSET_INFO *,
                          const uchar *, size_t, const uchar *, size_t);
+  /*
+    strnncollsp_nchars() - similar to strnncollsp() but assumes that both
+                           strings were originally CHAR(N) values with the
+                           same N, then were optionally space-padded,
+                           or optionally space-trimmed.
+
+                           In other words, this function compares the values
+                           as if both were inserted into a CHAR(N) column
+                           and the two column values were then compared.
+
+    It compares the same amount of characters from the two strings.
+    This is especially important for NOPAD collations.
+
+    If CHAR_LENGTH of the two strings are different,
+    the shorter string is virtually padded with trailing spaces
+    up to CHAR_LENGTH of the longer string, to guarantee that the
+    same amount of characters are compared.
+    This is important if the two CHAR(N) strings are space-trimmed 
+    (e.g. like in InnoDB compact format for CHAR).
+
+    The function compares at most "nchars" characters.
+    This can be useful to compare CHAR(N) space-padded strings
+    (when the exact N is known) without having to truncate them before
+    the comparison.
+
+    For example, Field_string stores a "CHAR(3) CHARACTER SET utf8mb4" value
+    of "aaa" as 12 bytes in a record buffer:
+    - 3 bytes of the actual data, followed by
+    - 9 bytes of spaces (just fillers, not real data)
+    The caller can pass nchars=3 to compare CHAR(3) record values.
+    In such case, the comparator won't go inside the 9 bytes of the fillers.
+
+    If N is not known, the caller can pass max(len1,len2) as the "nchars" value
+    (i.e. the maximum of the OCTET_LENGTH of the two strings).
+
+    Notes on complex collations.
+
+    This function counts contraction parts as individual characters.
+    For example, the Czech letter 'ch' (in Czech collations)
+    is ordinarily counted by the "nchars" limit as TWO characters
+    (although it is only one letter).
+    This corresponds to what CHAR(N) does in INSERT.
+
+    If the "nchars" limit tears apart a contraction, only the part fitting
+    into "nchars" characters is used. For example, in case of a Czech collation,
+    the string "ach" with nchars=2 is compared as 'ac': the contraction
+    'ch' is torn apart and the letter 'c' acts as an individual character.
+    This emulates the same comparison result with the scenario when we insert
+    'ach' into a CHAR(2) column and then compare it.
+  */
+  int     (*strnncollsp_nchars)(CHARSET_INFO *,
+                                const uchar *str1, size_t len1,
+                                const uchar *str2, size_t len2,
+                                size_t nchars);
   size_t     (*strnxfrm)(CHARSET_INFO *,
                          uchar *dst, size_t dstlen, uint nweights,
                          const uchar *src, size_t srclen, uint flags);
diff --git a/sql/field.cc b/sql/field.cc
index 2226137b043..e3aa7d149a0 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -7433,23 +7433,10 @@ Field_string::compatible_field_size(uint field_metadata,
 
 int Field_string::cmp(const uchar *a_ptr, const uchar *b_ptr)
 {
-  size_t a_len, b_len;
-
-  if (field_charset->mbmaxlen != 1)
-  {
-    size_t char_len= field_length/field_charset->mbmaxlen;
-    a_len= my_charpos(field_charset, a_ptr, a_ptr + field_length, char_len);
-    b_len= my_charpos(field_charset, b_ptr, b_ptr + field_length, char_len);
-  }
-  else
-    a_len= b_len= field_length;
-  /*
-    We have to remove end space to be able to compare multi-byte-characters
-    like in latin_de 'ae' and 0xe4
-  */
-  return field_charset->coll->strnncollsp(field_charset,
-                                          a_ptr, a_len,
-                                          b_ptr, b_len);
+  return field_charset->coll->strnncollsp_nchars(field_charset,
+                                                 a_ptr, field_length,
+                                                 b_ptr, field_length,
+                                                 Field_string::char_length());
 }
 
 
@@ -7848,19 +7835,6 @@ int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr)
 }
 
 
-static int cmp_str_prefix(const uchar *ua, size_t alen, const uchar *ub,
-                          size_t blen, size_t prefix, CHARSET_INFO *cs)
-{
-  const char *a= (char*)ua, *b= (char*)ub;
-  MY_STRCOPY_STATUS status;
-  prefix/= cs->mbmaxlen;
-  alen= cs->cset->well_formed_char_length(cs, a, a + alen, prefix, &status);
-  blen= cs->cset->well_formed_char_length(cs, b, b + blen, prefix, &status);
-  return cs->coll->strnncollsp(cs, ua, alen, ub, blen);
-}
-
-
-
 int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
                                 size_t prefix_len)
 {
@@ -7880,8 +7854,12 @@ int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
     a_length= uint2korr(a_ptr);
     b_length= uint2korr(b_ptr);
   }
-  return cmp_str_prefix(a_ptr+length_bytes, a_length, b_ptr+length_bytes,
-                        b_length, prefix_len, field_charset);
+  return field_charset->coll->strnncollsp_nchars(field_charset,
+                                                 a_ptr + length_bytes,
+                                                 a_length,
+                                                 b_ptr + length_bytes,
+                                                 b_length,
+                                                 prefix_len / field_charset->mbmaxlen);
 }
 
 
@@ -8659,7 +8637,10 @@ int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
   memcpy(&blob1, a_ptr+packlength, sizeof(char*));
   memcpy(&blob2, b_ptr+packlength, sizeof(char*));
   size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr);
-  return cmp_str_prefix(blob1, a_len, blob2, b_len, prefix_len, field_charset);
+  return field_charset->coll->strnncollsp_nchars(field_charset,
+                                                 blob1, a_len,
+                                                 blob2, b_len,
+                                                 prefix_len / field_charset->mbmaxlen);
 }
 
 
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 3991a219ab5..fdaa34eeaf0 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6711,6 +6711,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
   NULL,			/* init */
   my_strnncoll_big5_chinese_ci,
   my_strnncollsp_big5_chinese_ci,
+  my_strnncollsp_nchars_big5_chinese_ci,
   my_strnxfrm_big5_chinese_ci,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -6727,6 +6728,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
   NULL,	                /* init */
   my_strnncoll_big5_bin,
   my_strnncollsp_big5_bin,
+  my_strnncollsp_nchars_big5_bin,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -6743,6 +6745,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_nopad_ci=
   NULL,			/* init */
   my_strnncoll_big5_chinese_ci,
   my_strnncollsp_big5_chinese_nopad_ci,
+  my_strnncollsp_nchars_big5_chinese_nopad_ci,
   my_strnxfrm_big5_chinese_nopad_ci,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -6759,6 +6762,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_nopad_bin=
   NULL,	                /* init */
   my_strnncoll_big5_bin,
   my_strnncollsp_big5_nopad_bin,
+  my_strnncollsp_nchars_big5_nopad_bin,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index bc0d794db3d..2893aadd99f 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -125,6 +125,17 @@ static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
 }
 
 
+static int my_strnncollsp_nchars_binary(CHARSET_INFO * cs __attribute__((unused)),
+                                        const uchar *s, size_t slen,
+                                        const uchar *t, size_t tlen,
+                                        size_t nchars)
+{
+  set_if_smaller(slen, nchars); /* presumably caps the byte length at nchars */
+  set_if_smaller(tlen, nchars); /* same limit for the second string */
+  return my_strnncoll_binary(cs, s, slen, t, tlen, 0); /* compare the limited ranges */
+}
+
+
 static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
                                  const uchar *s, size_t slen,
                                  const uchar *t, size_t tlen,
@@ -199,6 +210,17 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
 }
 
 
+static int my_strnncollsp_nchars_8bit_bin(CHARSET_INFO * cs,
+                                          const uchar *a, size_t a_length,
+                                          const uchar *b, size_t b_length,
+                                          size_t nchars)
+{
+  set_if_smaller(a_length, nchars); /* presumably caps the byte length at nchars */
+  set_if_smaller(b_length, nchars); /* same limit for the second string */
+  return my_strnncollsp_8bit_bin(cs, a, a_length, b, b_length); /* delegate to strnncollsp */
+}
+
+
 static int my_strnncollsp_8bit_nopad_bin(CHARSET_INFO * cs
                                          __attribute__((unused)),
                                          const uchar *a, size_t a_length,
@@ -487,6 +509,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
   my_coll_init_8bit_bin,
   my_strnncoll_8bit_bin,
   my_strnncollsp_8bit_bin,
+  my_strnncollsp_nchars_8bit_bin,
   my_strnxfrm_8bit_bin,
   my_strnxfrmlen_simple,
   my_like_range_simple,
@@ -503,6 +526,7 @@ MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler =
   my_coll_init_8bit_bin,
   my_strnncoll_8bit_bin,
   my_strnncollsp_8bit_nopad_bin,
+  my_strnncollsp_nchars_8bit_bin,
   my_strnxfrm_8bit_nopad_bin,
   my_strnxfrmlen_simple,
   my_like_range_simple,
@@ -519,6 +543,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
   NULL,			/* init */
   my_strnncoll_binary,
   my_strnncollsp_binary,
+  my_strnncollsp_nchars_binary,
   my_strnxfrm_8bit_bin,
   my_strnxfrmlen_simple,
   my_like_range_simple,
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index bf97d1feb83..94450af4b91 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -34667,6 +34667,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci=
   NULL,                  /* init */
   my_strnncoll_cp932_japanese_ci,
   my_strnncollsp_cp932_japanese_ci,
+  my_strnncollsp_nchars_cp932_japanese_ci,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -34683,6 +34684,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
   NULL,	                /* init */
   my_strnncoll_cp932_bin,
   my_strnncollsp_cp932_bin,
+  my_strnncollsp_nchars_cp932_bin,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -34699,6 +34701,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_nopad_ci=
   NULL,                  /* init */
   my_strnncoll_cp932_japanese_ci,
   my_strnncollsp_cp932_japanese_nopad_ci,
+  my_strnncollsp_nchars_cp932_japanese_nopad_ci,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -34715,6 +34718,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_nopad_bin=
   NULL,	                /* init */
   my_strnncoll_cp932_bin,
   my_strnncollsp_cp932_nopad_bin,
+  my_strnncollsp_nchars_cp932_nopad_bin,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index 17c4c98c24e..33d43d4dd4e 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -610,6 +610,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
   NULL,			/* init */
   my_strnncoll_czech,
   my_strnncollsp_czech,
+  my_strnncollsp_nchars_generic_8bit,
   my_strnxfrm_czech,
   my_strnxfrmlen_czech,
   my_like_range_czech,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index deb13957900..22f8c4ec7c0 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -9957,6 +9957,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci=
   NULL,                 /* init */
   my_strnncoll_euckr_korean_ci,
   my_strnncollsp_euckr_korean_ci,
+  my_strnncollsp_nchars_euckr_korean_ci,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -9973,6 +9974,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
   NULL,                 /* init */
   my_strnncoll_euckr_bin,
   my_strnncollsp_euckr_bin,
+  my_strnncollsp_nchars_euckr_bin,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -9989,6 +9991,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_nopad_ci=
   NULL,                 /* init */
   my_strnncoll_euckr_korean_ci,
   my_strnncollsp_euckr_korean_nopad_ci,
+  my_strnncollsp_nchars_euckr_korean_nopad_ci,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -10005,6 +10008,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_nopad_bin=
   NULL,                 /* init */
   my_strnncoll_euckr_bin,
   my_strnncollsp_euckr_nopad_bin,
+  my_strnncollsp_nchars_euckr_nopad_bin,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 118e8286703..58ea37d36e6 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -67495,6 +67495,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler =
     NULL,		/* init */
     my_strnncoll_eucjpms_japanese_ci,
     my_strnncollsp_eucjpms_japanese_ci,
+    my_strnncollsp_nchars_eucjpms_japanese_ci,
     my_strnxfrm_mb,	/* strnxfrm     */
     my_strnxfrmlen_simple,
     my_like_range_mb,   /* like_range   */
@@ -67511,6 +67512,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
     NULL,		/* init */
     my_strnncoll_eucjpms_bin,
     my_strnncollsp_eucjpms_bin,
+    my_strnncollsp_nchars_eucjpms_bin,
     my_strnxfrm_mb,
     my_strnxfrmlen_simple,
     my_like_range_mb,
@@ -67527,6 +67529,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_nopad_ci_handler =
     NULL,		/* init */
     my_strnncoll_eucjpms_japanese_ci,
     my_strnncollsp_eucjpms_japanese_nopad_ci,
+    my_strnncollsp_nchars_eucjpms_japanese_nopad_ci,
     my_strnxfrm_mb_nopad,	/* strnxfrm     */
     my_strnxfrmlen_simple,
     my_like_range_mb,   /* like_range   */
@@ -67543,6 +67546,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_nopad_bin_handler =
     NULL,		/* init */
     my_strnncoll_eucjpms_bin,
     my_strnncollsp_eucjpms_nopad_bin,
+    my_strnncollsp_nchars_eucjpms_nopad_bin,
     my_strnxfrm_mb_nopad,
     my_strnxfrmlen_simple,
     my_like_range_mb,
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 166619bf5cc..84246ad6671 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -6362,6 +6362,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci=
   NULL,                 /* init */
   my_strnncoll_gb2312_chinese_ci,
   my_strnncollsp_gb2312_chinese_ci,
+  my_strnncollsp_nchars_gb2312_chinese_ci,
   my_strnxfrm_mb,       /* strnxfrm   */
   my_strnxfrmlen_simple,
   my_like_range_mb,     /* like_range */
@@ -6378,6 +6379,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
   NULL,	                /* init */
   my_strnncoll_gb2312_bin,
   my_strnncollsp_gb2312_bin,
+  my_strnncollsp_nchars_gb2312_bin,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -6394,6 +6396,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_nopad_ci=
   NULL,                 /* init */
   my_strnncoll_gb2312_chinese_ci,
   my_strnncollsp_gb2312_chinese_nopad_ci,
+  my_strnncollsp_nchars_gb2312_chinese_nopad_ci,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -6410,6 +6413,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_nopad_bin=
   NULL,	                /* init */
   my_strnncoll_gb2312_bin,
   my_strnncollsp_gb2312_nopad_bin,
+  my_strnncollsp_nchars_gb2312_nopad_bin,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index efaa2e5c728..d7ea47c409f 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -10645,6 +10645,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
   NULL,                 /* init */
   my_strnncoll_gbk_chinese_ci,
   my_strnncollsp_gbk_chinese_ci,
+  my_strnncollsp_nchars_gbk_chinese_ci,
   my_strnxfrm_gbk_chinese_ci,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -10661,6 +10662,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
   NULL,                 /* init */
   my_strnncoll_gbk_bin,
   my_strnncollsp_gbk_bin,
+  my_strnncollsp_nchars_gbk_bin,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -10677,6 +10679,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_nopad_ci=
   NULL,                 /* init */
   my_strnncoll_gbk_chinese_ci,
   my_strnncollsp_gbk_chinese_nopad_ci,
+  my_strnncollsp_nchars_gbk_chinese_nopad_ci,
   my_strnxfrm_gbk_chinese_nopad_ci,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -10693,6 +10696,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_nopad_bin=
   NULL,                 /* init */
   my_strnncoll_gbk_bin,
   my_strnncollsp_gbk_nopad_bin,
+  my_strnncollsp_nchars_gbk_nopad_bin,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index f9fa1488aa6..bcf1cc6c9f1 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -726,6 +726,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
   NULL,			/* init */
   my_strnncoll_latin1_de,
   my_strnncollsp_latin1_de,
+  my_strnncollsp_nchars_generic_8bit,
   my_strnxfrm_latin1_de,
   my_strnxfrmlen_simple,
   my_like_range_simple,
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 9c6cb34137d..d150e457673 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -208,6 +208,18 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
 }
 
 
+static int
+my_strnncollsp_nchars_simple(CHARSET_INFO * cs,
+                             const uchar *a, size_t a_length,
+                             const uchar *b, size_t b_length,
+                             size_t nchars)
+{
+  set_if_smaller(a_length, nchars); /* presumably caps the byte length at nchars */
+  set_if_smaller(b_length, nchars); /* same limit for the second string */
+  return my_strnncollsp_simple(cs, a, a_length, b, b_length); /* delegate to strnncollsp */
+}
+
+
 int my_strnncollsp_simple_nopad(CHARSET_INFO * cs,
                                 const uchar *a, size_t a_length,
                                 const uchar *b, size_t b_length)
@@ -2096,6 +2108,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
     my_coll_init_simple,	/* init */
     my_strnncoll_simple,
     my_strnncollsp_simple,
+    my_strnncollsp_nchars_simple,
     my_strnxfrm_simple,
     my_strnxfrmlen_simple,
     my_like_range_simple,
@@ -2112,6 +2125,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler =
     my_coll_init_simple,	/* init */
     my_strnncoll_simple,
     my_strnncollsp_simple_nopad,
+    my_strnncollsp_nchars_simple,
     my_strnxfrm_simple_nopad,
     my_strnxfrmlen_simple,
     my_like_range_simple,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 902034b435d..bd2bf432a34 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -34046,6 +34046,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
   NULL,                 /* init */
   my_strnncoll_sjis_japanese_ci,
   my_strnncollsp_sjis_japanese_ci,
+  my_strnncollsp_nchars_sjis_japanese_ci,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -34062,6 +34063,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
   NULL,                 /* init */
   my_strnncoll_sjis_bin,
   my_strnncollsp_sjis_bin,
+  my_strnncollsp_nchars_sjis_bin,
   my_strnxfrm_mb,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -34078,6 +34080,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_nopad_ci=
   NULL,                 /* init */
   my_strnncoll_sjis_japanese_ci,
   my_strnncollsp_sjis_japanese_nopad_ci,
+  my_strnncollsp_nchars_sjis_japanese_nopad_ci,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
@@ -34094,6 +34097,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_nopad_bin=
   NULL,                 /* init */
   my_strnncoll_sjis_bin,
   my_strnncollsp_sjis_nopad_bin,
+  my_strnncollsp_nchars_sjis_nopad_bin,
   my_strnxfrm_mb_nopad,
   my_strnxfrmlen_simple,
   my_like_range_mb,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 9760ea25162..d5367393c86 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -852,6 +852,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
     NULL,		/* init */
     my_strnncoll_tis620,
     my_strnncollsp_tis620,
+    my_strnncollsp_nchars_generic_8bit,
     my_strnxfrm_tis620,
     my_strnxfrmlen_simple,
     my_like_range_simple,
@@ -867,6 +868,7 @@ static MY_COLLATION_HANDLER my_collation_nopad_ci_handler =
     NULL,		/* init */
     my_strnncoll_tis620,
     my_strnncollsp_tis620_nopad,
+    my_strnncollsp_nchars_generic_8bit,
     my_strnxfrm_tis620_nopad,
     my_strnxfrmlen_simple,
     my_like_range_simple,
diff --git a/strings/ctype-uca-scanner_next.inl b/strings/ctype-uca-scanner_next.inl
new file mode 100644
index 00000000000..79d25487b42
--- /dev/null
+++ b/strings/ctype-uca-scanner_next.inl
@@ -0,0 +1,179 @@
+/* Copyright (c) 2004, 2013, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2021, MariaDB   
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public
+   License as published by the Free Software Foundation; version 2
+   of the License.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with this library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
+   MA 02110-1335  USA */
+
+
+#ifdef SCANNER_NEXT_NCHARS
+
+#define SCANNER_NEXT_RETURN(_w,_n) \
+  do { weight_and_nchars_t rc= {_w, _n}; return rc; } while(0)
+
+#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
+  do { \
+    weight_and_nchars_t rc= { _cnt->weight[0], \
+                              _ignorable_nchars + \
+                              my_contraction_char_length(_cnt) }; \
+     return rc; \
+  } while(0)
+
+#else
+
+#define SCANNER_NEXT_RETURN(_w,_n) do { return _w; } while (0)
+
+#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
+  do { return _cnt->weight[0]; } while(0)
+
+#endif
+
+static inline
+#ifdef SCANNER_NEXT_NCHARS
+weight_and_nchars_t
+MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner,
+                                           size_t nchars)
+#else
+int
+MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
+#endif
+{
+#ifdef SCANNER_NEXT_NCHARS
+  uint ignorable_nchars; /* characters skipped in this call because they had no weight */
+#define LOCAL_MAX_CONTRACTION_LENGTH nchars
+#else
+#define LOCAL_MAX_CONTRACTION_LENGTH MY_UCA_MAX_CONTRACTION
+#endif
+  /*
+    Check if the weights for the previous character have been
+    already fully scanned. If yes, then get the next character and
+    point wbeg to its weight string.
+  */
+
+  if (scanner->wbeg[0])
+  {
+    /*
+      More weights left from the previous step.
+      Return the next weight from the current expansion.
+      Return "0" as "nchars". The real nchars was set on a previous
+      iteration.
+    */
+    SCANNER_NEXT_RETURN(*scanner->wbeg++, 0);
+  }
+
+#ifdef SCANNER_NEXT_NCHARS
+  for (ignorable_nchars= 0 ; ; ignorable_nchars++)
+#else
+  for ( ; ; )
+#endif
+  {
+    const uint16 *wpage;
+    my_wc_t wc[MY_UCA_MAX_CONTRACTION];
+    int mblen;
+
+    /* Get next character */
+#if MY_UCA_ASCII_OPTIMIZE
+    /* Fast path: the next byte is below 0x80, take it as the character */
+    if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
+    {
+      wc[0]= scanner->sbeg[0];
+      scanner->sbeg+= 1;
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+      if (my_uca_needs_context_handling(scanner->level, wc[0]))
+      {
+        const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
+                                                  LOCAL_MAX_CONTRACTION_LENGTH);
+        if (cnt)
+          SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+      }
+#endif
+
+      scanner->page= 0;
+      scanner->code= (int) wc[0];
+      scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
+      if (scanner->wbeg[0])
+        SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
+      continue; /* no weight for this character: go on to the next one */
+    }
+    else
+#endif
+    /* Get next MB character */
+    if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
+                                       scanner->send)) <= 0))
+    {
+      if (scanner->sbeg >= scanner->send)
+      {
+        /* No more bytes, end of string reached */
+        SCANNER_NEXT_RETURN(-1, ignorable_nchars);
+      }
+      /*
+        There are some more bytes left. Non-positive mblen means that
+        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
+      */
+      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
+      {
+        /* For safety purposes don't go beyond the string range. */
+        scanner->sbeg= scanner->send;
+      }
+      /*
+        Treat every complete or incomplete mbminlen unit as a weight which is
+        greater than weight for any possible normal character.
+        0xFFFF is greater than any possible weight in the UCA weight table.
+      */
+      SCANNER_NEXT_RETURN(0xFFFF, ignorable_nchars + 1);
+    }
+
+    scanner->sbeg+= mblen;
+    if (wc[0] > scanner->level->maxchar)
+    {
+      /* Return 0xFFFD as weight for all characters outside BMP */
+      scanner->wbeg= nochar;
+      SCANNER_NEXT_RETURN(0xFFFD, ignorable_nchars + 1);
+    }
+
+#if MY_UCA_COMPILE_CONTRACTIONS
+    if (my_uca_needs_context_handling(scanner->level, wc[0]))
+    {
+      const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
+                                                LOCAL_MAX_CONTRACTION_LENGTH);
+      if (cnt)
+        SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+    }
+#endif
+
+    /* Process single character */
+    scanner->page= wc[0] >> 8;
+    scanner->code= wc[0] & 0xFF;
+
+    /* If weight page for wc[0] does not exist, then calculate algorithmically */
+    if (!(wpage= scanner->level->weights[scanner->page]))
+      SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner),
+                          ignorable_nchars + 1);
+
+    /* Calculate pointer to wc[0]'s weight, using page and offset */
+    scanner->wbeg= wpage +
+                   scanner->code * scanner->level->lengths[scanner->page];
+    if (scanner->wbeg[0])
+      break;
+    /* Skip ignorable character and continue the loop */
+  }
+
+  SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
+}
+
+#undef SCANNER_NEXT_NCHARS
+#undef SCANNER_NEXT_RETURN
+#undef SCANNER_NEXT_RETURN_CONTRACTION
+#undef LOCAL_MAX_CONTRACTION_LENGTH
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index 161830088a5..551efd8b0be 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -35,6 +35,12 @@
 #include "strings_def.h"
 #include <m_ctype.h>
 
+typedef struct
+{
+  int weight;  /* weight value handed back by the scanner */
+  uint nchars; /* character count reported together with that weight */
+} weight_and_nchars_t;
+
 #define  MY_CS_COMMON_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NON1TO1)
 
 #define MY_UCA_CNT_FLAG_SIZE 4096
@@ -31450,6 +31456,21 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
 }
 
 
+/* Return the number of characters in a contraction: the index of the
+   first zero element of cnt->ch at position 2 or later, or the full size
+   of cnt->ch if there is none (positions 0 and 1 are not examined). */
+static inline uint my_contraction_char_length(const MY_CONTRACTION *cnt)
+{
+  uint i;
+  for (i= 2; i < array_elements(cnt->ch); i++) /* start at 2: see above */
+  {
+    if (cnt->ch[i] == 0)
+      return i; /* first zero element marks the end */
+  }
+  return array_elements(cnt->ch); /* no zero found: every slot is used */
+}
+
+
 /**
   Check if a string is a contraction,
   and return its weight array on success.
@@ -31487,8 +31508,9 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
   a contraction part. Then try to find real contraction among the
   candidates, starting from the longest.
 
-  @param scanner  Pointer to UCA scanner
-  @param[OUT] *wc Where to store the scanned string
+  @param scanner         Pointer to UCA scanner
+  @param[OUT] *wc        Where to store the scanned string
+  @param max_char_length The longest contraction character length allowed
 
   @return         Weight array
   @retval         NULL - no contraction found
@@ -31496,7 +31518,8 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
 */
 
 static const MY_CONTRACTION *
-my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
+my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc,
+                                size_t max_char_length)
 {
   size_t clen= 1;
   int flag;
@@ -31505,7 +31528,7 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
 
   /* Scan all contraction candidates */
   for (s= scanner->sbeg, flag= MY_UCA_CNT_MID1;
-       clen < MY_UCA_MAX_CONTRACTION;
+       clen < max_char_length;
        flag<<= 1)
   {
     int mblen;
@@ -31582,11 +31605,14 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
                    If wc[0] and the previous character make a previous context
                    pair, then wc[1] is set to the previous character.
 
+  @param max_char_length - the longest contraction character length allowed.
+
   @retval          NULL if could not find any contextual weights for wc[0]
   @retval          non null pointer - the address of MY_CONTRACTION found
 */
 static inline const MY_CONTRACTION *
-my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
+my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc,
+                           size_t max_char_length)
 {
   const MY_CONTRACTION *cnt;
   DBUG_ASSERT(scanner->level->contractions.nitems);
@@ -31614,7 +31640,7 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
                                           wc[0]))
   {
     /* Check if w[0] starts a contraction */
-    if ((cnt= my_uca_scanner_contraction_find(scanner, wc)))
+    if ((cnt= my_uca_scanner_contraction_find(scanner, wc, max_char_length)))
       return cnt;
   }
   return NULL;
diff --git a/strings/ctype-uca.ic b/strings/ctype-uca.ic
index bb0eee85886..7c9d34d217e 100644
--- a/strings/ctype-uca.ic
+++ b/strings/ctype-uca.ic
@@ -35,108 +35,9 @@
 #error MY_UCA_COLL_INIT is not defined
 #endif
 
-
-static inline int
-MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
-{
-  /*
-    Check if the weights for the previous character have been
-    already fully scanned. If yes, then get the next character and
-    initialize wbeg and wlength to its weight string.
-  */
-
-  if (scanner->wbeg[0])      /* More weights left from the previous step: */
-    return *scanner->wbeg++; /* return the next weight from expansion     */
-
-  do
-  {
-    const uint16 *wpage;
-    my_wc_t wc[MY_UCA_MAX_CONTRACTION];
-    int mblen;
-
-    /* Get next character */
-#if MY_UCA_ASCII_OPTIMIZE
-    /* Get next ASCII character */
-    if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
-    {
-      wc[0]= scanner->sbeg[0];
-      scanner->sbeg+= 1;
-
-#if MY_UCA_COMPILE_CONTRACTIONS
-      if (my_uca_needs_context_handling(scanner->level, wc[0]))
-      {
-        const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc);
-        if (cnt)
-          return cnt->weight[0];
-      }
-#endif
-
-      scanner->page= 0;
-      scanner->code= (int) wc[0];
-      scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
-      if (scanner->wbeg[0])
-        return *scanner->wbeg++;
-      continue;
-    }
-    else
-#endif
-    /* Get next MB character */
-    if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
-                                       scanner->send)) <= 0))
-    {
-      if (scanner->sbeg >= scanner->send)
-        return -1; /* No more bytes, end of line reached */
-      /*
-        There are some more bytes left. Non-positive mb_len means that
-        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
-      */
-      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
-      {
-        /* For safety purposes don't go beyond the string range. */
-        scanner->sbeg= scanner->send;
-      }
-      /*
-        Treat every complete or incomplete mbminlen unit as a weight which is
-        greater than weight for any possible normal character.
-        0xFFFF is greater than any possible weight in the UCA weight table.
-      */
-      return 0xFFFF;
-    }
-
-    scanner->sbeg+= mblen;
-    if (wc[0] > scanner->level->maxchar)
-    {
-      /* Return 0xFFFD as weight for all characters outside BMP */
-      scanner->wbeg= nochar;
-      return 0xFFFD;
-    }
-
-#if MY_UCA_COMPILE_CONTRACTIONS
-    if (my_uca_needs_context_handling(scanner->level, wc[0]))
-    {
-      const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc);
-      if (cnt)
-        return cnt->weight[0];
-    }
-#endif
-
-    /* Process single character */
-    scanner->page= wc[0] >> 8;
-    scanner->code= wc[0] & 0xFF;
-
-    /* If weight page for w[0] does not exist, then calculate algoritmically */
-    if (!(wpage= scanner->level->weights[scanner->page]))
-      return my_uca_scanner_next_implicit(scanner);
-
-    /* Calculate pointer to w[0]'s weight, using page and offset */
-    scanner->wbeg= wpage +
-                   scanner->code * scanner->level->lengths[scanner->page];
-  } while (!scanner->wbeg[0]); /* Skip ignorable characters */
-
-  return *scanner->wbeg++;
-}
-
-
+#include "ctype-uca-scanner_next.inl"
+#define SCANNER_NEXT_NCHARS
+#include "ctype-uca-scanner_next.inl"
 
 /*
   Compares two strings according to the collation
@@ -409,6 +310,173 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
 }
 
 
+/*
+  Scan the next weight within the "nchars" limit. At end of input, or when
+  the limit is exceeded, return a pad/trim weight and increment "generated".
+*/
+static inline weight_and_nchars_t
+MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
+                                        size_t nchars,
+                                        uint *generated)
+{
+  weight_and_nchars_t res;
+  if (nchars > 0 ||
+      scanner->wbeg[0] /* Some weights from a previous expansion left */)
+  {
+    if ((res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner,
+                                                         nchars)).weight < 0)
+    {
+      /*
+        We reached the end of the string, but the caller wants more weights.
+        Perform space padding.
+      */
+      res.weight= my_space_weight(scanner->level);
+      res.nchars= 1; /* The virtual space is counted as one character */
+      (*generated)++;
+    }
+    else if (res.nchars > nchars)
+    {
+      /*
+        We scanned the next collation element, but it does not fit into
+        the "nchars" limit. This is possible in case of:
+        - A contraction, e.g. Czech 'ch' with nchars=1
+        - A sequence of ignorable characters followed by non-ignorable ones,
+          e.g. CONCAT(x'00','a') with nchars=1.
+        Perform trimming: weight 0 for NOPAD, the space weight otherwise.
+      */
+      res.weight= scanner->cs->state & MY_CS_NOPAD ?
+                  0 : my_space_weight(scanner->level);
+      res.nchars= (uint) nchars;
+      (*generated)++;
+    }
+  }
+  else
+  {
+    /* nchars==0 and no pending expansion weights. Perform trimming. */
+    res.weight= scanner->cs->state & MY_CS_NOPAD ?
+                0 : my_space_weight(scanner->level);
+    res.nchars= 0;
+    (*generated)++;
+  }
+  return res;
+}
+
+
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
+                                              const MY_UCA_WEIGHT_LEVEL *level,
+                                              const uchar *s, size_t slen,
+                                              const uchar *t, size_t tlen,
+                                              size_t nchars)
+{
+  my_uca_scanner sscanner;
+  my_uca_scanner tscanner;
+  size_t s_nchars_left= nchars; /* Character budget still available for s */
+  size_t t_nchars_left= nchars; /* Character budget still available for t */
+
+  my_uca_scanner_init_any(&sscanner, cs, level, s, slen);
+  my_uca_scanner_init_any(&tscanner, cs, level, t, tlen);
+
+  for ( ; ; )
+  {
+    weight_and_nchars_t s_res;
+    weight_and_nchars_t t_res;
+    uint generated= 0; /* How many of the two weights below are synthetic */
+    int diff;
+
+    s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left,
+                                                   &generated);
+    t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left,
+                                                   &generated);
+    if ((diff= (s_res.weight - t_res.weight)))
+      return diff;
+
+    if (generated == 2)
+    {
+      if (cs->state & MY_CS_NOPAD)
+      {
+        /*
+          Both values are auto-generated. There's no real data any more.
+          We need to handle the remaining virtual trailing spaces.
+          The two strings still have s_nchars_left and t_nchars_left imaginary
+          trailing spaces at the end. If s_nchars_left != t_nchars_left,
+          the strings will not be equal in case of a NOPAD collation.
+
+          Example:
+          "B" is German "U+00DF LATIN SMALL LETTER SHARP S"
+          When we have these values in a
+          CHAR(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_nopad_ci
+          column:
+          'B  '        (one character, two trailing spaces)
+          'ss '        (two characters, one trailing space)
+          The 'B  ' is greater than the 'ss '.
+          They are compared in the following steps:
+            1. 'B' == 'ss'
+            2. ' ' == ' '
+            3. ' ' >   ''
+
+          We need to emulate the same behavior in this function even if
+          it's called with strings 'B' and 'ss' (with space trimmed).
+          The side which has more remaining virtual spaces at the end
+          is greater.
+        */
+        if (s_nchars_left < t_nchars_left)
+          return -1;
+        if (s_nchars_left > t_nchars_left)
+          return +1;
+      }
+      return 0;
+    }
+
+    DBUG_ASSERT(s_nchars_left >= s_res.nchars);
+    DBUG_ASSERT(t_nchars_left >= t_res.nchars);
+    s_nchars_left-= s_res.nchars;
+    t_nchars_left-= t_res.nchars;
+  }
+
+  return 0; /* Not reached: the loop above exits only through "return" */
+}
+
+
+/*
+  One-level collations: compare using only the first weight level, level[0].
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs,
+                                     const uchar *s, size_t slen,
+                                     const uchar *t, size_t tlen,
+                                     size_t nchars)
+{
+  return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, &cs->uca->level[0],
+                                                       s, slen, t, tlen,
+                                                       nchars);
+}
+
+
+/*
+  Multi-level collations: try each level in order, stop at the first non-zero.
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs,
+                                                const uchar *s, size_t slen,
+                                                const uchar *t, size_t tlen,
+                                                size_t nchars)
+{
+  uint num_level= cs->levels_for_order;
+  uint i;
+  for (i= 0; i != num_level; i++)
+  {
+    int ret= MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs,
+                                                           &cs->uca->level[i],
+                                                           s, slen,
+                                                           t, tlen,
+                                                           nchars);
+    if (ret)
+       return ret;
+  }
+  return 0; /* The strings compared equal on every level */
+}
+
 
 /*
   Calculates hash value for the given string,
@@ -752,6 +820,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
   MY_UCA_COLL_INIT,
   MY_FUNCTION_NAME(strnncoll),
   MY_FUNCTION_NAME(strnncollsp),
+  MY_FUNCTION_NAME(strnncollsp_nchars),
   MY_FUNCTION_NAME(strnxfrm),
   my_strnxfrmlen_any_uca,
   MY_LIKE_RANGE,
@@ -773,6 +842,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
   MY_UCA_COLL_INIT,
   MY_FUNCTION_NAME(strnncoll),
   MY_FUNCTION_NAME(strnncollsp_nopad),
+  MY_FUNCTION_NAME(strnncollsp_nchars),
   MY_FUNCTION_NAME(strnxfrm_nopad),
   my_strnxfrmlen_any_uca,
   MY_LIKE_RANGE,    /* my_like_range_mb or my_like_range_generic */
@@ -792,6 +862,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
   MY_UCA_COLL_INIT,
   MY_FUNCTION_NAME(strnncoll_multilevel),
   MY_FUNCTION_NAME(strnncollsp_multilevel),
+  MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
   MY_FUNCTION_NAME(strnxfrm_multilevel),
   my_strnxfrmlen_any_uca_multilevel,
   MY_LIKE_RANGE,
@@ -811,6 +882,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
   MY_UCA_COLL_INIT,
   MY_FUNCTION_NAME(strnncoll_multilevel),
   MY_FUNCTION_NAME(strnncollsp_nopad_multilevel),
+  MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
   MY_FUNCTION_NAME(strnxfrm_multilevel),
   my_strnxfrmlen_any_uca_multilevel,
   MY_LIKE_RANGE,
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 0c153793e8e..36ab6f5c0b1 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1505,6 +1505,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16_general_ci,
   my_strnncollsp_utf16_general_ci,
+  my_strnncollsp_nchars_utf16_general_ci,
   my_strnxfrm_utf16_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
@@ -1521,6 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
   NULL,                /* init */
   my_strnncoll_utf16_bin,
   my_strnncollsp_utf16_bin,
+  my_strnncollsp_nchars_utf16_bin,
   my_strnxfrm_unicode_full_bin,
   my_strnxfrmlen_unicode_full_bin,
   my_like_range_generic,
@@ -1537,6 +1539,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16_general_ci,
   my_strnncollsp_utf16_general_nopad_ci,
+  my_strnncollsp_nchars_utf16_general_nopad_ci,
   my_strnxfrm_nopad_utf16_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
@@ -1553,6 +1556,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
   NULL,                /* init */
   my_strnncoll_utf16_bin,
   my_strnncollsp_utf16_nopad_bin,
+  my_strnncollsp_nchars_utf16_nopad_bin,
   my_strnxfrm_unicode_full_nopad_bin,
   my_strnxfrmlen_unicode_full_bin,
   my_like_range_generic,
@@ -1845,6 +1849,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16le_general_ci,
   my_strnncollsp_utf16le_general_ci,
+  my_strnncollsp_nchars_utf16le_general_ci,
   my_strnxfrm_utf16le_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
@@ -1861,6 +1866,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
   NULL,                /* init */
   my_strnncoll_utf16le_bin,
   my_strnncollsp_utf16le_bin,
+  my_strnncollsp_nchars_utf16le_bin,
   my_strnxfrm_unicode_full_bin,
   my_strnxfrmlen_unicode_full_bin,
   my_like_range_generic,
@@ -1877,6 +1883,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
   NULL,                /* init */
   my_strnncoll_utf16le_general_ci,
   my_strnncollsp_utf16le_general_nopad_ci,
+  my_strnncollsp_nchars_utf16le_general_nopad_ci,
   my_strnxfrm_nopad_utf16le_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
@@ -1893,6 +1900,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
   NULL,                /* init */
   my_strnncoll_utf16le_bin,
   my_strnncollsp_utf16le_nopad_bin,
+  my_strnncollsp_nchars_utf16le_nopad_bin,
   my_strnxfrm_unicode_full_nopad_bin,
   my_strnxfrmlen_unicode_full_bin,
   my_like_range_generic,
@@ -2671,6 +2679,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
   NULL, /* init */
   my_strnncoll_utf32_general_ci,
   my_strnncollsp_utf32_general_ci,
+  my_strnncollsp_nchars_utf32_general_ci,
   my_strnxfrm_utf32_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
@@ -2687,6 +2696,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
   NULL, /* init */
   my_strnncoll_utf32_bin,
   my_strnncollsp_utf32_bin,
+  my_strnncollsp_nchars_utf32_bin,
   my_strnxfrm_unicode_full_bin,
   my_strnxfrmlen_unicode_full_bin,
   my_like_range_generic,
@@ -2703,6 +2713,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
   NULL, /* init */
   my_strnncoll_utf32_general_ci,
   my_strnncollsp_utf32_general_nopad_ci,
+  my_strnncollsp_nchars_utf32_general_nopad_ci,
   my_strnxfrm_nopad_utf32_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_generic,
@@ -2719,6 +2730,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
   NULL, /* init */
   my_strnncoll_utf32_bin,
   my_strnncollsp_utf32_nopad_bin,
+  my_strnncollsp_nchars_utf32_nopad_bin,
   my_strnxfrm_unicode_full_nopad_bin,
   my_strnxfrmlen_unicode_full_bin,
   my_like_range_generic,
@@ -3261,6 +3273,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_general_ci,
     my_strnncollsp_ucs2_general_ci,
+    my_strnncollsp_nchars_ucs2_general_ci,
     my_strnxfrm_ucs2_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
@@ -3277,6 +3290,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_bin,
     my_strnncollsp_ucs2_bin,
+    my_strnncollsp_nchars_ucs2_bin,
     my_strnxfrm_ucs2_bin,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
@@ -3293,6 +3307,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_general_ci,
     my_strnncollsp_ucs2_general_nopad_ci,
+    my_strnncollsp_nchars_ucs2_general_nopad_ci,
     my_strnxfrm_nopad_ucs2_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
@@ -3309,6 +3324,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
     NULL,		/* init */
     my_strnncoll_ucs2_bin,
     my_strnncollsp_ucs2_nopad_bin,
+    my_strnncollsp_nchars_ucs2_nopad_bin,
     my_strnxfrm_nopad_ucs2_bin,
     my_strnxfrmlen_unicode,
     my_like_range_generic,
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 949f3aadc36..34600eda1a5 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -67239,6 +67239,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler =
     NULL,		/* init */
     my_strnncoll_ujis_japanese_ci,
     my_strnncollsp_ujis_japanese_ci,
+    my_strnncollsp_nchars_ujis_japanese_ci,
     my_strnxfrm_mb,     /* strnxfrm     */
     my_strnxfrmlen_simple,
     my_like_range_mb,   /* like_range   */
@@ -67255,6 +67256,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
     NULL,                    /* init */
     my_strnncoll_ujis_bin,
     my_strnncollsp_ujis_bin,
+    my_strnncollsp_nchars_ujis_bin,
     my_strnxfrm_mb,
     my_strnxfrmlen_simple,
     my_like_range_mb,
@@ -67271,6 +67273,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_nopad_ci_handler =
     NULL,                    /* init */
     my_strnncoll_ujis_japanese_ci,
     my_strnncollsp_ujis_japanese_nopad_ci,
+    my_strnncollsp_nchars_ujis_japanese_nopad_ci,
     my_strnxfrm_mb_nopad,
     my_strnxfrmlen_simple,
     my_like_range_mb,
@@ -67287,6 +67290,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_nopad_bin_handler =
     NULL,                    /* init */
     my_strnncoll_ujis_bin,
     my_strnncollsp_ujis_nopad_bin,
+    my_strnncollsp_nchars_ujis_nopad_bin,
     my_strnxfrm_mb_nopad,
     my_strnxfrmlen_simple,
     my_like_range_mb,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index e579d7b2bc6..7a87dbb7c05 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5357,6 +5357,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
     NULL,               /* init */
     my_strnncoll_utf8_general_ci,
     my_strnncollsp_utf8_general_ci,
+    my_strnncollsp_nchars_utf8_general_ci,
     my_strnxfrm_utf8_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
@@ -5373,6 +5374,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
     NULL,               /* init */
     my_strnncoll_utf8_general_mysql500_ci,
     my_strnncollsp_utf8_general_mysql500_ci,
+    my_strnncollsp_nchars_utf8_general_mysql500_ci,
     my_strnxfrm_utf8_general_mysql500_ci,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
@@ -5389,6 +5391,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
     NULL,		/* init */
     my_strnncoll_utf8_bin,
     my_strnncollsp_utf8_bin,
+    my_strnncollsp_nchars_utf8_bin,
     my_strnxfrm_utf8_bin,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
@@ -5405,6 +5408,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
   NULL,               /* init */
   my_strnncoll_utf8_general_ci,
   my_strnncollsp_utf8_general_nopad_ci,
+  my_strnncollsp_nchars_utf8_general_nopad_ci,
   my_strnxfrm_nopad_utf8_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
@@ -5421,6 +5425,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler =
   NULL,		/* init */
   my_strnncoll_utf8_bin,
   my_strnncollsp_utf8_nopad_bin,
+  my_strnncollsp_nchars_utf8_nopad_bin,
   my_strnxfrm_nopad_utf8_bin,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
@@ -5750,6 +5755,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler =
     NULL,		/* init */
     my_strnncoll_utf8_cs,
     my_strnncollsp_utf8_cs,
+    my_strnncollsp_nchars_generic,
     my_strnxfrm_utf8_general_ci,
     my_strnxfrmlen_unicode,
     my_like_range_simple,
@@ -7058,6 +7064,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
     NULL,               /* init */
     my_strnncoll_simple,
     my_strnncollsp_simple,
+    my_strnncollsp_nchars_generic,
     my_strnxfrm_filename,
     my_strnxfrmlen_unicode,
     my_like_range_mb,
@@ -7697,6 +7704,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
   NULL,               /* init */
   my_strnncoll_utf8mb4_general_ci,
   my_strnncollsp_utf8mb4_general_ci,
+  my_strnncollsp_nchars_utf8mb4_general_ci,
   my_strnxfrm_utf8mb4_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
@@ -7713,6 +7721,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
     NULL,              /* init */
     my_strnncoll_utf8mb4_bin,
     my_strnncollsp_utf8mb4_bin,
+    my_strnncollsp_nchars_utf8mb4_bin,
     my_strnxfrm_unicode_full_bin,
     my_strnxfrmlen_unicode_full_bin,
     my_like_range_mb,
@@ -7729,6 +7738,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler=
   NULL,               /* init */
   my_strnncoll_utf8mb4_general_ci,
   my_strnncollsp_utf8mb4_general_nopad_ci,
+  my_strnncollsp_nchars_utf8mb4_general_nopad_ci,
   my_strnxfrm_nopad_utf8mb4_general_ci,
   my_strnxfrmlen_unicode,
   my_like_range_mb,
@@ -7745,6 +7755,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_nopad_bin_handler =
   NULL,		/* init */
   my_strnncoll_utf8mb4_bin,
   my_strnncollsp_utf8mb4_nopad_bin,
+  my_strnncollsp_nchars_utf8mb4_nopad_bin,
   my_strnxfrm_unicode_full_nopad_bin,
   my_strnxfrmlen_unicode_full_bin,
   my_like_range_mb,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index f33a83294d6..15fa6299e4e 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -674,6 +674,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
   NULL,				/* init */
   my_strnncoll_win1250ch,
   my_strnncollsp_win1250ch,
+  my_strnncollsp_nchars_generic_8bit,
   my_strnxfrm_win1250ch,
   my_strnxfrmlen_simple,
   my_like_range_win1250ch,
diff --git a/strings/ctype.c b/strings/ctype.c
index 32c41e6e9e7..0cf1131ab57 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -1210,3 +1210,32 @@ outp:
   copy_status->m_source_end_pos= from;
   return to - to_start;
 }
+
+
+int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
+                                  const uchar *str1, size_t len1,
+                                  const uchar *str2, size_t len2,
+                                  size_t nchars)
+{
+  int error; /* Out-parameter of my_well_formed_length(); not inspected here */
+  len1= my_well_formed_length(cs, (const char *) str1,
+                                  (const char *) str1 + len1,
+                                  nchars, &error); /* Presumably the byte */
+  len2= my_well_formed_length(cs, (const char *) str2,
+                                  (const char *) str2 + len2,
+                                  nchars, &error); /* length of nchars chars */
+  DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0); /* Not for NOPAD collations */
+  return cs->coll->strnncollsp(cs, str1, len1, str2, len2);
+}
+
+
+int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
+                                       const uchar *str1, size_t len1,
+                                       const uchar *str2, size_t len2,
+                                       size_t nchars)
+{
+  set_if_smaller(len1, nchars); /* Clamp the byte length of str1 to nchars */
+  set_if_smaller(len2, nchars); /* Clamp the byte length of str2 to nchars */
+  DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0); /* Not for NOPAD collations */
+  return cs->coll->strnncollsp(cs, str1, len1, str2, len2);
+}
diff --git a/strings/strcoll.ic b/strings/strcoll.ic
index 86789fc4189..392a5dac589 100644
--- a/strings/strcoll.ic
+++ b/strings/strcoll.ic
@@ -287,6 +287,56 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
 }
 #endif
 
+
+/**
+  Compare two strings according to the collation,
+  with trailing space padding or trimming, according to "nchars".
+
+  @param cs          - the character set and collation
+  @param a           - the left string
+  @param a_length    - the length of the left string
+  @param b           - the right string
+  @param b_length    - the length of the right string
+  @param nchars      - the maximum number of weight pairs to compare
+  @return            - 0 if no difference was found, else a_weight - b_weight
+*/
+static int
+MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs __attribute__((unused)),
+                                     const uchar *a, size_t a_length,
+                                     const uchar *b, size_t b_length,
+                                     size_t nchars)
+{
+  const uchar *a_end= a + a_length;
+  const uchar *b_end= b + b_length;
+  for ( ; nchars ; nchars--)
+  {
+    int a_weight, b_weight, res;
+    uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
+    uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
+
+    if ((res= (a_weight - b_weight)))
+    {
+      /* Got two different weights. See comments in strnncollsp above. */
+      return res;
+    }
+    if (!a_wlen && !b_wlen)
+    {
+      /* Both scans consumed no bytes: two auto-generated trailing spaces. */
+      DBUG_ASSERT(a == a_end);
+      DBUG_ASSERT(b == b_end);
+      return 0;
+    }
+    /*
+      At least one of the strings has not ended yet, continue comparison.
+    */
+    DBUG_ASSERT(a < a_end || b < b_end);
+    a+= a_wlen;
+    b+= b_wlen;
+  }
+  return 0; /* The "nchars" limit was reached without finding a difference */
+}
+
+
 #endif /* DEFINE_STRNNCOLL */
 
 
diff --git a/strings/strings_def.h b/strings/strings_def.h
index b3727321e19..8bf089ec695 100644
--- a/strings/strings_def.h
+++ b/strings/strings_def.h
@@ -105,6 +105,16 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
 }
 
 
+int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
+                                  const uchar *str1, size_t len1,
+                                  const uchar *str2, size_t len2,
+                                  size_t nchars);
+
+int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
+                                       const uchar *str1, size_t len1,
+                                       const uchar *str2, size_t len2,
+                                       size_t nchars);
+
 uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
 uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
 
diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c
index 00d49971595..97b9eb1a95e 100644
--- a/unittest/strings/strings-t.c
+++ b/unittest/strings/strings-t.c
@@ -19,6 +19,30 @@
 
 
 /*
+  U+00DF LATIN SMALL LETTER SHARP S = _utf8 x'C39F' = _latin1 x'DF'
+*/
+
+#define UTF8_sz   "\xC3\x9F"
+#define LATIN1_sz "\xDF"
+
+/*
+  U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE = _utf8 x'C385'
+*/
+
+#define UTF8_ARING "\xC3\x85"
+
+/*
+  U+00E4 LATIN SMALL LETTER A WITH DIAERESIS = _utf8 x'C3A4'
+*/
+#define UTF8_auml   "\xC3\xA4"
+#define LATIN1_auml "\xE4"
+
+#define UCS2_a  "\x00\x61"
+#define UCS2_b  "\x00\x62"
+#define UCS2_sp "\x00\x20"
+
+
+/*
   Test that like_range() returns well-formed results.
 */
 static int
@@ -758,11 +782,483 @@ test_strcollsp()
 }
 
 
-int main()
+typedef struct
+{
+  LEX_CSTRING a;  /* First string passed to strnncollsp_nchars()       */
+  LEX_CSTRING b;  /* Second string passed to strnncollsp_nchars()      */
+  size_t nchars;  /* The "nchars" limit passed to strnncollsp_nchars() */
+  int res;        /* Expected result, checked with eqres()             */
+} STRNNCOLLSP_CHAR_PARAM;
+
+
+/*
+  Some lines in the test data below are marked as follows:
+
+  IF  - An ignorable failure. The scanner finds an ignorable character
+        followed by a normal character (or by a contraction),
+        but the "nchars" limit allows only one character to be scanned.
+        The whole sequence is ignored and is treated as end-of-line.
+  CF  - A contraction failure. The scanner finds a contraction consisting
+        of two characters, but the "nchars" limit allows only one character
+        to be scanned. The whole contraction is ignored and is treated
+        as end-of-line.
+*/
+
+
+/*
+  Tests for mbminlen1 character sets,
+  for both PAD SPACE and NOPAD collations
+*/
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_common[]=
+{
+  {{CSTR("a")},              {CSTR("a")},                       0,  0},
+  {{CSTR("a")},              {CSTR("a")},                       1,  0},
+  {{CSTR("a")},              {CSTR("a")},                       2,  0},
+  {{CSTR("a")},              {CSTR("a")},                       3,  0},
+  {{CSTR("a")},              {CSTR("a")},                     100,  0},
+
+  {{CSTR("a")},              {CSTR("ab")},                      0,  0},
+  {{CSTR("a")},              {CSTR("ab")},                      1,  0},
+  {{CSTR("a")},              {CSTR("ab")},                      2, -1},
+  {{CSTR("a")},              {CSTR("ab")},                      3, -1},
+  {{CSTR("a")},              {CSTR("ab")},                    100, -1},
+
+  {{CSTR("a")},              {CSTR("a ")},                      0,  0},
+  {{CSTR("a")},              {CSTR("a ")},                      1,  0},
+  {{CSTR("a")},              {CSTR("a ")},                      2,  0},
+  {{CSTR("a")},              {CSTR("a ")},                      3,  0},
+  {{CSTR("a")},              {CSTR("a ")},                    100,  0},
+
+  {{CSTR("a")},              {CSTR("a  ")},                     0,  0},
+  {{CSTR("a")},              {CSTR("a  ")},                     1,  0},
+  {{CSTR("a")},              {CSTR("a  ")},                     2,  0},
+  {{CSTR("a")},              {CSTR("a  ")},                     3,  0},
+  {{CSTR("a")},              {CSTR("a  ")},                   100,  0},
+
+  {{CSTR("ss")},             {CSTR("ss")},                      0,  0},
+  {{CSTR("ss")},             {CSTR("ss")},                      1,  0},
+  {{CSTR("ss")},             {CSTR("ss")},                      2,  0},
+  {{CSTR("ss")},             {CSTR("ss")},                      3,  0},
+  {{CSTR("ss")},             {CSTR("ss")},                    100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+/* Tests for utf8, for both PAD SPACE and NOPAD collations */
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_common[]=
+{
+  {{CSTR(UTF8_sz)},          {CSTR(UTF8_sz)},                   0,  0},
+  {{CSTR(UTF8_sz)},          {CSTR(UTF8_sz)},                   1,  0},
+  {{CSTR(UTF8_sz)},          {CSTR(UTF8_sz)},                   2,  0},
+  {{CSTR(UTF8_sz)},          {CSTR(UTF8_sz)},                   3,  0},
+  {{CSTR(UTF8_sz)},          {CSTR(UTF8_sz)},                 100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+/* Tests for latin1, for both PAD SPACE and NOPAD collations */
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_xpad_common[]=
+{
+  {{CSTR(LATIN1_sz)},        {CSTR(LATIN1_sz)},                 0,  0},
+  {{CSTR(LATIN1_sz)},        {CSTR(LATIN1_sz)},                 1,  0},
+  {{CSTR(LATIN1_sz)},        {CSTR(LATIN1_sz)},                 2,  0},
+  {{CSTR(LATIN1_sz)},        {CSTR(LATIN1_sz)},                 3,  0},
+  {{CSTR(LATIN1_sz)},        {CSTR(LATIN1_sz)},               100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+/* Tests for utf8 collations that sort "A WITH DIAERESIS" equal to "A" */
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_a_eq_auml[]=
+{
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah")},                      0,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah")},                      1,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah")},                      2,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah")},                      3,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah")},                    100,  0},
+
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah ")},                     0,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah ")},                     1,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah ")},                     2,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah ")},                     3,  0},
+  {{CSTR(UTF8_auml "h")},    {CSTR("ah ")},                   100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]=
+{
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            0,  0},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            1,  0},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")}/*IF*/,      2,  1},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            3,  0},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            4,  0},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},          100,  0},
+
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   0,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   1, -1},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   2,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   3,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   4,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                 100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]=
+{
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            0,  0},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            1,  0},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")}/*IF*/,      2,  1},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            3,  1},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},            4,  1},
+  {{CSTR("ss")},             {CSTR("s" "\x00" "s")},          100,  1},
+
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   0,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   1, -1},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   2, -1},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   3, -1},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   4, -1},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                 100, -1},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_danish_ci[]=
+{
+  {{CSTR("aa")},             {CSTR("")},                        0,  0},
+  {{CSTR("aa")}/*CF*/,       {CSTR("")},                        1,  1},
+  {{CSTR("aa")},             {CSTR("")},                        2,  1},
+  {{CSTR("aa")},             {CSTR("")},                        3,  1},
+  {{CSTR("aa")},             {CSTR("")},                      100,  1},
+
+  {{CSTR("aa")},             {CSTR("a")},                       0,  0},
+  {{CSTR("aa")}/*CF*/,       {CSTR("a")},                       1,  0},
+  {{CSTR("aa")},             {CSTR("a")},                       2,  1},
+  {{CSTR("aa")},             {CSTR("a")},                       3,  1},
+  {{CSTR("aa")},             {CSTR("a")},                     100,  1},
+
+  {{CSTR("aa")},             {CSTR("aa")},                      0,  0},
+  {{CSTR("aa")}/*CF*/,       {CSTR("aa")}/*CF*/,                1,  0},
+  {{CSTR("aa")},             {CSTR("aa")},                      2,  0},
+  {{CSTR("aa")},             {CSTR("aa")},                      3,  0},
+  {{CSTR("aa")},             {CSTR("aa")},                    100,  0},
+
+  {{CSTR("aa")},             {CSTR("\x00" "a")},                0,  0},
+  {{CSTR("aa")}/*CF*/,       {CSTR("\x00" "a")}/*IF*/,          1,  1},
+  {{CSTR("aa")},             {CSTR("\x00" "a")},                2,  1},
+  {{CSTR("aa")},             {CSTR("\x00" "a")},                3,  1},
+  {{CSTR("aa")},             {CSTR("\x00" "a")},              100,  1},
+
+  {{CSTR("aa")},             {CSTR("\x00" "aa")},                0,  0},
+  {{CSTR("aa")}/*CF*/,       {CSTR("\x00" "aa")}/*IF*/,          1,  1},
+  {{CSTR("aa")},             {CSTR("\x00" "aa")}/*IF*/,          2,  1},
+  {{CSTR("aa")},             {CSTR("\x00" "aa")},                3,  0},
+  {{CSTR("aa")},             {CSTR("\x00" "aa")},              100,  0},
+
+  {{CSTR("aa")},             {CSTR("a" "\x00" "a")},            0,  0},
+  {{CSTR("aa")}/*CF*/,       {CSTR("a" "\x00" "a")},            1,  0},
+  {{CSTR("aa")},             {CSTR("a" "\x00" "a")}/*IF*/,      2,  1},
+  {{CSTR("aa")},             {CSTR("a" "\x00" "a")},            3,  1},
+  {{CSTR("aa")},             {CSTR("a" "\x00" "a")},          100,  1},
+
+  {{CSTR("aa")},             {CSTR(UTF8_ARING)},                0,  0},
+  {{CSTR("aa")}/*CF*/,       {CSTR(UTF8_ARING)},                1, -1},
+  {{CSTR("aa")},             {CSTR(UTF8_ARING)},                2,  0},
+  {{CSTR("aa")},             {CSTR(UTF8_ARING)},                3,  0},
+  {{CSTR("aa")},             {CSTR(UTF8_ARING)},              100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_german2_ci[]=
+{
+  {{CSTR("ss")},             {CSTR(LATIN1_sz)},                 0,  0},
+  {{CSTR("ss")},             {CSTR(LATIN1_sz)},                 1, -1},
+  {{CSTR("ss")},             {CSTR(LATIN1_sz)},                 2,  0},
+  {{CSTR("ss")},             {CSTR(LATIN1_sz)},                 3,  0},
+  {{CSTR("ss")},             {CSTR(LATIN1_sz)},               100,  0},
+
+  {{CSTR("ae")},             {CSTR(LATIN1_auml)},               0,  0},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml)},               1, -1},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml)},               2,  0},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml)},               3,  0},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml)},             100,  0},
+
+  {{CSTR("ae")},             {CSTR(LATIN1_auml " ")},           0,  0},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml " ")},           1, -1},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml " ")},           2,  0},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml " ")},           3,  0},
+  {{CSTR("ae")},             {CSTR(LATIN1_auml " ")},         100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_german2_ci[]=
+{
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   0,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   1, -1},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   2,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                   3,  0},
+  {{CSTR("ss")},             {CSTR(UTF8_sz)},                 100,  0},
+
+  {{CSTR("ae")},             {CSTR(UTF8_auml)},                 0,  0},
+  {{CSTR("ae")},             {CSTR(UTF8_auml)},                 1, -1},
+  {{CSTR("ae")},             {CSTR(UTF8_auml)},                 2,  0},
+  {{CSTR("ae")},             {CSTR(UTF8_auml)},                 3,  0},
+  {{CSTR("ae")},             {CSTR(UTF8_auml)},               100,  0},
+
+  {{CSTR("ae")},             {CSTR(UTF8_auml " ")},             0,  0},
+  {{CSTR("ae")},             {CSTR(UTF8_auml " ")},             1, -1},
+  {{CSTR("ae")},             {CSTR(UTF8_auml " ")},             2,  0},
+  {{CSTR("ae")},             {CSTR(UTF8_auml " ")},             3,  0},
+  {{CSTR("ae")},             {CSTR(UTF8_auml " ")},           100,  0},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_czech[]=
+{
+  {{CSTR("c")},              {CSTR("ch")},                      0,  0},
+  {{CSTR("c")},              {CSTR("ch")},                      1,  0},
+  {{CSTR("c")},              {CSTR("ch")},                      2, -1},
+
+  {{CSTR("h")},              {CSTR("ch")},                      0,  0},
+  {{CSTR("h")},              {CSTR("ch")},                      1,  1},
+  {{CSTR("h")},              {CSTR("ch")},                      2, -1},
+
+  {{CSTR("i")},              {CSTR("ch")},                      0,  0},
+  {{CSTR("i")},              {CSTR("ch")},                      1,  1},
+  {{CSTR("i")},              {CSTR("ch")},                      2,  1},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen2_xpad_common[]=
+{
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a)},                    0,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a)},                    1,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a)},                    2,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a)},                    3,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a)},                  100,  0},
+
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp)},            0,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp)},            1,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp)},            2,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp)},            3,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp)},          100,  0},
+
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp UCS2_sp)},    0,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp UCS2_sp)},    1,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp UCS2_sp)},    2,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp UCS2_sp)},    3,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_sp UCS2_sp)},  100,  0},
+
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_b)},             0,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_b)},             1,  0},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_b)},             2, -1},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_b)},             3, -1},
+  {{CSTR(UCS2_a)},           {CSTR(UCS2_a UCS2_b)},           100, -1},
+
+  {{NULL, 0},                {NULL, 0},                         0,  0}
+};
+
+
+/* Check one row in both operand orders; returns 1 on failure, else 0. */
+static int
+strnncollsp_char_one(CHARSET_INFO *cs, const STRNNCOLLSP_CHAR_PARAM *p)
+{
+  /* Hex dumps of both operands, used only for the diagnostic line. */
+  char ahex[64], bhex[64];
+  int fwd, rev;
+
+  fwd= cs->coll->strnncollsp_nchars(cs,
+                                    (uchar *) p->a.str, p->a.length,
+                                    (uchar *) p->b.str, p->b.length,
+                                    p->nchars);
+  str2hex(ahex, sizeof(ahex), p->a.str, p->a.length);
+  str2hex(bhex, sizeof(bhex), p->b.str, p->b.length);
+  diag("%-25s %-12s %-12s %3d %7d %7d%s",
+       cs->name, ahex, bhex, (int) p->nchars, p->res, fwd,
+       eqres(fwd, p->res) ? "" : " FAILED");
+
+  /* A wrong forward result is reported above; the swap is not attempted. */
+  if (!eqres(fwd, p->res))
+    return 1;
+
+  /* With the operands swapped the expected result is negated. */
+  rev= cs->coll->strnncollsp_nchars(cs,
+                                    (uchar *) p->b.str, p->b.length,
+                                    (uchar *) p->a.str, p->a.length,
+                                    p->nchars);
+  if (eqres(rev, -p->res))
+    return 0;
+
+  diag("Comparison in reverse order failed. Expected %d, got %d",
+       -p->res, rev);
+  return 1;
+}
+
+
+/*
+  Run all rows of "param" (ended by a.str == NULL) against "collation".
+  Returns the number of failures; an unknown collation counts as one.
+*/
+static int
+strnncollsp_char(const char *collation, const STRNNCOLLSP_CHAR_PARAM *param)
+{
+  int failed= 0;
+  const STRNNCOLLSP_CHAR_PARAM *p;
+  CHARSET_INFO *cs= get_charset_by_name(collation, MYF(0));
+
+  if (!cs)
+  {
+    /* Name the collation: many are tested and the log must say which. */
+    diag("get_charset_by_name() failed for '%s'", collation);
+    return 1;
+  }
+  diag("%-25s %-12s %-12s %-3s %7s %7s",
+       "Collation", "a", "b", "Nch", "ExpSign", "Actual");
+  for (p= param; p->a.str; p++)
+    failed+= strnncollsp_char_one(cs, p);
+  return failed;
+}
+
+
+static int
+strnncollsp_char_mbminlen1(const char *collation,
+                           const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  /* The shared mbminlen1 rows always run; extra rows only when supplied. */
+  int n= strnncollsp_char(collation, strnncollsp_char_mbminlen1_xpad_common);
+  if (specific != NULL)
+    n+= strnncollsp_char(collation, specific);
+  return n;
+}
+
+
+static int
+strnncollsp_char_mbminlen2(const char *collation,
+                           const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  /* The shared mbminlen2 rows always run; extra rows only when supplied. */
+  int n= strnncollsp_char(collation, strnncollsp_char_mbminlen2_xpad_common);
+  if (specific != NULL)
+    n+= strnncollsp_char(collation, specific);
+  return n;
+}
+
+
+static int
+strnncollsp_char_latin1(const char *collation,
+                        const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  /* Generic mbminlen1 rows, then latin1 rows, then any extra rows. */
+  int n= strnncollsp_char(collation, strnncollsp_char_mbminlen1_xpad_common);
+  n+= strnncollsp_char(collation, strnncollsp_char_latin1_xpad_common);
+  if (specific != NULL)
+    n+= strnncollsp_char(collation, specific);
+  return n;
+}
+
+
+static int
+strnncollsp_char_utf8mbx(const char *collation,
+                         const STRNNCOLLSP_CHAR_PARAM *specific)
+{
+  int n= strnncollsp_char(collation, strnncollsp_char_mbminlen1_xpad_common);
+  n+= strnncollsp_char(collation, strnncollsp_char_utf8mbx_xpad_common);
+
+  /* The a == a-umlaut rows are skipped for _bin, _german2 and _danish. */
+  if (!(strstr(collation, "_bin") ||
+        strstr(collation, "_german2") ||
+        strstr(collation, "_danish")))
+    n+= strnncollsp_char(collation, strnncollsp_char_utf8mbx_xpad_a_eq_auml);
+
+  if (specific != NULL)
+    n+= strnncollsp_char(collation, specific);
+  return n;
+}
+
+
+static int
+test_strnncollsp_char()
+{
+  int failed= 0; /* Sum of the failure counts returned by the runners below */
+  failed+= strnncollsp_char_latin1("latin1_swedish_ci", NULL);
+  failed+= strnncollsp_char_latin1("latin1_swedish_nopad_ci", NULL);
+  failed+= strnncollsp_char_latin1("latin1_bin", NULL);
+  failed+= strnncollsp_char_latin1("latin1_nopad_bin", NULL);
+  failed+= strnncollsp_char_latin1("latin1_german2_ci",
+                                   strnncollsp_char_latin1_german2_ci);
+  /* Each charset below is tested only if its HAVE_CHARSET_xxx is defined */
+#ifdef HAVE_CHARSET_cp1250
+  failed+= strnncollsp_char_mbminlen1("cp1250_czech_cs",
+                                      strnncollsp_char_mbminlen1_xpad_czech);
+#endif
+
+#ifdef HAVE_CHARSET_latin2
+  failed+= strnncollsp_char_mbminlen1("latin2_czech_cs",
+                                      strnncollsp_char_mbminlen1_xpad_czech);
+#endif
+
+#ifdef HAVE_CHARSET_tis620
+  failed+= strnncollsp_char_mbminlen1("tis620_thai_ci", NULL);
+#endif
+
+#ifdef HAVE_CHARSET_big5
+  failed+= strnncollsp_char_mbminlen1("big5_chinese_ci", NULL);
+  failed+= strnncollsp_char_mbminlen1("big5_chinese_nopad_ci", NULL);
+  failed+= strnncollsp_char_mbminlen1("big5_bin", NULL);
+  failed+= strnncollsp_char_mbminlen1("big5_nopad_bin", NULL);
+#endif
+  /* utf8mb3 is tested unconditionally; these four pass no extra table */
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_general_ci", NULL);
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_general_nopad_ci", NULL);
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_bin", NULL);
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_nopad_bin", NULL);
+  /* utf8mb3 collations that additionally pass a collation-specific table */
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_ci",
+                                    strnncollsp_char_utf8mb3_unicode_ci);
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_nopad_ci",
+                                    strnncollsp_char_utf8mb3_unicode_nopad_ci);
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_danish_ci",
+                                    strnncollsp_char_utf8mb3_danish_ci);
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_german2_ci",
+                                    strnncollsp_char_utf8mbx_german2_ci);
+  failed+= strnncollsp_char_utf8mbx("utf8mb3_czech_ci",
+                                    strnncollsp_char_mbminlen1_xpad_czech);
+  /* ucs2 collations go through the mbminlen2 runner, if ucs2 is compiled in */
+#ifdef HAVE_CHARSET_ucs2
+  failed+= strnncollsp_char_mbminlen2("ucs2_general_ci", NULL);
+  failed+= strnncollsp_char_mbminlen2("ucs2_general_nopad_ci", NULL);
+  failed+= strnncollsp_char_mbminlen2("ucs2_bin", NULL);
+  failed+= strnncollsp_char_mbminlen2("ucs2_nopad_bin", NULL);
+  failed+= strnncollsp_char_mbminlen2("ucs2_unicode_ci", NULL);
+  failed+= strnncollsp_char_mbminlen2("ucs2_unicode_nopad_ci", NULL);
+#endif
+
+  return failed;
+}
+
+
+int main(int ac, char **av)
 {
   size_t i, failed= 0;
-  
-  plan(2);
+
+  MY_INIT(av[0]);
+
+  plan(3);
   diag("Testing my_like_range_xxx() functions");
   
   for (i= 0; i < array_elements(charset_list); i++)
@@ -780,5 +1276,11 @@ int main()
   failed= test_strcollsp();
   ok(failed == 0, "Testing cs->coll->strnncollsp()");
 
+  diag("Testing cs->coll->strnncollsp_char()");
+  failed= test_strnncollsp_char();
+  ok(failed == 0, "Testing cs->coll->strnncollsp_char()");
+
+  my_end(0);
+
   return exit_status();
 }
author	Alexander Barkov <bar@mariadb.com>	2021-09-29 15:13:57 +0400
committer	Alexander Barkov <bar@mariadb.com>	2022-01-21 12:16:07 +0400
commit	b915f79e4e004fde4f6ac8f341afee980e11792b (patch)
tree	2568032d75c7af9a72c6669b306fda4418b5ed20
parent	db574173d19731f1e5dc75d325f72398afac8d59 (diff)
download	mariadb-git-bb-10.4-bar-MDEV-25904.tar.gz