summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorAlexander Barkov <bar@mariadb.com>2021-09-29 15:13:57 +0400
committerAlexander Barkov <bar@mariadb.com>2022-01-21 12:16:07 +0400
commitb915f79e4e004fde4f6ac8f341afee980e11792b (patch)
tree2568032d75c7af9a72c6669b306fda4418b5ed20 /include
parentdb574173d19731f1e5dc75d325f72398afac8d59 (diff)
downloadmariadb-git-b915f79e4e004fde4f6ac8f341afee980e11792b.tar.gz
MDEV-25904 New collation functions to compare InnoDB style trimmed NO PAD stringsbb-10.4-bar-MDEV-25904
Diffstat (limited to 'include')
-rw-r--r--include/m_ctype.h54
1 files changed, 54 insertions, 0 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 0f6e6a11666..187c8710929 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -330,6 +330,60 @@ struct my_collation_handler_st
const uchar *, size_t, const uchar *, size_t, my_bool);
int (*strnncollsp)(CHARSET_INFO *,
const uchar *, size_t, const uchar *, size_t);
+ /*
+ strnncollsp_nchars() - similar to strnncollsp() but assumes that both
+ strings were originally CHAR(N) values with the
+ same N, then were optionally space-padded,
+ or optionally space-trimmed.
+
+ In other words, this function compares the two strings the same way
+ as if we inserted both values into a CHAR(N) column
+ and then compared the two column values.
+
+ It compares the same number of characters from the two strings.
+ This is especially important for NOPAD collations.
+
+ If the CHAR_LENGTH values of the two strings differ,
+ the shorter string is virtually padded with trailing spaces
+ up to the CHAR_LENGTH of the longer string, to guarantee that the
+ same number of characters is compared.
+ This is important if the two CHAR(N) strings are space-trimmed
+ (e.g. like in InnoDB compact format for CHAR).
+
+ The function compares at most "nchars" characters.
+ This can be useful to compare CHAR(N) space-padded strings
+ (when the exact N is known) without having to truncate them before
+ the comparison.
+
+ For example, Field_string stores a "CHAR(3) CHARACTER SET utf8mb4" value
+ of "aaa" as 12 bytes in a record buffer:
+ - 3 bytes of the actual data, followed by
+ - 9 bytes of spaces (just fillers, not real data)
+ The caller can pass nchars=3 to compare CHAR(3) record values.
+ In such case, the comparator won't go inside the 9 bytes of the fillers.
+
+ If N is not known, the caller can pass max(len1,len2) as the "nchars" value
+ (i.e. the maximum of the OCTET_LENGTH of the two strings).
+
+ Notes on complex collations.
+
+ This function counts contraction parts as individual characters.
+ For example, the Czech letter 'ch' (in Czech collations)
+ is ordinarily counted by the "nchars" limit as TWO characters
+ (although it is only one letter).
+ This corresponds to what CHAR(N) does in INSERT.
+
+ If the "nchars" limit tears apart a contraction, only the part fitting
+ into "nchars" characters is used. For example, in case of a Czech collation,
+ the string "ach" with nchars=2 is compared as 'ac': the contraction
+ 'ch' is torn apart and the letter 'c' acts as an individual character.
+ This yields the same comparison result as inserting
+ 'ach' into a CHAR(2) column and then comparing it.
+ */
+ int (*strnncollsp_nchars)(CHARSET_INFO *,
+ const uchar *str1, size_t len1,
+ const uchar *str2, size_t len2,
+ size_t nchars);
size_t (*strnxfrm)(CHARSET_INFO *,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags);