diff options
author | Alexander Barkov <bar@mariadb.com> | 2020-06-10 08:42:31 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.com> | 2020-06-10 08:42:31 +0400 |
commit | 9b9a354da973de92fc783dabba957b5c00f63547 (patch) | |
tree | ed4a9cf5503db0ab081e8a0a024492af29dd674b | |
parent | 902742789ef284a37cbf19b524213eae0f7fc2dc (diff) | |
download | mariadb-git-9b9a354da973de92fc783dabba957b5c00f63547.tar.gz |
MDEV-22849 Reuse skip_trailing_space() in my_hash_sort_utf8mbX
Replacing the slow loop in my_hash_sort_utf8mbX() with the fast
skip_trailing_space(), which consumes 8 bytes in one iteration
and is around 8 times faster on long data.
Also, renaming:
- my_hash_sort_utf8() to my_hash_sort_utf8mb3()
- my_hash_sort_utf8_nopad() to my_hash_sort_utf8mb3_nopad()
to make merging to 10.5 easier (automatically?).
-rw-r--r-- | strings/ctype-utf8.c | 26 |
1 files changed, 11 insertions, 15 deletions
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 44dc3d2e02d..8f3b1224404 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -5159,8 +5159,8 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, const char *src, size_t srclen, } -static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *nr1, ulong *nr2) +static void my_hash_sort_utf8mb3_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) { my_wc_t wc; int res; @@ -5179,17 +5179,15 @@ static void my_hash_sort_utf8_nopad(CHARSET_INFO *cs, const uchar *s, size_t sle } -static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, - ulong *nr1, ulong *nr2) +static void my_hash_sort_utf8mb3(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) { - const uchar *e= s+slen; /* Remove end space. We have to do this to be able to compare 'A ' and 'A' as identical */ - while (e > s && e[-1] == ' ') - e--; - my_hash_sort_utf8_nopad(cs, s, e - s, nr1, nr2); + const uchar *e= skip_trailing_space(s, slen); + my_hash_sort_utf8mb3_nopad(cs, s, e - s, nr1, nr2); } @@ -5540,7 +5538,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler = my_wildcmp_utf8, my_strcasecmp_utf8, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; @@ -5556,7 +5554,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler = my_wildcmp_utf8, my_strcasecmp_utf8, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; @@ -5588,7 +5586,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler = my_wildcmp_utf8, my_strcasecmp_utf8, my_instr_mb, - my_hash_sort_utf8_nopad, + my_hash_sort_utf8mb3_nopad, my_propagate_complex }; @@ -7224,7 +7222,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler = my_wildcmp_utf8, my_strcasecmp_utf8, my_instr_mb, - my_hash_sort_utf8, + my_hash_sort_utf8mb3, my_propagate_complex }; @@ -7625,13 
+7623,11 @@ static void my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen, ulong *nr1, ulong *nr2) { - const uchar *e= s + slen; /* Remove end space. We do this to be able to compare 'A ' and 'A' as identical */ - while (e > s && e[-1] == ' ') - e--; + const uchar *e= skip_trailing_space(s, slen); my_hash_sort_utf8mb4_nopad(cs, s, e - s, nr1, nr2); } |