From 5691d6ca6a44147f10c9c5e55b87660d287c748d Mon Sep 17 00:00:00 2001 From: Yann Ylavic Date: Sat, 15 Jan 2022 23:22:33 +0000 Subject: apr_cstr: Improve apr_cstr_casecmp() and apr_cstr_casecmpn() performance. The new versions [1] compile to a shorter/faster assembly than the previous ones [2], no functional change. [1] apr_cstr_casecmp() after this commit: Dump of assembler code for function apr_cstr_casecmp: 0x0000000000049fc0 <+0>: movzbl (%rdi),%eax 0x0000000000049fc3 <+3>: movzbl (%rsi),%edx 0x0000000000049fc6 <+6>: lea 0x3d573(%rip),%r8 # 0x87540 0x0000000000049fcd <+13>: movzbl (%r8,%rax,1),%eax 0x0000000000049fd2 <+18>: movzbl (%r8,%rdx,1),%ecx 0x0000000000049fd7 <+23>: cmp %ecx,%eax 0x0000000000049fd9 <+25>: jne 0x49ffe 0x0000000000049fdb <+27>: xor %edx,%edx 0x0000000000049fdd <+29>: jmp 0x49ffa 0x0000000000049fdf <+31>: nop 0x0000000000049fe0 <+32>: add $0x1,%rdx 0x0000000000049fe4 <+36>: movzbl (%rdi,%rdx,1),%eax 0x0000000000049fe8 <+40>: movzbl (%rsi,%rdx,1),%ecx 0x0000000000049fec <+44>: movzbl (%r8,%rax,1),%eax 0x0000000000049ff1 <+49>: movzbl (%r8,%rcx,1),%ecx 0x0000000000049ff6 <+54>: cmp %ecx,%eax 0x0000000000049ff8 <+56>: jne 0x49ffe 0x0000000000049ffa <+58>: test %eax,%eax 0x0000000000049ffc <+60>: jne 0x49fe0 0x0000000000049ffe <+62>: sub %ecx,%eax 0x000000000004a000 <+64>: ret End of assembler dump. 
[2] apr_cstr_casecmp() before this commit: Dump of assembler code for function apr_cstr_casecmp: 0x000000000004a000 <+0>: movzbl (%rdi),%eax 0x000000000004a003 <+3>: movzbl (%rsi),%edx 0x000000000004a006 <+6>: lea 0x3d533(%rip),%r8 # 0x87540 0x000000000004a00d <+13>: mov %rdi,%r9 0x000000000004a010 <+16>: mov %rax,%rcx 0x000000000004a013 <+19>: movswl (%r8,%rdx,2),%edx 0x000000000004a018 <+24>: movswl (%r8,%rax,2),%eax 0x000000000004a01d <+29>: sub %edx,%eax 0x000000000004a01f <+31>: jne 0x4a052 0x000000000004a021 <+33>: mov $0x1,%edx 0x000000000004a026 <+38>: test %ecx,%ecx 0x000000000004a028 <+40>: je 0x4a052 0x000000000004a02a <+42>: nopw 0x0(%rax,%rax,1) 0x000000000004a030 <+48>: movzbl (%r9,%rdx,1),%eax 0x000000000004a035 <+53>: movzbl (%rsi,%rdx,1),%ecx 0x000000000004a039 <+57>: add $0x1,%rdx 0x000000000004a03d <+61>: mov %rax,%rdi 0x000000000004a040 <+64>: movswl (%r8,%rcx,2),%ecx 0x000000000004a045 <+69>: movswl (%r8,%rax,2),%eax 0x000000000004a04a <+74>: sub %ecx,%eax 0x000000000004a04c <+76>: jne 0x4a052 0x000000000004a04e <+78>: test %edi,%edi 0x000000000004a050 <+80>: jne 0x4a030 0x000000000004a052 <+82>: ret End of assembler dump. Merge r1897102 from trunk. Submitted by: ylavic git-svn-id: https://svn.apache.org/repos/asf/apr/apr/branches/1.8.x@1897103 13f79535-47bb-0310-9956-ffa450edef68 --- strings/apr_cstr.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'strings/apr_cstr.c') diff --git a/strings/apr_cstr.c b/strings/apr_cstr.c index 27229a067..c211c46dd 100644 --- a/strings/apr_cstr.c +++ b/strings/apr_cstr.c @@ -197,7 +197,7 @@ APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings, * octets (such as extended latin alphabetics) are never case-folded. * NOTE: Other than Alpha A-Z/a-z, each code point is unique! 
*/ -static const short ucharmap[] = { +static const unsigned char ucharmap[256] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, @@ -244,7 +244,7 @@ static const short ucharmap[] = { * * NOTE: Other than Alpha A-Z/a-z, each code point is unique! */ -static const short ucharmap[] = { +static const unsigned char ucharmap[256] = { 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, @@ -282,35 +282,25 @@ static const short ucharmap[] = { APR_DECLARE(int) apr_cstr_casecmp(const char *s1, const char *s2) { - const unsigned char *str1 = (const unsigned char *)s1; - const unsigned char *str2 = (const unsigned char *)s2; - for (;;) - { - const int c1 = (int)(*str1); - const int c2 = (int)(*str2); - const int cmp = ucharmap[c1] - ucharmap[c2]; - /* Not necessary to test for !c2, this is caught by cmp */ - if (cmp || !c1) - return cmp; - str1++; - str2++; + apr_size_t i = 0; + for (;; ++i) { + const int c1 = ucharmap[(unsigned char)s1[i]]; + const int c2 = ucharmap[(unsigned char)s2[i]]; + /* Not necessary to test for !c2, this is caught by c1 != c2 */ + if (c1 != c2 || !c1) + return c1 - c2; } } APR_DECLARE(int) apr_cstr_casecmpn(const char *s1, const char *s2, apr_size_t n) { - const unsigned char *str1 = (const unsigned char *)s1; - const unsigned char *str2 = (const unsigned char *)s2; - while (n--) - { - const int c1 = (int)(*str1); - const int c2 = (int)(*str2); - const int cmp = ucharmap[c1] - ucharmap[c2]; - /* Not necessary to test for !c2, this is caught by cmp */ - if (cmp || !c1) - return cmp; - str1++; - str2++; + apr_size_t i = 0; + for (; i < n; ++i) { + const int c1 = ucharmap[(unsigned char)s1[i]]; + const int c2 = ucharmap[(unsigned char)s2[i]]; + /* Not necessary to test for !c2, this is caught by c1 != c2 */ + if (c1 != c2 || !c1) + return c1 - c2; } return 0; } -- cgit v1.2.1