diff options
author | Yann Ylavic <ylavic@apache.org> | 2022-01-15 23:22:33 +0000 |
---|---|---|
committer | Yann Ylavic <ylavic@apache.org> | 2022-01-15 23:22:33 +0000 |
commit | 5691d6ca6a44147f10c9c5e55b87660d287c748d (patch) | |
tree | 397a7a0aab805dd71c2684c71a74c994add3af71 /strings/apr_cstr.c | |
parent | c8c38c8c529161e6dcb176eb9b392dcb7979c139 (diff) | |
download | apr-5691d6ca6a44147f10c9c5e55b87660d287c748d.tar.gz |
apr_cstr: Improve apr_cstr_casecmp() and apr_cstr_casecmpn() performance.
The new versions [1] compile to shorter/faster assembly than the previous
ones [2]; no functional change.
[1] apr_cstr_casecmp() after this commit:
Dump of assembler code for function apr_cstr_casecmp:
0x0000000000049fc0 <+0>: movzbl (%rdi),%eax
0x0000000000049fc3 <+3>: movzbl (%rsi),%edx
0x0000000000049fc6 <+6>: lea 0x3d573(%rip),%r8 # 0x87540 <ucharmap>
0x0000000000049fcd <+13>: movzbl (%r8,%rax,1),%eax
0x0000000000049fd2 <+18>: movzbl (%r8,%rdx,1),%ecx
0x0000000000049fd7 <+23>: cmp %ecx,%eax
0x0000000000049fd9 <+25>: jne 0x49ffe <apr_cstr_casecmp+62>
0x0000000000049fdb <+27>: xor %edx,%edx
0x0000000000049fdd <+29>: jmp 0x49ffa <apr_cstr_casecmp+58>
0x0000000000049fdf <+31>: nop
0x0000000000049fe0 <+32>: add $0x1,%rdx
0x0000000000049fe4 <+36>: movzbl (%rdi,%rdx,1),%eax
0x0000000000049fe8 <+40>: movzbl (%rsi,%rdx,1),%ecx
0x0000000000049fec <+44>: movzbl (%r8,%rax,1),%eax
0x0000000000049ff1 <+49>: movzbl (%r8,%rcx,1),%ecx
0x0000000000049ff6 <+54>: cmp %ecx,%eax
0x0000000000049ff8 <+56>: jne 0x49ffe <apr_cstr_casecmp+62>
0x0000000000049ffa <+58>: test %eax,%eax
0x0000000000049ffc <+60>: jne 0x49fe0 <apr_cstr_casecmp+32>
0x0000000000049ffe <+62>: sub %ecx,%eax
0x000000000004a000 <+64>: ret
End of assembler dump.
[2] apr_cstr_casecmp() before this commit:
Dump of assembler code for function apr_cstr_casecmp:
0x000000000004a000 <+0>: movzbl (%rdi),%eax
0x000000000004a003 <+3>: movzbl (%rsi),%edx
0x000000000004a006 <+6>: lea 0x3d533(%rip),%r8 # 0x87540 <ucharmap>
0x000000000004a00d <+13>: mov %rdi,%r9
0x000000000004a010 <+16>: mov %rax,%rcx
0x000000000004a013 <+19>: movswl (%r8,%rdx,2),%edx
0x000000000004a018 <+24>: movswl (%r8,%rax,2),%eax
0x000000000004a01d <+29>: sub %edx,%eax
0x000000000004a01f <+31>: jne 0x4a052 <apr_cstr_casecmp+82>
0x000000000004a021 <+33>: mov $0x1,%edx
0x000000000004a026 <+38>: test %ecx,%ecx
0x000000000004a028 <+40>: je 0x4a052 <apr_cstr_casecmp+82>
0x000000000004a02a <+42>: nopw 0x0(%rax,%rax,1)
0x000000000004a030 <+48>: movzbl (%r9,%rdx,1),%eax
0x000000000004a035 <+53>: movzbl (%rsi,%rdx,1),%ecx
0x000000000004a039 <+57>: add $0x1,%rdx
0x000000000004a03d <+61>: mov %rax,%rdi
0x000000000004a040 <+64>: movswl (%r8,%rcx,2),%ecx
0x000000000004a045 <+69>: movswl (%r8,%rax,2),%eax
0x000000000004a04a <+74>: sub %ecx,%eax
0x000000000004a04c <+76>: jne 0x4a052 <apr_cstr_casecmp+82>
0x000000000004a04e <+78>: test %edi,%edi
0x000000000004a050 <+80>: jne 0x4a030 <apr_cstr_casecmp+48>
0x000000000004a052 <+82>: ret
End of assembler dump.
Merge r1897102 from trunk.
Submitted by: ylavic
git-svn-id: https://svn.apache.org/repos/asf/apr/apr/branches/1.8.x@1897103 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'strings/apr_cstr.c')
-rw-r--r-- | strings/apr_cstr.c | 42 |
1 files changed, 16 insertions, 26 deletions
diff --git a/strings/apr_cstr.c b/strings/apr_cstr.c index 27229a067..c211c46dd 100644 --- a/strings/apr_cstr.c +++ b/strings/apr_cstr.c @@ -197,7 +197,7 @@ APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings, * octets (such as extended latin alphabetics) are never case-folded. * NOTE: Other than Alpha A-Z/a-z, each code point is unique! */ -static const short ucharmap[] = { +static const unsigned char ucharmap[256] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, @@ -244,7 +244,7 @@ static const short ucharmap[] = { * * NOTE: Other than Alpha A-Z/a-z, each code point is unique! */ -static const short ucharmap[] = { +static const unsigned char ucharmap[256] = { 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, @@ -282,35 +282,25 @@ static const short ucharmap[] = { APR_DECLARE(int) apr_cstr_casecmp(const char *s1, const char *s2) { - const unsigned char *str1 = (const unsigned char *)s1; - const unsigned char *str2 = (const unsigned char *)s2; - for (;;) - { - const int c1 = (int)(*str1); - const int c2 = (int)(*str2); - const int cmp = ucharmap[c1] - ucharmap[c2]; - /* Not necessary to test for !c2, this is caught by cmp */ - if (cmp || !c1) - return cmp; - str1++; - str2++; + apr_size_t i = 0; + for (;; ++i) { + const int c1 = ucharmap[(unsigned char)s1[i]]; + const int c2 = ucharmap[(unsigned char)s2[i]]; + /* Not necessary to test for !c2, this is caught by c1 != c2 */ + if (c1 != c2 || !c1) + return c1 - c2; } } APR_DECLARE(int) apr_cstr_casecmpn(const char *s1, const char *s2, apr_size_t n) { - const unsigned char *str1 = (const unsigned char *)s1; - const unsigned char *str2 = (const unsigned char *)s2; - while (n--) - { - const int c1 = (int)(*str1); - const int c2 = (int)(*str2); - const int cmp = ucharmap[c1] - ucharmap[c2]; - /* Not necessary to test for !c2, this 
is caught by cmp */ - if (cmp || !c1) - return cmp; - str1++; - str2++; + apr_size_t i = 0; + for (; i < n; ++i) { + const int c1 = ucharmap[(unsigned char)s1[i]]; + const int c2 = ucharmap[(unsigned char)s2[i]]; + /* Not necessary to test for !c2, this is caught by c1 != c2 */ + if (c1 != c2 || !c1) + return c1 - c2; } return 0; } |