From 9f31a7914f9d65fbc1225f503491dcc90e90c357 Mon Sep 17 00:00:00 2001 From: Chen Guo Date: Thu, 8 Jul 2010 15:50:11 -0700 Subject: (x)memcoll: speedup when input is known to be NUL delimited * lib/memcoll.c: Include stdlib. (memcoll0) New function. (strcoll_loop) New function, refactored for use in both memcoll and memcoll0. * lib/memcoll.h: Add prototype for memcoll0. * lib/xmemcoll.c: (xmemcoll0) New function. (collate_error) New function, refactored for use in both xmemcoll and xmemcoll0. * lib/xmemcoll.h: Add prototype for xmemcoll0. * m4/memcoll.m4: add inline invocation. --- lib/memcoll.c | 88 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 32 deletions(-) (limited to 'lib/memcoll.c') diff --git a/lib/memcoll.c b/lib/memcoll.c index e08ffa5469..8e4855135f 100644 --- a/lib/memcoll.c +++ b/lib/memcoll.c @@ -23,20 +23,54 @@ #include "memcoll.h" #include <errno.h> +#include <stdlib.h> #include <string.h> +/* Ensure strcoll operates on the entire input strings, in case they contain + NUL bytes. */ + +static inline int +strcoll_loop (const char *s1, size_t s1len, const char *s2, size_t s2len) +{ + int diff; + while (! (errno = 0, (diff = strcoll (s1, s2)) || errno)) + { + /* strcoll found no difference, but perhaps it was fooled by NUL + characters in the data. Work around this problem by advancing + past the NUL chars. */ + size_t size1 = strlen (s1) + 1; + size_t size2 = strlen (s2) + 1; + s1 += size1; + s2 += size2; + s1len -= size1; + s2len -= size2; + + if (s1len == 0) + { + if (s2len != 0) + diff = -1; + break; + } + else if (s2len == 0) + { + diff = 1; + break; + } + } + return diff; +} + /* Compare S1 (with length S1LEN) and S2 (with length S2LEN) according to the LC_COLLATE locale. S1 and S2 do not overlap, and are not adjacent. Perhaps temporarily modify the bytes after S1 and S2, but restore their original contents before returning. Set errno to an error number if there is an error, and to zero otherwise. 
*/ + int memcoll (char *s1, size_t s1len, char *s2, size_t s2len) { int diff; -#if HAVE_STRCOLL - /* strcoll is slow on many platforms, so check for the common case where the arguments are bytewise equal. Otherwise, walk through the buffers using strcoll on each substring. */ @@ -54,43 +88,33 @@ memcoll (char *s1, size_t s1len, char *s2, size_t s2len) s1[s1len++] = '\0'; s2[s2len++] = '\0'; - while (! (errno = 0, (diff = strcoll (s1, s2)) || errno)) - { - /* strcoll found no difference, but perhaps it was fooled by NUL - characters in the data. Work around this problem by advancing - past the NUL chars. */ - size_t size1 = strlen (s1) + 1; - size_t size2 = strlen (s2) + 1; - s1 += size1; - s2 += size2; - s1len -= size1; - s2len -= size2; - - if (s1len == 0) - { - if (s2len != 0) - diff = -1; - break; - } - else if (s2len == 0) - { - diff = 1; - break; - } - } + diff = strcoll_loop (s1, s1len, s2, s2len); s1[s1len - 1] = n1; s2[s2len - 1] = n2; } -#else + return diff; +} - diff = memcmp (s1, s2, s1len < s2len ? s1len : s2len); - if (! diff) - diff = s1len < s2len ? -1 : s1len != s2len; - errno = 0; +/* Like memcoll, but S1 and S2 are known to be NUL delimited, thus no + modification to S1 or S2 are needed. */ +int +memcoll0 (const char *s1, size_t s1len, const char *s2, size_t s2len) +{ + int diff; + if (!(s1len > 0 && s1[s1len] == '\0')) + abort (); + if (!(s2len > 0 && s2[s2len] == '\0')) + abort (); -#endif + if (s1len == s2len && memcmp (s1, s2, s1len) == 0) + { + errno = 0; + diff = 0; + } + else + diff = strcoll_loop (s1, s1len, s2, s2len); return diff; } -- cgit v1.2.1