summaryrefslogtreecommitdiff
path: root/subversion/libsvn_subr/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_subr/string.c')
-rw-r--r--subversion/libsvn_subr/string.c357
1 files changed, 284 insertions, 73 deletions
diff --git a/subversion/libsvn_subr/string.c b/subversion/libsvn_subr/string.c
index c3d7fec..43a1a4e 100644
--- a/subversion/libsvn_subr/string.c
+++ b/subversion/libsvn_subr/string.c
@@ -26,6 +26,7 @@
#include <apr.h>
+#include <assert.h>
#include <string.h> /* for memcpy(), memcmp(), strlen() */
#include <apr_fnmatch.h>
@@ -53,9 +54,9 @@ membuf_create(void **data, apr_size_t *size,
/* apr_palloc will allocate multiples of 8.
* Thus, we would waste some of that memory if we stuck to the
* smaller size. Note that this is safe even if apr_palloc would
- * use some other aligment or none at all. */
+ * use some other alignment or none at all. */
minimum_size = APR_ALIGN_DEFAULT(minimum_size);
- *data = (!minimum_size ? NULL : apr_palloc(pool, minimum_size));
+ *data = apr_palloc(pool, minimum_size);
*size = minimum_size;
}
@@ -78,14 +79,10 @@ membuf_ensure(void **data, apr_size_t *size,
apr_size_t new_size = *size;
if (new_size == 0)
- /* APR will increase odd allocation sizes to the next
- * multiple for 8, for instance. Take advantage of that
- * knowledge and allow for the extra size to be used. */
new_size = minimum_size;
else
while (new_size < minimum_size)
{
- /* new_size is aligned; doubling it should keep it aligned */
const apr_size_t prev_size = new_size;
new_size *= 2;
@@ -121,7 +118,10 @@ svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size)
const apr_size_t old_size = membuf->size;
membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
- if (membuf->data && old_data && old_data != membuf->data)
+
+ /* If we re-allocated MEMBUF->DATA, it cannot be NULL.
+ * Statically initialized membuffers (OLD_DATA) may be NULL, though. */
+ if (old_data && old_data != membuf->data)
memcpy(membuf->data, old_data, old_size);
}
@@ -151,7 +151,7 @@ string_compare(const char *str1,
if (len1 != len2)
return FALSE;
- /* now the strings must have identical lenghths */
+ /* now the strings must have identical lengths */
if ((memcmp(str1, str2, len1)) == 0)
return TRUE;
@@ -240,7 +240,9 @@ svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
new_string->data = data;
new_string->len = size;
- memcpy(data, bytes, size);
+ /* If SIZE is 0, NULL is valid for BYTES. */
+ if (size)
+ memcpy(data, bytes, size);
/* Null termination is the convention -- even if we suspect the data
to be binary, it's not up to us to decide, it's the caller's
@@ -299,8 +301,9 @@ svn_string_isempty(const svn_string_t *str)
+/* Return a duplicate of ORIGINAL_STRING, created by svn_string_ncreate()
+ * from its DATA and LEN members using POOL.  A NULL ORIGINAL_STRING is
+ * accepted and yields NULL instead of being dereferenced. */
svn_string_t *
svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool)
{
- return (svn_string_ncreate(original_string->data,
- original_string->len, pool));
+ return (original_string ? svn_string_ncreate(original_string->data,
+ original_string->len, pool)
+ : NULL);
}
@@ -393,7 +396,10 @@ svn_stringbuf_t *
svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
{
svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool);
- memcpy(strbuf->data, bytes, size);
+
+ /* If SIZE is 0, NULL is valid for BYTES. */
+ if (size)
+ memcpy(strbuf->data, bytes, size);
/* Null termination is the convention -- even if we suspect the data
to be binary, it's not up to us to decide, it's the caller's
@@ -418,6 +424,17 @@ svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool)
return svn_stringbuf_ncreate(str->data, str->len, pool);
}
+/* Wrap the NUL-terminated STR in a new string buffer header allocated in
+ * POOL.  The characters are not copied: the result's DATA member is STR
+ * itself, LEN is strlen(STR) and BLOCKSIZE is LEN + 1, i.e. the buffer
+ * has no spare capacity.  STR must not be NULL since it is handed to
+ * strlen(). */
+svn_stringbuf_t *
+svn_stringbuf_create_wrap(char *str, apr_pool_t *pool)
+{
+ svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result));
+ result->pool = pool;
+ result->data = str;
+ result->len = strlen(str);
+ result->blocksize = result->len + 1;
+
+ return result;
+}
svn_stringbuf_t *
svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap)
@@ -579,6 +596,10 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
apr_size_t total_len;
void *start_address;
+ if (!count)
+ /* Allow BYTES to be NULL by avoiding passing it to memcpy. */
+ return;
+
total_len = str->len + count; /* total size needed */
/* svn_stringbuf_ensure adds 1 for null terminator. */
@@ -595,6 +616,21 @@ svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
to null-terminate. */
}
+/* Append COUNT copies of BYTE to STR.  The buffer is grown through
+ * svn_stringbuf_ensure() before the fill, and the result is
+ * NUL-terminated.  With COUNT == 0 the contents stay the same and only
+ * the terminator is rewritten. */
+void
+svn_stringbuf_appendfill(svn_stringbuf_t *str,
+ char byte,
+ apr_size_t count)
+{
+ apr_size_t new_len = str->len + count;
+ svn_stringbuf_ensure(str, new_len);
+
+ /* fill the newly appended range, starting at the old end of the string */
+ memset(str->data + str->len, byte, count);
+
+ /* update buffer length and always NUL-terminate it */
+ str->len = new_len;
+ str->data[new_len] = '\0';
+}
+
void
svn_stringbuf_appendstr(svn_stringbuf_t *targetstr,
@@ -616,23 +652,22 @@ svn_stringbuf_insert(svn_stringbuf_t *str,
const char *bytes,
apr_size_t count)
{
+ /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. */
+ if (count == 0)
+ return;
+
+ /* special case: BYTES overlaps with this string -> copy the source */
if (bytes + count > str->data && bytes < str->data + str->blocksize)
- {
- /* special case: BYTES overlaps with this string -> copy the source */
- const char *temp = apr_pmemdup(str->pool, bytes, count);
- svn_stringbuf_insert(str, pos, temp, count);
- }
- else
- {
- if (pos > str->len)
- pos = str->len;
+ bytes = apr_pmemdup(str->pool, bytes, count);
- svn_stringbuf_ensure(str, str->len + count);
- memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
- memcpy(str->data + pos, bytes, count);
+ if (pos > str->len)
+ pos = str->len;
- str->len += count;
- }
+ svn_stringbuf_ensure(str, str->len + count);
+ memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
+ memcpy(str->data + pos, bytes, count);
+
+ str->len += count;
}
void
@@ -642,7 +677,7 @@ svn_stringbuf_remove(svn_stringbuf_t *str,
{
if (pos > str->len)
pos = str->len;
- if (pos + count > str->len)
+ if (count > str->len - pos)
count = str->len - pos;
memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1);
@@ -656,32 +691,35 @@ svn_stringbuf_replace(svn_stringbuf_t *str,
const char *bytes,
apr_size_t new_count)
{
- if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
+ /* For NEW_COUNT==0, we allow BYTES to be NULL.
+ * In that case, this is just a substring removal. */
+ if (new_count == 0)
{
- /* special case: BYTES overlaps with this string -> copy the source */
- const char *temp = apr_pmemdup(str->pool, bytes, new_count);
- svn_stringbuf_replace(str, pos, old_count, temp, new_count);
+ svn_stringbuf_remove(str, pos, old_count);
+ return;
}
- else
- {
- if (pos > str->len)
- pos = str->len;
- if (pos + old_count > str->len)
- old_count = str->len - pos;
- if (old_count < new_count)
- {
- apr_size_t delta = new_count - old_count;
- svn_stringbuf_ensure(str, str->len + delta);
- }
+ /* special case: BYTES overlaps with this string -> copy the source */
+ if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
+ bytes = apr_pmemdup(str->pool, bytes, new_count);
- if (old_count != new_count)
- memmove(str->data + pos + new_count, str->data + pos + old_count,
- str->len - pos - old_count + 1);
+ if (pos > str->len)
+ pos = str->len;
+ if (old_count > str->len - pos)
+ old_count = str->len - pos;
- memcpy(str->data + pos, bytes, new_count);
- str->len += new_count - old_count;
+ if (old_count < new_count)
+ {
+ apr_size_t delta = new_count - old_count;
+ svn_stringbuf_ensure(str, str->len + delta);
}
+
+ if (old_count != new_count)
+ memmove(str->data + pos + new_count, str->data + pos + old_count,
+ str->len - pos - old_count + 1);
+
+ memcpy(str->data + pos, bytes, new_count);
+ str->len += new_count - old_count;
}
@@ -832,7 +870,7 @@ char *
svn_cstring_tokenize(const char *sep, char **str)
{
char *token;
- const char * next;
+ char *next;
char csep;
/* check parameters */
@@ -862,8 +900,8 @@ svn_cstring_tokenize(const char *sep, char **str)
}
else
{
- *(char *)next = '\0';
- *str = (char *)next + 1;
+ *next = '\0';
+ *str = next + 1;
}
return token;
@@ -1014,17 +1052,33 @@ svn_cstring_atoi(int *n, const char *str)
return SVN_NO_ERROR;
}
-
-apr_status_t
-svn__strtoff(apr_off_t *offset, const char *buf, char **end, int base)
+/* Parse the unsigned decimal number at the start of BUFFER and return its
+ * value.  *END is set to the first character that is not a decimal digit
+ * (possibly the NUL terminator).  Neither leading whitespace nor a sign
+ * is skipped: if BUFFER does not start with a digit, the result is 0 and
+ * *END == BUFFER.  There is no overflow detection; the unsigned
+ * arithmetic simply wraps around. */
+unsigned long
+svn__strtoul(const char* buffer, const char** end)
{
-#if !APR_VERSION_AT_LEAST(1,0,0)
- errno = 0;
- *offset = strtol(buf, end, base);
- return APR_FROM_OS_ERROR(errno);
-#else
- return apr_strtoff(offset, buf, end, base);
-#endif
+ unsigned long result = 0;
+
+ /* this loop will execute in just 2 CPU cycles, confirmed by measurement:
+ 7 macro-ops (max 4 / cycle => 2 cycles)
+ 1 load (max 1 / cycle)
+ 1 jump (compare + conditional jump == 1 macro op; max 1 / cycle)
+ 2 arithmetic ops (subtract, increment; max 3 / cycle)
+ 2 scale-and-add AGU ops (max 3 / cycle)
+ 1 compiler-generated move operation
+ dependency chain: temp = result * 4 + result; result = temp * 2 + c
+ (2 ops with latency 1 => 2 cycles)
+ */
+ while (1)
+ {
+ /* Characters below '0' produce a negative difference that becomes a
+ huge unsigned value, so a single "> 9" test rejects everything
+ that is not a decimal digit, NUL included. */
+ unsigned long c = (unsigned char)*buffer - (unsigned char)'0';
+ if (c > 9)
+ break;
+
+ result = result * 10 + c;
+ ++buffer;
+ }
+
+ *end = buffer;
+ return result;
}
/* "Precalculated" itoa values for 2 places (including leading zeros).
@@ -1123,11 +1177,11 @@ svn__i64toa(char * dest, apr_int64_t number)
return svn__ui64toa(dest, (apr_uint64_t)number);
*dest = '-';
- return svn__ui64toa(dest + 1, (apr_uint64_t)(0-number)) + 1;
+ return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1;
}
static void
-ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)
+ui64toa_sep(apr_uint64_t number, char separator, char *buffer)
{
apr_size_t length = svn__ui64toa(buffer, number);
apr_size_t i;
@@ -1135,7 +1189,7 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)
for (i = length; i > 3; i -= 3)
{
memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3);
- buffer[i-3] = seperator;
+ buffer[i-3] = separator;
length++;
}
@@ -1143,30 +1197,110 @@ ui64toa_sep(apr_uint64_t number, char seperator, char *buffer)
}
+/* Format NUMBER with SEPARATOR via ui64toa_sep() into a local buffer and
+ * return a copy of that text allocated in POOL. */
char *
-svn__ui64toa_sep(apr_uint64_t number, char seperator, apr_pool_t *pool)
+svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool)
{
char buffer[2 * SVN_INT64_BUFFER_SIZE];
- ui64toa_sep(number, seperator, buffer);
+ ui64toa_sep(number, separator, buffer);
return apr_pstrdup(pool, buffer);
}
+/* Signed counterpart of svn__ui64toa_sep(): format NUMBER with SEPARATOR
+ * via ui64toa_sep() into a local buffer and return a copy of that text
+ * allocated in POOL.  Negative values get a leading '-' followed by the
+ * formatted magnitude. */
char *
-svn__i64toa_sep(apr_int64_t number, char seperator, apr_pool_t *pool)
+svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool)
{
char buffer[2 * SVN_INT64_BUFFER_SIZE];
if (number < 0)
{
buffer[0] = '-';
- ui64toa_sep((apr_uint64_t)(-number), seperator, &buffer[1]);
+ /* Negate in the unsigned domain, as svn__i64toa() does: -NUMBER is
+ undefined behavior for the most negative value, whereas
+ 0 - (apr_uint64_t)NUMBER is well-defined and yields the magnitude. */
+ ui64toa_sep(0 - (apr_uint64_t)number, separator, &buffer[1]);
}
else
- ui64toa_sep((apr_uint64_t)(number), seperator, buffer);
+ ui64toa_sep((apr_uint64_t)(number), separator, buffer);
return apr_pstrdup(pool, buffer);
}
-unsigned int
+/* Write VALUE to DEST as base-36 text, most significant digit first,
+ * using '0'-'9' for digit values 0-9 and 'a'-'z' for 10-35.  The text is
+ * NUL-terminated; the return value is the number of characters written,
+ * not counting the terminator.  A 64-bit value needs at most 13 digits,
+ * so DEST must provide room for up to 14 bytes. */
+apr_size_t
+svn__ui64tobase36(char *dest, apr_uint64_t value)
+{
+ char *dest_start = dest;
+ if (value < 10)
+ {
+ /* pretty frequent and trivial case. Make it fast. */
+ *(dest++) = (char)(value) + '0';
+ }
+ else
+ {
+ char buffer[SVN_INT64_BUFFER_SIZE];
+ char *p = buffer;
+
+ /* write result as little-endian to buffer */
+ while (value > 0)
+ {
+ char c = (char)(value % 36);
+ value /= 36;
+
+ *p = (c <= 9) ? (c + '0') : (c - 10 + 'a');
+ ++p;
+ }
+
+ /* copy as big-endian to DEST */
+ while (p > buffer)
+ *(dest++) = *(--p);
+ }
+
+ *dest = '\0';
+ return dest - dest_start;
+}
+
+/* Parse the base-36 number at the start of SOURCE and return its value.
+ * Accepted digits are '0'-'9' and lower-case 'a'-'z'.  Parsing stops at
+ * the first other character (including the NUL terminator) or after
+ * SVN_INT64_BUFFER_SIZE digits, whichever comes first.  If NEXT is not
+ * NULL, *NEXT is set to the first character that was not consumed.
+ * No digits at all yields 0 with *NEXT == SOURCE.  There is no overflow
+ * detection: values that do not fit into 64 bits wrap around. */
+apr_uint64_t
+svn__base36toui64(const char **next, const char *source)
+{
+  apr_uint64_t result = 0;
+  apr_uint64_t factor = 1;
+  apr_size_t i = 0;
+  char digits[SVN_INT64_BUFFER_SIZE];
+
+  /* convert digits to numerical values and count the number of places.
+   * Also, prevent buffer overflow.  I is unsigned like the sizeof
+   * expression it is compared with. */
+  while (i < sizeof(digits))
+    {
+      char c = *source;
+      if (c < 'a')
+        {
+          /* includes detection of NUL terminator */
+          if (c < '0' || c > '9')
+            break;
+
+          c -= '0';
+        }
+      else
+        {
+          /* C >= 'a' is already established on this branch */
+          if (c > 'z')
+            break;
+
+          c -= 'a' - 10;
+        }
+
+      digits[i++] = c;
+      source++;
+    }
+
+  /* fold digits into the result, least significant place first */
+  while (i > 0)
+    {
+      result += factor * (apr_uint64_t)digits[--i];
+      factor *= 36;
+    }
+
+  if (next)
+    *next = source;
+
+  return result;
+}
+
+
+apr_size_t
svn_cstring__similarity(const char *stra, const char *strb,
svn_membuf_t *buffer, apr_size_t *rlcs)
{
@@ -1178,7 +1312,7 @@ svn_cstring__similarity(const char *stra, const char *strb,
return svn_string__similarity(&stringa, &stringb, buffer, rlcs);
}
-unsigned int
+apr_size_t
svn_string__similarity(const svn_string_t *stringa,
const svn_string_t *stringb,
svn_membuf_t *buffer, apr_size_t *rlcs)
@@ -1242,7 +1376,7 @@ svn_string__similarity(const svn_string_t *stringa,
/* Calculate LCS length of the remainder */
for (pstr = stra; pstr < enda; ++pstr)
{
- int i;
+ apr_size_t i;
for (i = 1; i <= slots; ++i)
{
if (*pstr == strb[i-1])
@@ -1267,7 +1401,84 @@ svn_string__similarity(const svn_string_t *stringa,
/* Return similarity ratio rounded to 4 significant digits */
if (total)
- return(unsigned int)((2000 * lcs + total/2) / total);
+ return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total);
else
- return 1000;
+ return SVN_STRING__SIM_RANGE_MAX;
+}
+
+/* Return the number of leading bytes, at most MAX_LEN, on which A and B
+ * agree.  Bytes are compared raw: a NUL byte does not end the comparison,
+ * so both A and B must be readable for MAX_LEN bytes. */
+apr_size_t
+svn_cstring__match_length(const char *a,
+ const char *b,
+ apr_size_t max_len)
+{
+ apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+ /* Chunky processing is so much faster ...
+ *
+ * We can't make this work on architectures that require aligned access
+ * because A and B will probably have different alignment. So, skipping
+ * the first few chars until alignment is reached is not an option.
+ */
+ for (; pos + sizeof(apr_size_t) <= max_len; pos += sizeof(apr_size_t))
+ if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
+ break;
+
+#endif
+
+ /* Finish byte by byte.  This pins down the exact mismatch inside a chunk
+ * that differed, covers the tail that is shorter than one chunk, and is
+ * the only loop when chunked access is disabled. */
+ for (; pos < max_len; ++pos)
+ if (a[pos] != b[pos])
+ break;
+
+ return pos;
+}
+
+/* Return the number of bytes, at most MAX_LEN, on which the data directly
+ * in front of A and directly in front of B agree, comparing backwards.
+ * Only negative offsets are read (a[-1], a[-2], ... and likewise for B),
+ * so A and B point just behind the ranges being compared.  Bytes are
+ * compared raw; NUL bytes get no special treatment. */
+apr_size_t
+svn_cstring__reverse_match_length(const char *a,
+ const char *b,
+ apr_size_t max_len)
+{
+ apr_size_t pos = 0;
+
+#if SVN_UNALIGNED_ACCESS_IS_OK
+
+ /* Chunky processing is so much faster ...
+ *
+ * We can't make this work on architectures that require aligned access
+ * because A and B will probably have different alignment. So, skipping
+ * the first few chars until alignment is reached is not an option.
+ */
+ for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
+ if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
+ break;
+
+ /* The loop left POS one chunk ahead (at the chunk that differed or no
+ * longer fit); step back to the number of bytes known to match. */
+ pos -= sizeof(apr_size_t);
+
+#endif
+
+ /* If we find a mismatch at -pos, pos-1 characters matched.
+ */
+ while (++pos <= max_len)
+ if (a[0-pos] != b[0-pos])
+ return pos - 1;
+
+ /* No mismatch found -> at least MAX_LEN matching chars.
+ */
+ return max_len;
+}
+
+/* If STR begins with PREFIX, return a pointer to the character in STR
+ * that follows the prefix; otherwise return NULL.  An empty PREFIX always
+ * matches and yields STR itself.  Both arguments must be non-NULL,
+ * NUL-terminated strings. */
+const char *
+svn_cstring_skip_prefix(const char *str, const char *prefix)
+{
+ apr_size_t len = strlen(prefix);
+
+ /* strncmp() stops at a NUL in STR, so a STR shorter than PREFIX simply
+ * compares unequal. */
+ if (strncmp(str, prefix, len) == 0)
+ {
+ return str + len;
+ }
+ else
+ {
+ return NULL;
+ }
}