summaryrefslogtreecommitdiff
path: root/subversion/libsvn_subr/utf_validate.c
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/libsvn_subr/utf_validate.c')
-rw-r--r--subversion/libsvn_subr/utf_validate.c79
1 files changed, 4 insertions, 75 deletions
diff --git a/subversion/libsvn_subr/utf_validate.c b/subversion/libsvn_subr/utf_validate.c
index 8311fd7..0aab81c 100644
--- a/subversion/libsvn_subr/utf_validate.c
+++ b/subversion/libsvn_subr/utf_validate.c
@@ -258,24 +258,7 @@ static const char machine [9][14] = {
static const char *
first_non_fsm_start_char(const char *data, apr_size_t max_len)
{
-#if !SVN_UNALIGNED_ACCESS_IS_OK
-
- /* On some systems, we need to make sure that buf is properly aligned
- * for chunky data access.
- */
- if ((apr_uintptr_t)data & (sizeof(apr_uintptr_t)-1))
- {
- apr_size_t len = (~(apr_uintptr_t)data) & (sizeof(apr_uintptr_t)-1);
- if (len > max_len)
- len = max_len;
- max_len -= len;
-
- for (; len > 0; ++data, --len)
- if (*data < 0 || *data >= 0x80)
- return data;
- }
-
-#endif
+#if SVN_UNALIGNED_ACCESS_IS_OK
/* Scan the input one machine word at a time. */
for (; max_len > sizeof(apr_uintptr_t)
@@ -283,55 +266,11 @@ first_non_fsm_start_char(const char *data, apr_size_t max_len)
if (*(const apr_uintptr_t *)data & SVN__BIT_7_SET)
break;
- /* The remaining odd bytes will be examined the naive way: */
- for (; max_len > 0; ++data, --max_len)
- if (*data < 0 || *data >= 0x80)
- break;
-
- return data;
-}
-
-/* Scan the C string in *DATA for chars that are not in the octet
- * category 0 (FSM_START). Return the position of either the such
- * char or of the terminating NUL.
- */
-static const char *
-first_non_fsm_start_char_cstring(const char *data)
-{
- /* We need to make sure that BUF is properly aligned for chunky data
- * access because we don't know the string's length. Unaligned chunk
- * read access beyond the NUL terminator could therefore result in a
- * segfault.
- */
- for (; (apr_uintptr_t)data & (sizeof(apr_uintptr_t)-1); ++data)
- if (*data <= 0 || *data >= 0x80)
- return data;
-
- /* Scan the input one machine word at a time. */
-#ifndef SVN_UTF_NO_UNINITIALISED_ACCESS
- /* This may read allocated but initialised bytes beyond the
- terminating null. Any such bytes are always readable and this
- code operates correctly whatever the uninitialised values happen
- to be. However memory checking tools such as valgrind and GCC
- 4.8's address santitizer will object so this bit of code can be
- disabled at compile time. */
- for (; ; data += sizeof(apr_uintptr_t))
- {
- /* Check for non-ASCII chars: */
- apr_uintptr_t chunk = *(const apr_uintptr_t *)data;
- if (chunk & SVN__BIT_7_SET)
- break;
-
- /* This is the well-known strlen test: */
- chunk |= (chunk & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
- if ((chunk & SVN__BIT_7_SET) != SVN__BIT_7_SET)
- break;
- }
#endif
/* The remaining odd bytes will be examined the naive way: */
- for (; ; ++data)
- if (*data <= 0 || *data >= 0x80)
+ for (; max_len > 0; ++data, --max_len)
+ if ((unsigned char)*data >= 0x80)
break;
return data;
@@ -359,20 +298,10 @@ svn_utf__last_valid(const char *data, apr_size_t len)
svn_boolean_t
svn_utf__cstring_is_valid(const char *data)
{
- int state = FSM_START;
-
if (!data)
return FALSE;
- data = first_non_fsm_start_char_cstring(data);
-
- while (*data)
- {
- unsigned char octet = *data++;
- int category = octet_category[octet];
- state = machine[state][category];
- }
- return state == FSM_START;
+ return svn_utf__is_valid(data, strlen(data));
}
svn_boolean_t