summaryrefslogtreecommitdiff
path: root/subversion/include/private/svn_utf_private.h
diff options
context:
space:
mode:
Diffstat (limited to 'subversion/include/private/svn_utf_private.h')
-rw-r--r--subversion/include/private/svn_utf_private.h176
1 files changed, 175 insertions, 1 deletions
diff --git a/subversion/include/private/svn_utf_private.h b/subversion/include/private/svn_utf_private.h
index 9f5a4ad..4584944 100644
--- a/subversion/include/private/svn_utf_private.h
+++ b/subversion/include/private/svn_utf_private.h
@@ -21,7 +21,7 @@
* @endcopyright
*
* @file svn_utf_private.h
- * @brief UTF validation routines
+ * @brief UTF validation and normalization routines
*/
#ifndef SVN_UTF_PRIVATE_H
@@ -31,6 +31,8 @@
#include <apr_pools.h>
#include "svn_types.h"
+#include "svn_string.h"
+#include "svn_string_private.h"
#ifdef __cplusplus
extern "C" {
@@ -71,6 +73,18 @@ svn_utf__last_valid(const char *src, apr_size_t len);
const char *
svn_utf__last_valid2(const char *src, apr_size_t len);
+/* Copy LENGTH bytes of SRC, converting characters as follows:
+ - Pass characters from the ASCII subset to the result
+ - Strip all combining marks from the string
+ - Represent other valid Unicode chars as {U+XXXX}
+ - Replace invalid Unicode chars with {U?XXXX}
+ - Represent chars that are not valid UTF-8 as ?\XX
+ - Replace codes outside the Unicode range with a sequence of ?\XX
+ - Represent the null byte as \0
+ Allocate the result in POOL. */
+const char *
+svn_utf__fuzzy_escape(const char *src, apr_size_t length, apr_pool_t *pool);
+
const char *
svn_utf__cstring_from_utf8_fuzzy(const char *src,
apr_pool_t *pool,
@@ -80,6 +94,166 @@ svn_utf__cstring_from_utf8_fuzzy(const char *src,
apr_pool_t *));
+#if defined(WIN32)
+/* On Windows: Convert the UTF-8 string SRC to UTF-16.
+ If PREFIX is not NULL, prepend it to the converted result.
+ The result, if not empty, will be allocated in RESULT_POOL. */
+svn_error_t *
+svn_utf__win32_utf8_to_utf16(const WCHAR **result,
+ const char *src,
+ const WCHAR *prefix,
+ apr_pool_t *result_pool);
+
+/* On Windows: Convert the UTF-16 string SRC to UTF-8.
+ If PREFIX is not NULL, prepend it to the converted result.
+ The result, if not empty, will be allocated in RESULT_POOL. */
+svn_error_t *
+svn_utf__win32_utf16_to_utf8(const char **result,
+ const WCHAR *src,
+ const char *prefix,
+ apr_pool_t *result_pool);
+#endif /* WIN32*/
+
+
+/* A constant used for many length parameters in the utf8proc wrappers
+ * to indicate that the length of a string is unknown. */
+#define SVN_UTF__UNKNOWN_LENGTH ((apr_size_t) -1)
+
+
+/* Compare two UTF-8 strings, ignoring normalization, using buffers
+ * BUF1 and BUF2 for temporary storage. If either of LEN1 or LEN2 is
+ * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is
+ * null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Return compare value in *RESULT.
+ */
+svn_error_t *
+svn_utf__normcmp(int *result,
+ const char *str1, apr_size_t len1,
+ const char *str2, apr_size_t len2,
+ svn_membuf_t *buf1, svn_membuf_t *buf2);
+
+/* Normalize the UTF-8 string STR to form C, using BUF for temporary
+ * storage. If LEN is SVN_UTF__UNKNOWN_LENGTH, assume STR is
+ * null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Return the normalized string in *RESULT, which shares storage with
+ * BUF and is valid only until the next time BUF is modified.
+ *
+ * A returned error may indicate that STR contains invalid UTF-8 or
+ * invalid Unicode codepoints.
+ */
+svn_error_t*
+svn_utf__normalize(const char **result,
+ const char *str, apr_size_t len,
+ svn_membuf_t *buf);
+
+/* Check if STRING is a valid, NFC-normalized UTF-8 string. Note that
+ * a FALSE return value may indicate that STRING is not valid UTF-8 at
+ * all.
+ *
+ * Use SCRATCH_POOL for temporary allocations.
+ */
+svn_boolean_t
+svn_utf__is_normalized(const char *string, apr_pool_t *scratch_pool);
+
+/* Encode a UCS-4 string to UTF-8, placing the result into BUFFER.
+ * While utf8proc does have a similar function, it does more checking
+ * and processing than we want here; this function does not attempt
+ * any normalizations but just encodes the individual code points.
+ * The encoded string will always be NUL-terminated.
+ *
+ * Return the length of the result (excluding the NUL terminator) in
+ * *result_length.
+ *
+ * A returned error indicates that a codepoint is invalid.
+ */
+svn_error_t *
+svn_utf__encode_ucs4_string(svn_membuf_t *buffer,
+ const apr_int32_t *ucs4str,
+ apr_size_t length,
+ apr_size_t *result_length);
+
+/* Pattern matching similar to the SQLite LIKE and GLOB
+ * operators. PATTERN, STRING and ESCAPE must all point to UTF-8
+ * strings. Furthermore, ESCAPE, if provided, must be a character from
+ * the ASCII subset.
+ *
+ * If any of PATTERN_LEN, STRING_LEN or ESCAPE_LEN are
+ * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is
+ * null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Use buffers PATTERN_BUF, STRING_BUF and TEMP_BUF for temporary storage.
+ *
+ * If SQL_LIKE is true, interpret PATTERN as a pattern used by the SQL
+ * LIKE operator and notice ESCAPE. Otherwise it's a Unix fileglob
+ * pattern, and ESCAPE must be NULL.
+ *
+ * Set *MATCH to the result of the comparison.
+*/
+svn_error_t *
+svn_utf__glob(svn_boolean_t *match,
+ const char *pattern, apr_size_t pattern_len,
+ const char *string, apr_size_t string_len,
+ const char *escape, apr_size_t escape_len,
+ svn_boolean_t sql_like,
+ svn_membuf_t *pattern_buf,
+ svn_membuf_t *string_buf,
+ svn_membuf_t *temp_buf);
+
+/* Return the compiled version of the wrapped utf8proc library. */
+const char *
+svn_utf__utf8proc_compiled_version(void);
+
+/* Return the runtime version of the wrapped utf8proc library. */
+const char *
+svn_utf__utf8proc_runtime_version(void);
+
+/* Convert a UTF-16 (or UCS-2) string to UTF-8, returning the pointer
+ * in RESULT. If BIG_ENDIAN is set, then UTF16STR is big-endian;
+ * otherwise, it's little-endian.
+ *
+ * If UTF16LEN is SVN_UTF__UNKNOWN_LENGTH, then UTF16STR must be
+ * terminated with a zero; otherwise, it is the number of 16-bit codes
+ * to convert, and the source string may contain NUL values.
+ *
+ * Allocate RESULT in RESULT_POOL and use SCRATCH_POOL for
+ * intermediate allocation.
+ *
+ * This function combines UTF-16 surrogate pairs into single code
+ * points, but will leave single lead or trail surrogates unchanged.
+ */
+svn_error_t *
+svn_utf__utf16_to_utf8(const svn_string_t **result,
+ const apr_uint16_t *utf16str,
+ apr_size_t utf16len,
+ svn_boolean_t big_endian,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool);
+
+/* Convert a UTF-32 string to UTF-8, returning the pointer in
+ * RESULT. If BIG_ENDIAN is set, then UTF32STR is big-endian;
+ * otherwise, it's little-endian.
+ *
+ * If UTF32LEN is SVN_UTF__UNKNOWN_LENGTH, then UTF32STR must be
+ * terminated with a zero; otherwise, it is the number of 32-bit codes
+ * to convert, and the source string may contain NUL values.
+ *
+ * Allocate RESULT in RESULT_POOL and use SCRATCH_POOL for
+ * intermediate allocation.
+ */
+svn_error_t *
+svn_utf__utf32_to_utf8(const svn_string_t **result,
+ const apr_int32_t *utf32str,
+ apr_size_t utf32len,
+ svn_boolean_t big_endian,
+ apr_pool_t *result_pool,
+ apr_pool_t *scratch_pool);
+
+
#ifdef __cplusplus
}
#endif /* __cplusplus */