1 files changed, 175 insertions, 1 deletions
diff --git a/subversion/include/private/svn_utf_private.h b/subversion/include/private/svn_utf_private.h
index 9f5a4ad..4584944 100644
--- a/subversion/include/private/svn_utf_private.h
+++ b/subversion/include/private/svn_utf_private.h
@@ -21,7 +21,7 @@
  * @endcopyright
  *
  * @file svn_utf_private.h
- * @brief UTF validation routines
+ * @brief UTF validation and normalization routines
  */
 
 #ifndef SVN_UTF_PRIVATE_H
@@ -31,6 +31,8 @@
 #include <apr_pools.h>
 
 #include "svn_types.h"
+#include "svn_string.h"
+#include "svn_string_private.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -71,6 +73,18 @@ svn_utf__last_valid(const char *src, apr_size_t len);
 const char *
 svn_utf__last_valid2(const char *src, apr_size_t len);
 
+/* Copy LENGTH bytes of SRC, converting characters as follows:
+    - Pass characters from the ASCII subset to the result
+    - Strip all combining marks from the string
+    - Represent other valid Unicode chars as {U+XXXX}
+    - Replace invalid Unicode chars with {U?XXXX}
+    - Represent chars that are not valid UTF-8 as ?\XX
+    - Replace codes outside the Unicode range with a sequence of ?\XX
+    - Represent the null byte as \0
+   Allocate the result in POOL. */
+const char *
+svn_utf__fuzzy_escape(const char *src, apr_size_t length, apr_pool_t *pool);
+
 const char *
 svn_utf__cstring_from_utf8_fuzzy(const char *src,
                                  apr_pool_t *pool,
@@ -80,6 +94,166 @@ svn_utf__cstring_from_utf8_fuzzy(const char *src,
                                                apr_pool_t *));
 
 
+#if defined(WIN32)
+/* On Windows: Convert the UTF-8 string SRC to UTF-16.
+   If PREFIX is not NULL, prepend it to the converted result.
+   The result, if not empty, will be allocated in RESULT_POOL. */
+svn_error_t *
+svn_utf__win32_utf8_to_utf16(const WCHAR **result,
+                             const char *src,
+                             const WCHAR *prefix,
+                             apr_pool_t *result_pool);
+
+/* On Windows: Convert the UTF-16 string SRC to UTF-8.
+   If PREFIX is not NULL, prepend it to the converted result.
+   The result, if not empty, will be allocated in RESULT_POOL. */
+svn_error_t *
+svn_utf__win32_utf16_to_utf8(const char **result,
+                             const WCHAR *src,
+                             const char *prefix,
+                             apr_pool_t *result_pool);
+#endif /* WIN32*/
+
+
+/* A constant used for many length parameters in the utf8proc wrappers
+ * to indicate that the length of a string is unknonw. */
+#define SVN_UTF__UNKNOWN_LENGTH ((apr_size_t) -1)
+
+
+/* Compare two UTF-8 strings, ignoring normalization, using buffers
+ * BUF1 and BUF2 for temporary storage. If either of LEN1 or LEN2 is
+ * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is
+ * null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Return compare value in *RESULT.
+ */
+svn_error_t *
+svn_utf__normcmp(int *result,
+                 const char *str1, apr_size_t len1,
+                 const char *str2, apr_size_t len2,
+                 svn_membuf_t *buf1, svn_membuf_t *buf2);
+
+/* Normalize the UTF-8 string STR to form C, using BUF for temporary
+ * storage. If LEN is SVN_UTF__UNKNOWN_LENGTH, assume STR is
+ * null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Return the normalized string in *RESULT, which shares storage with
+ * BUF and is valid only until the next time BUF is modified.
+ *
+ * A returned error may indicate that STRING contains invalid UTF-8 or
+ * invalid Unicode codepoints.
+ */
+svn_error_t*
+svn_utf__normalize(const char **result,
+                   const char *str, apr_size_t len,
+                   svn_membuf_t *buf);
+
+/* Check if STRING is a valid, NFC-normalized UTF-8 string.  Note that
+ * a FALSE return value may indicate that STRING is not valid UTF-8 at
+ * all.
+ *
+ * Use SCRATCH_POOL for temporary allocations.
+ */
+svn_boolean_t
+svn_utf__is_normalized(const char *string, apr_pool_t *scratch_pool);
+
+/* Encode an UCS-4 string to UTF-8, placing the result into BUFFER.
+ * While utf8proc does have a similar function, it does more checking
+ * and processing than we want here; this function does not attempt
+ * any normalizations but just encodes the individual code points.
+ * The encoded string will always be NUL-terminated.
+ *
+ * Return the length of the result (excluding the NUL terminator) in
+ * *result_length.
+ *
+ * A returned error indicates that a codepoint is invalid.
+ */
+svn_error_t *
+svn_utf__encode_ucs4_string(svn_membuf_t *buffer,
+                            const apr_int32_t *ucs4str,
+                            apr_size_t length,
+                            apr_size_t *result_length);
+
+/* Pattern matching similar to the the SQLite LIKE and GLOB
+ * operators. PATTERN, KEY and ESCAPE must all point to UTF-8
+ * strings. Furthermore, ESCAPE, if provided, must be a character from
+ * the ASCII subset.
+ *
+ * If any of PATTERN_LEN, STRING_LEN or ESCAPE_LEN are
+ * SVN_UTF__UNKNOWN_LENGTH, assume the associated string is
+ * null-terminated; otherwise, consider the string only up to the
+ * given length.
+ *
+ * Use buffers PATTERN_BUF, STRING_BUF and TEMP_BUF for temporary storage.
+ *
+ * If SQL_LIKE is true, interpret PATTERN as a pattern used by the SQL
+ * LIKE operator and notice ESCAPE. Otherwise it's a Unix fileglob
+ * pattern, and ESCAPE must be NULL.
+ *
+ * Set *MATCH to the result of the comparison.
+*/
+svn_error_t *
+svn_utf__glob(svn_boolean_t *match,
+              const char *pattern, apr_size_t pattern_len,
+              const char *string, apr_size_t string_len,
+              const char *escape, apr_size_t escape_len,
+              svn_boolean_t sql_like,
+              svn_membuf_t *pattern_buf,
+              svn_membuf_t *string_buf,
+              svn_membuf_t *temp_buf);
+
+/* Return the compiled version of the wrapped utf8proc library. */
+const char *
+svn_utf__utf8proc_compiled_version(void);
+
+/* Return the runtime version of the wrapped utf8proc library. */
+const char *
+svn_utf__utf8proc_runtime_version(void);
+
+/* Convert an UTF-16 (or UCS-2) string to UTF-8, returning the pointer
+ * in RESULT. If BIG_ENDIAN is set, then UTF16STR is big-endian;
+ * otherwise, it's little-endian.
+ *
+ * If UTF16LEN is SVN_UTF__UNKNOWN_LENGTH, then UTF16STR must be
+ * terminated with a zero; otherwise, it is the number of 16-bit codes
+ * to convert, and the source string may contain NUL values.
+ *
+ * Allocate RESULT in RESULT_POOL and use SCRATCH_POOL for
+ * intermediate allocation.
+ *
+ * This function combines UTF-16 surrogate pairs into single code
+ * points, but will leave single lead or trail surrogates unchanged.
+ */
+svn_error_t *
+svn_utf__utf16_to_utf8(const svn_string_t **result,
+                       const apr_uint16_t *utf16str,
+                       apr_size_t utf16len,
+                       svn_boolean_t big_endian,
+                       apr_pool_t *result_pool,
+                       apr_pool_t *scratch_pool);
+
+/* Convert an UTF-32 string to UTF-8, returning the pointer in
+ * RESULT. If BIG_ENDIAN is set, then UTF32STR is big-endian;
+ * otherwise, it's little-endian.
+ *
+ * If UTF32LEN is SVN_UTF__UNKNOWN_LENGTH, then UTF32STR must be
+ * terminated with a zero; otherwise, it is the number of 32-bit codes
+ * to convert, and the source string may contain NUL values.
+ *
+ * Allocate RESULT in RESULT_POOL and use SCRATCH_POOL for
+ * intermediate allocation.
+ */
+svn_error_t *
+svn_utf__utf32_to_utf8(const svn_string_t **result,
+                       const apr_int32_t *utf32str,
+                       apr_size_t utf32len,
+                       svn_boolean_t big_endian,
+                       apr_pool_t *result_pool,
+                       apr_pool_t *scratch_pool);
+
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */