summaryrefslogtreecommitdiff
path: root/pcre_ord2utf8.c
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-28 17:16:11 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-28 17:16:11 +0000
commit 5e8b286b566d7ec502b80892e5b709025631d58c (patch)
tree c8a6c83ad13e79a9b64718b784fe0126764d98b7 /pcre_ord2utf8.c
parent a29cc4dc66d82b59de7616c53517c58271e6e0e8 (diff)
download pcre-5e8b286b566d7ec502b80892e5b709025631d58c.tar.gz
Merging all the changes from the pcre16 branch into the trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@836 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_ord2utf8.c')
-rw-r--r-- pcre_ord2utf8.c 30
1 files changed, 20 insertions, 10 deletions
diff --git a/pcre_ord2utf8.c b/pcre_ord2utf8.c
index 6f4eb9e..6865e14 100644
--- a/pcre_ord2utf8.c
+++ b/pcre_ord2utf8.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2008 University of Cambridge
+ Copyright (c) 1997-2012 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -52,35 +52,45 @@ character value into a UTF8 string. */
* Convert character value to UTF-8 *
*************************************************/
-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
+/* This function takes an integer value in the range 0 - 0x10ffff
+and encodes it as a UTF-8 character in 1 to 6 pcre_uchars.
Arguments:
cvalue the character value
- buffer pointer to buffer for result - at least 6 bytes long
+ buffer pointer to buffer for result - at least 6 pcre_uchars long
Returns: number of characters placed in the buffer
*/
int
-_pcre_ord2utf8(int cvalue, uschar *buffer)
+PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
{
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
+
register int i, j;
-for (i = 0; i < _pcre_utf8_table1_size; i++)
- if (cvalue <= _pcre_utf8_table1[i]) break;
+
+/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
+Should never happen in practice. */
+if ((cvalue & 0xf800) == 0xd800 || cvalue >= 0x110000)
+ cvalue = 0xfffe;
+
+for (i = 0; i < PRIV(utf8_table1_size); i++)
+ if (cvalue <= PRIV(utf8_table1)[i]) break;
buffer += i;
for (j = i; j > 0; j--)
{
*buffer-- = 0x80 | (cvalue & 0x3f);
cvalue >>= 6;
}
-*buffer = _pcre_utf8_table2[i] | cvalue;
+*buffer = PRIV(utf8_table2)[i] | cvalue;
return i + 1;
+
#else
+
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
-(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer); /* called when SUPPORT_UTF is not defined. */
return 0;
+
#endif
}