summary refs log tree commit diff
diff options
context:
space:
mode:
author Jan Lehnardt <jan@apache.org> 2013-10-03 17:51:37 +0200
committer Jan Lehnardt <jan@apache.org> 2013-10-03 17:51:37 +0200
commit 073f9e2528b68a94bdaa114bc6dc19d978740847 (patch)
tree 6771e6a99c7802c88f9f2568e46e0db1086318e3
parent 532100c101387a3314a870264e58e7761c787c67 (diff)
parent 9195223b12f6aae993010eea338446d28ab63f54 (diff)
download couchdb-073f9e2528b68a94bdaa114bc6dc19d978740847.tar.gz
Merge branch '1425-fix-graceful-surrogate-handling'
* 1425-fix-graceful-surrogate-handling: Handle invalid UTF-8 byte sequences gracefully by replacing them with 0xFFFD
-rw-r--r-- THANKS.in 1
-rw-r--r-- src/couchdb/priv/couch_js/utf8.c 29
2 files changed, 17 insertions, 13 deletions
diff --git a/THANKS.in b/THANKS.in
index d82c23d47..b87ffec85 100644
--- a/THANKS.in
+++ b/THANKS.in
@@ -92,6 +92,7 @@ suggesting improvements or submitting changes. Some of these people are:
* Fedor Indutny <fedor@indutny.com>
* Tim Blair
* Tady Walsh <hello@tady.me>
+ * Sam Rijs <recv@awesam.de>
# Authors from commit 6c976bd and onwards are auto-inserted. If you are merging
# a commit from a non-committer, you should not add an entry to this file. When
# `bootstrap` is run, the actual THANKS file will be generated.
diff --git a/src/couchdb/priv/couch_js/utf8.c b/src/couchdb/priv/couch_js/utf8.c
index d60642671..2d23cc204 100644
--- a/src/couchdb/priv/couch_js/utf8.c
+++ b/src/couchdb/priv/couch_js/utf8.c
@@ -66,24 +66,31 @@ enc_charbuf(const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
c = *src++;
srclen--;
- if((c >= 0xDC00) && (c <= 0xDFFF)) goto bad_surrogate;
-
- if(c < 0xD800 || c > 0xDBFF)
+ if(c <= 0xD7FF || c >= 0xE000)
{
- v = c;
+ v = (uint32) c;
}
- else
+ else if(c >= 0xD800 && c <= 0xDBFF)
{
if(srclen < 1) goto buffer_too_small;
c2 = *src++;
srclen--;
- if ((c2 < 0xDC00) || (c2 > 0xDFFF))
+ if(c2 >= 0xDC00 && c2 <= 0xDFFF)
+ {
+ v = (uint32) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000);
+ }
+ else
{
- c = c2;
- goto bad_surrogate;
+ // Invalid second half of surrogate pair
+ v = (uint32) 0xFFFD;
}
- v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
}
+ else
+ {
+ // Invalid first half surrogate pair
+ v = (uint32) 0xFFFD;
+ }
+
if(v < 0x0080)
{
/* no encoding necessary - performance hack */
@@ -109,10 +116,6 @@ enc_charbuf(const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
*dstlenp = (origDstlen - dstlen);
return JS_TRUE;
-bad_surrogate:
- *dstlenp = (origDstlen - dstlen);
- return JS_FALSE;
-
buffer_too_small:
*dstlenp = (origDstlen - dstlen);
return JS_FALSE;