diff options
author | Jan Lehnardt <jan@apache.org> | 2013-10-03 17:51:37 +0200 |
---|---|---|
committer | Jan Lehnardt <jan@apache.org> | 2013-10-03 17:51:37 +0200 |
commit | 073f9e2528b68a94bdaa114bc6dc19d978740847 (patch) | |
tree | 6771e6a99c7802c88f9f2568e46e0db1086318e3 | |
parent | 532100c101387a3314a870264e58e7761c787c67 (diff) | |
parent | 9195223b12f6aae993010eea338446d28ab63f54 (diff) | |
download | couchdb-073f9e2528b68a94bdaa114bc6dc19d978740847.tar.gz |
Merge branch '1425-fix-graceful-surrogate-handling'
* 1425-fix-graceful-surrogate-handling:
Handle invalid UTF-8 byte sequences gracefully by replacing them with 0xFFFD
-rw-r--r-- | THANKS.in | 1 | ||||
-rw-r--r-- | src/couchdb/priv/couch_js/utf8.c | 29 |
2 files changed, 17 insertions, 13 deletions
```diff
@@ -92,6 +92,7 @@ suggesting improvements or submitting changes. Some of these people are:
  * Fedor Indutny <fedor@indutny.com>
  * Tim Blair
  * Tady Walsh <hello@tady.me>
+ * Sam Rijs <recv@awesam.de>
 # Authors from commit 6c976bd and onwards are auto-inserted. If you are merging
 # a commit from a non-committer, you should not add an entry to this file. When
 # `bootstrap` is run, the actual THANKS file will be generated.
diff --git a/src/couchdb/priv/couch_js/utf8.c b/src/couchdb/priv/couch_js/utf8.c
index d60642671..2d23cc204 100644
--- a/src/couchdb/priv/couch_js/utf8.c
+++ b/src/couchdb/priv/couch_js/utf8.c
@@ -66,24 +66,31 @@ enc_charbuf(const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
         c = *src++;
         srclen--;
 
-        if((c >= 0xDC00) && (c <= 0xDFFF)) goto bad_surrogate;
-
-        if(c < 0xD800 || c > 0xDBFF)
+        if(c <= 0xD7FF || c >= 0xE000)
         {
-            v = c;
+            v = (uint32) c;
         }
-        else
+        else if(c >= 0xD800 && c <= 0xDBFF)
         {
             if(srclen < 1) goto buffer_too_small;
             c2 = *src++;
             srclen--;
-            if ((c2 < 0xDC00) || (c2 > 0xDFFF))
+            if(c2 >= 0xDC00 && c2 <= 0xDFFF)
+            {
+                v = (uint32) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000);
+            }
+            else
             {
-                c = c2;
-                goto bad_surrogate;
+                // Invalid second half of surrogate pair
+                v = (uint32) 0xFFFD;
             }
-            v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
         }
+        else
+        {
+            // Invalid first half surrogate pair
+            v = (uint32) 0xFFFD;
+        }
+
         if(v < 0x0080)
         {
             /* no encoding necessary - performance hack */
@@ -109,10 +116,6 @@ enc_charbuf(const jschar* src, size_t srclen, char* dst, size_t* dstlenp)
     *dstlenp = (origDstlen - dstlen);
     return JS_TRUE;
 
-bad_surrogate:
-    *dstlenp = (origDstlen - dstlen);
-    return JS_FALSE;
-
 buffer_too_small:
     *dstlenp = (origDstlen - dstlen);
     return JS_FALSE;
```