diff options
author | Aaron <aaron@10gen.com> | 2009-05-27 16:09:59 -0400 |
---|---|---|
committer | Aaron <aaron@10gen.com> | 2009-05-27 16:09:59 -0400 |
commit | 3b2ecb5ff075f935c4cb3c6e731f0276ea747c55 (patch) | |
tree | 9d77d2b7b9a88cba9dc5075f581167e3eeda71d7 | |
parent | 5d113ff1d093af389113a6fbddfbc3c394cbb981 (diff) | |
download | mongo-3b2ecb5ff075f935c4cb3c6e731f0276ea747c55.tar.gz |
SpiderMonkey (sm): correctly read strings as UTF-8 even if JS_EncodeCharacters doesn't work as documented
-rw-r--r-- | dbtests/jstests.cpp | 17 | ||||
-rw-r--r-- | scripting/engine_spidermonkey.cpp | 7 |
2 files changed, 22 insertions, 2 deletions
diff --git a/dbtests/jstests.cpp b/dbtests/jstests.cpp
index 49cac4acfaa..b6e2b3a3e8d 100644
--- a/dbtests/jstests.cpp
+++ b/dbtests/jstests.cpp
@@ -467,6 +467,22 @@ namespace JSTests {
 }
 static const char *ns() { return "unittest.jstests.utf8check"; }
 };
+
+ class LongUtf8String {
+ public:
+ LongUtf8String() { reset(); }
+ ~LongUtf8String() { reset(); }
+ void run() {
+ if( !globalScriptEngine->utf8Ok() )
+ return;
+ client.eval( "unittest", "db.jstests.longutf8string.save( {_id:'\\uffff\uffff\uffff\uffff'} )" );
+ }
+ private:
+ void reset() {
+ client.dropCollection( ns() );
+ }
+ static const char *ns() { return "unittest.jstests.longutf8string"; }
+ };
 class All : public Suite {
 public:
@@ -484,6 +500,7 @@ namespace JSTests {
 add< TypeConservation >();
 add< WeirdObjects >();
 add< Utf8Check >();
+ add< LongUtf8String >();
 }
 };
diff --git a/scripting/engine_spidermonkey.cpp b/scripting/engine_spidermonkey.cpp
index e947d05ea08..0f95a6817e2 100644
--- a/scripting/engine_spidermonkey.cpp
+++ b/scripting/engine_spidermonkey.cpp
@@ -86,10 +86,13 @@ namespace mongo {
 if( srclen == 0 )
 return "";
- size_t len = srclen * 4;
+ size_t len = srclen * 6; // we only need *3, but see note on len below
 char * dst = (char*)malloc( len );
- len /= 2; // weird JS_EncodeCharacters api expects len in 16bit units but modifies it to represent size in 8bit units.
+ len /= 2;
+ // doc re weird JS_EncodeCharacters api claims len expected in 16bit
+ // units, but experiments suggest 8bit units expected. We allocate
+ // enough memory that either will work.
 assert( JS_EncodeCharacters( _context , s , srclen , dst , &len) );