summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Aaron <aaron@10gen.com> 2009-05-27 16:09:59 -0400
committer Aaron <aaron@10gen.com> 2009-05-27 16:09:59 -0400
commit 3b2ecb5ff075f935c4cb3c6e731f0276ea747c55 (patch)
tree 9d77d2b7b9a88cba9dc5075f581167e3eeda71d7
parent 5d113ff1d093af389113a6fbddfbc3c394cbb981 (diff)
download mongo-3b2ecb5ff075f935c4cb3c6e731f0276ea747c55.tar.gz
sm correctly read strings as utf8 even if JS_EncodeCharacters doesn't work as documented
-rw-r--r-- dbtests/jstests.cpp 17
-rw-r--r-- scripting/engine_spidermonkey.cpp 7
2 files changed, 22 insertions, 2 deletions
diff --git a/dbtests/jstests.cpp b/dbtests/jstests.cpp
index 49cac4acfaa..b6e2b3a3e8d 100644
--- a/dbtests/jstests.cpp
+++ b/dbtests/jstests.cpp
@@ -467,6 +467,22 @@ namespace JSTests {
}
static const char *ns() { return "unittest.jstests.utf8check"; }
};
+
+ class LongUtf8String { // test case: evals a JS save() whose _id string is built from \uffff characters
+ public:
+ LongUtf8String() { reset(); } // drop the test collection before the test runs
+ ~LongUtf8String() { reset(); } // and drop it again on teardown
+ void run() {
+ if( !globalScriptEngine->utf8Ok() ) // nothing is evaluated when the engine reports utf8 is not ok
+ return;
+ client.eval( "unittest", "db.jstests.longutf8string.save( {_id:'\\uffff\uffff\uffff\uffff'} )" ); // result is not checked; NOTE(review): presumably passes as long as the eval does not trip an assert - confirm
+ }
+ private:
+ void reset() { // removes the collection named by ns()
+ client.dropCollection( ns() );
+ }
+ static const char *ns() { return "unittest.jstests.longutf8string"; } // NOTE(review): appears to be the db-qualified name of the collection the eval'd script saves into - confirm
+ };
class All : public Suite {
public:
@@ -484,6 +500,7 @@ namespace JSTests {
add< TypeConservation >();
add< WeirdObjects >();
add< Utf8Check >();
+ add< LongUtf8String >();
}
};
diff --git a/scripting/engine_spidermonkey.cpp b/scripting/engine_spidermonkey.cpp
index e947d05ea08..0f95a6817e2 100644
--- a/scripting/engine_spidermonkey.cpp
+++ b/scripting/engine_spidermonkey.cpp
@@ -86,10 +86,13 @@ namespace mongo {
if( srclen == 0 )
return "";
- size_t len = srclen * 4;
+ size_t len = srclen * 6; // we only need *3, but see note on len below
char * dst = (char*)malloc( len );
- len /= 2; // weird JS_EncodeCharacters api expects len in 16bit units but modifies it to represent size in 8bit units.
+ len /= 2;
+ // doc re weird JS_EncodeCharacters api claims len expected in 16bit
+ // units, but experiments suggest 8bit units expected. We allocate
+ // enough memory that either will work.
assert( JS_EncodeCharacters( _context , s , srclen , dst , &len) );