diff options
-rw-r--r-- | src/mongo/bson/bsonobjiterator.h | 3 | ||||
-rw-r--r-- | src/mongo/db/jsobj.cpp | 26 | ||||
-rw-r--r-- | src/mongo/db/ops/update.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/ops/update.h | 6 | ||||
-rw-r--r-- | src/mongo/dbtests/basictests.cpp | 174 | ||||
-rw-r--r-- | src/mongo/util/stringutils.h | 179 | ||||
-rw-r--r-- | src/mongo/util/version.cpp | 2 |
7 files changed, 207 insertions, 188 deletions
diff --git a/src/mongo/bson/bsonobjiterator.h b/src/mongo/bson/bsonobjiterator.h index e4dcca886ab..2809f5a175b 100644 --- a/src/mongo/bson/bsonobjiterator.h +++ b/src/mongo/bson/bsonobjiterator.h @@ -104,6 +104,9 @@ namespace mongo { } private: + + class ElementFieldCmp; + const char ** _fields; int _nfields; int _cur; diff --git a/src/mongo/db/jsobj.cpp b/src/mongo/db/jsobj.cpp index 1e850982396..6d47d01bd71 100644 --- a/src/mongo/db/jsobj.cpp +++ b/src/mongo/db/jsobj.cpp @@ -351,7 +351,7 @@ namespace mongo { const string& c = l.substr( lstart , lend - lstart ); const string& d = r.substr( rstart , rend - rstart ); - int x = lexNumCmp( c.c_str(), d.c_str() ); + int x = LexNumCmp::cmp( c.c_str(), d.c_str() ); if ( x < 0 ) return LEFT_BEFORE; @@ -1186,13 +1186,6 @@ namespace mongo { uassert( 14853 , "type not supported for appendMaxElementForType" , false ); } - int BSONElementFieldSorter( const void * a , const void * b ) { - const char * x = *((const char**)a); - const char * y = *((const char**)b); - x++; y++; - return lexNumCmp( x , y ); - } - bool fieldsMatch(const BSONObj& lhs, const BSONObj& rhs) { BSONObjIterator l(lhs); BSONObjIterator r(rhs); @@ -1206,6 +1199,20 @@ namespace mongo { return !(l.more() || r.more()); // false if lhs and rhs have diff nFields() } + /** Compare two bson elements, provided as const char *'s, by their field names. */ + class BSONObjIteratorSorted::ElementFieldCmp { + public: + bool operator()( const char *s1, const char *s2 ) const; + private: + LexNumCmp _cmp; + }; + + bool BSONObjIteratorSorted::ElementFieldCmp::operator()( const char *s1, const char *s2 ) + const { + // Skip type byte and compare field names. + return _cmp( s1 + 1, s2 + 1 ); + } + BSONObjIteratorSorted::BSONObjIteratorSorted( const BSONObj& o ) { _nfields = o.nFields(); _fields = new const char*[_nfields]; @@ -1216,7 +1223,8 @@ namespace mongo { assert( _fields[x-1] ); } assert( x == _nfields ); - qsort( _fields , _nfields , sizeof(char*) , BSONElementFieldSorter ); + ElementFieldCmp cmp; + std::sort( _fields , _fields + _nfields , cmp ); _cur = 0; } diff --git a/src/mongo/db/ops/update.cpp b/src/mongo/db/ops/update.cpp index 27e6ed50686..b2060268633 100644 --- a/src/mongo/db/ops/update.cpp +++ b/src/mongo/db/ops/update.cpp @@ -23,7 +23,6 @@ #include "../queryutil.h" #include "../repl.h" #include "../btree.h" -#include "../../util/stringutils.h" #include "update.h" #include "../pagefault.h" @@ -742,10 +741,6 @@ namespace mongo { return ss.str(); } - bool ModSetState::FieldCmp::operator()( const string &l, const string &r ) const { - return lexNumCmp( l.c_str(), r.c_str() ) < 0; - } - BSONObj ModSet::createNewFromQuery( const BSONObj& query ) { BSONObj newObj; diff --git a/src/mongo/db/ops/update.h b/src/mongo/db/ops/update.h index 816c36b4788..d64a3de65a7 100644 --- a/src/mongo/db/ops/update.h +++ b/src/mongo/db/ops/update.h @@ -19,6 +19,7 @@ #include "../../pch.h" #include "../jsobj.h" #include "../../util/embedded_builder.h" +#include "../../util/stringutils.h" #include "../matcher.h" namespace mongo { @@ -526,10 +527,7 @@ namespace mongo { * the goal is to make ModSet const so its re-usable */ class ModSetState : boost::noncopyable { - struct FieldCmp { - bool operator()( const string &l, const string &r ) const; - }; - typedef map<string,ModState,FieldCmp> ModStateHolder; + typedef map<string,ModState,LexNumCmp> ModStateHolder; const BSONObj& _obj; ModStateHolder _mods; bool _inPlacePossible; diff --git a/src/mongo/dbtests/basictests.cpp b/src/mongo/dbtests/basictests.cpp index 46a7dbc22bd..6046218202e 100644 --- a/src/mongo/dbtests/basictests.cpp +++ b/src/mongo/dbtests/basictests.cpp @@ -334,99 +334,105 @@ namespace BasicTests { class LexNumCmp { public: + static void assertCmp( int expected, const char *s1, const char *s2 ) { + mongo::LexNumCmp cmp; + ASSERT_EQUALS( expected, cmp.cmp( s1, s2 ) ); + ASSERT_EQUALS( expected < 0, cmp( s1, s2 ) ); + ASSERT_EQUALS( expected < 0, cmp( string( s1 ), string( s2 ) ) ); + } void run() { ASSERT( ! isNumber( (char)255 ) ); - ASSERT_EQUALS( 0, lexNumCmp( "a", "a" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "a", "aa" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "aa", "a" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "a", "b" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "100", "50" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "50", "100" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "b", "a" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "aa", "aa" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "aa", "ab" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "ab", "aa" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "0", "a" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "a0", "aa" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "a", "0" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "aa", "a0" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "0", "0" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "10", "10" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "1", "10" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "10", "1" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "11", "10" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "10", "11" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "f11f", "f10f" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "f10f", "f11f" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "f11f", "f111" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "f111", "f11f" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "f12f", "f12g" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "f12g", "f12f" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "aa{", "aab" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "aa{", "aa1" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "a1{", "a11" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "a1{a", "a1{" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "a1{", "a1{a" ) ); - ASSERT_EQUALS( 1, lexNumCmp("21", "11") ); - ASSERT_EQUALS( -1, lexNumCmp("11", "21") ); - - ASSERT_EQUALS( -1 , lexNumCmp( "a.0" , "a.1" ) ); - ASSERT_EQUALS( -1 , lexNumCmp( "a.0.b" , "a.1" ) ); - - ASSERT_EQUALS( -1 , lexNumCmp( "b." , "b.|" ) ); - ASSERT_EQUALS( -1 , lexNumCmp( "b.0e" , (string("b.") + (char)255).c_str() ) ); - ASSERT_EQUALS( -1 , lexNumCmp( "b." , "b.0e" ) ); - - ASSERT_EQUALS( 0, lexNumCmp( "238947219478347782934718234", "238947219478347782934718234")); - ASSERT_EQUALS( 0, lexNumCmp( "000238947219478347782934718234", "238947219478347782934718234")); - ASSERT_EQUALS( 1, lexNumCmp( "000238947219478347782934718235", "238947219478347782934718234")); - ASSERT_EQUALS( -1, lexNumCmp( "238947219478347782934718234", "238947219478347782934718234.1")); - ASSERT_EQUALS( 0, lexNumCmp( "238", "000238")); - ASSERT_EQUALS( 0, lexNumCmp( "002384", "0002384")); - ASSERT_EQUALS( 0, lexNumCmp( "00002384", "0002384")); - ASSERT_EQUALS( 0, lexNumCmp( "0", "0")); - ASSERT_EQUALS( 0, lexNumCmp( "0000", "0")); - ASSERT_EQUALS( 0, lexNumCmp( "0", "000")); - ASSERT_EQUALS( -1, lexNumCmp( "0000", "0.0")); - ASSERT_EQUALS( 1, lexNumCmp( "2380", "238")); - ASSERT_EQUALS( 1, lexNumCmp( "2385", "2384")); - ASSERT_EQUALS( 1, lexNumCmp( "2385", "02384")); - ASSERT_EQUALS( 1, lexNumCmp( "2385", "002384")); - ASSERT_EQUALS( -1, lexNumCmp( "123.234.4567", "00238")); - ASSERT_EQUALS( 0, lexNumCmp( "123.234", "00123.234")); - ASSERT_EQUALS( 0, lexNumCmp( "a.123.b", "a.00123.b")); - ASSERT_EQUALS( 1, lexNumCmp( "a.123.b", "a.b.00123.b")); - ASSERT_EQUALS( -1, lexNumCmp( "a.00.0", "a.0.1")); - ASSERT_EQUALS( 0, lexNumCmp( "01.003.02", "1.3.2")); - ASSERT_EQUALS( -1, lexNumCmp( "1.3.2", "10.300.20")); - ASSERT_EQUALS( 0, lexNumCmp( "10.300.20", "000000000000010.0000300.000000020")); - ASSERT_EQUALS( 0, lexNumCmp( "0000a", "0a")); - ASSERT_EQUALS( -1, lexNumCmp( "a", "0a")); - ASSERT_EQUALS( -1, lexNumCmp( "000a", "001a")); - ASSERT_EQUALS( 0, lexNumCmp( "010a", "0010a")); + assertCmp( 0, "a", "a" ); + assertCmp( -1, "a", "aa" ); + assertCmp( 1, "aa", "a" ); + assertCmp( -1, "a", "b" ); + assertCmp( 1, "100", "50" ); + assertCmp( -1, "50", "100" ); + assertCmp( 1, "b", "a" ); + assertCmp( 0, "aa", "aa" ); + assertCmp( -1, "aa", "ab" ); + assertCmp( 1, "ab", "aa" ); + assertCmp( 1, "0", "a" ); + assertCmp( 1, "a0", "aa" ); + assertCmp( -1, "a", "0" ); + assertCmp( -1, "aa", "a0" ); + assertCmp( 0, "0", "0" ); + assertCmp( 0, "10", "10" ); + assertCmp( -1, "1", "10" ); + assertCmp( 1, "10", "1" ); + assertCmp( 1, "11", "10" ); + assertCmp( -1, "10", "11" ); + assertCmp( 1, "f11f", "f10f" ); + assertCmp( -1, "f10f", "f11f" ); + assertCmp( -1, "f11f", "f111" ); + assertCmp( 1, "f111", "f11f" ); + assertCmp( -1, "f12f", "f12g" ); + assertCmp( 1, "f12g", "f12f" ); + assertCmp( 1, "aa{", "aab" ); + assertCmp( -1, "aa{", "aa1" ); + assertCmp( -1, "a1{", "a11" ); + assertCmp( 1, "a1{a", "a1{" ); + assertCmp( -1, "a1{", "a1{a" ); + assertCmp( 1, "21", "11" ); + assertCmp( -1, "11", "21" ); + + assertCmp( -1 , "a.0" , "a.1" ); + assertCmp( -1 , "a.0.b" , "a.1" ); + + assertCmp( -1 , "b." , "b.|" ); + assertCmp( -1 , "b.0e" , (string("b.") + (char)255).c_str() ); + assertCmp( -1 , "b." , "b.0e" ); + + assertCmp( 0, "238947219478347782934718234", "238947219478347782934718234"); + assertCmp( 0, "000238947219478347782934718234", "238947219478347782934718234"); + assertCmp( 1, "000238947219478347782934718235", "238947219478347782934718234"); + assertCmp( -1, "238947219478347782934718234", "238947219478347782934718234.1"); + assertCmp( 0, "238", "000238"); + assertCmp( 0, "002384", "0002384"); + assertCmp( 0, "00002384", "0002384"); + assertCmp( 0, "0", "0"); + assertCmp( 0, "0000", "0"); + assertCmp( 0, "0", "000"); + assertCmp( -1, "0000", "0.0"); + assertCmp( 1, "2380", "238"); + assertCmp( 1, "2385", "2384"); + assertCmp( 1, "2385", "02384"); + assertCmp( 1, "2385", "002384"); + assertCmp( -1, "123.234.4567", "00238"); + assertCmp( 0, "123.234", "00123.234"); + assertCmp( 0, "a.123.b", "a.00123.b"); + assertCmp( 1, "a.123.b", "a.b.00123.b"); + assertCmp( -1, "a.00.0", "a.0.1"); + assertCmp( 0, "01.003.02", "1.3.2"); + assertCmp( -1, "1.3.2", "10.300.20"); + assertCmp( 0, "10.300.20", "000000000000010.0000300.000000020"); + assertCmp( 0, "0000a", "0a"); + assertCmp( -1, "a", "0a"); + assertCmp( -1, "000a", "001a"); + assertCmp( 0, "010a", "0010a"); - ASSERT_EQUALS( -1 , lexNumCmp( "a0" , "a00" ) ); - ASSERT_EQUALS( 0 , lexNumCmp( "a.0" , "a.00" ) ); - ASSERT_EQUALS( -1 , lexNumCmp( "a.b.c.d0" , "a.b.c.d00" ) ); - ASSERT_EQUALS( 1 , lexNumCmp( "a.b.c.0.y" , "a.b.c.00.x" ) ); + assertCmp( -1 , "a0" , "a00" ); + assertCmp( 0 , "a.0" , "a.00" ); + assertCmp( -1 , "a.b.c.d0" , "a.b.c.d00" ); + assertCmp( 1 , "a.b.c.0.y" , "a.b.c.00.x" ); - ASSERT_EQUALS( -1, lexNumCmp( "a", "a-" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "a-", "a" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "a-", "a-" ) ); + assertCmp( -1, "a", "a-" ); + assertCmp( 1, "a-", "a" ); + assertCmp( 0, "a-", "a-" ); - ASSERT_EQUALS( -1, lexNumCmp( "a", "a-c" ) ); - ASSERT_EQUALS( 1, lexNumCmp( "a-c", "a" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "a-c", "a-c" ) ); + assertCmp( -1, "a", "a-c" ); + assertCmp( 1, "a-c", "a" ); + assertCmp( 0, "a-c", "a-c" ); - ASSERT_EQUALS( 1, lexNumCmp( "a-c.t", "a.t" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "a.t", "a-c.t" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "a-c.t", "a-c.t" ) ); + assertCmp( 1, "a-c.t", "a.t" ); + assertCmp( -1, "a.t", "a-c.t" ); + assertCmp( 0, "a-c.t", "a-c.t" ); - ASSERT_EQUALS( 1, lexNumCmp( "ac.t", "a.t" ) ); - ASSERT_EQUALS( -1, lexNumCmp( "a.t", "ac.t" ) ); - ASSERT_EQUALS( 0, lexNumCmp( "ac.t", "ac.t" ) ); + assertCmp( 1, "ac.t", "a.t" ); + assertCmp( -1, "a.t", "ac.t" ); + assertCmp( 0, "ac.t", "ac.t" ); } }; diff --git a/src/mongo/util/stringutils.h b/src/mongo/util/stringutils.h index 93598aa520b..d891040ec9c 100644 --- a/src/mongo/util/stringutils.h +++ b/src/mongo/util/stringutils.h @@ -40,100 +40,109 @@ namespace mongo { return string(copy); } - /** - * Non numeric characters are compared lexicographically; numeric substrings - * are compared numerically; dots separate ordered comparable subunits. - * For convenience, character 255 is greater than anything else. - */ - inline int lexNumCmp( const char *s1, const char *s2 ) { - //cout << "START : " << s1 << "\t" << s2 << endl; - - bool startWord = true; - - while( *s1 && *s2 ) { - - bool d1 = ( *s1 == '.' ); - bool d2 = ( *s2 == '.' ); - if ( d1 && !d2 ) - return -1; - if ( d2 && !d1 ) - return 1; - if ( d1 && d2 ) { - ++s1; ++s2; - startWord = true; - continue; - } + class LexNumCmp { + public: + /** + * Non numeric characters are compared lexicographically; numeric substrings + * are compared numerically; dots separate ordered comparable subunits. + * For convenience, character 255 is greater than anything else. + */ + static int cmp( const char *s1, const char *s2 ) { + //cout << "START : " << s1 << "\t" << s2 << endl; - bool p1 = ( *s1 == (char)255 ); - bool p2 = ( *s2 == (char)255 ); - //cout << "\t\t " << p1 << "\t" << p2 << endl; - if ( p1 && !p2 ) - return 1; - if ( p2 && !p1 ) - return -1; - - bool n1 = isNumber( *s1 ); - bool n2 = isNumber( *s2 ); - - if ( n1 && n2 ) { - // get rid of leading 0s - if ( startWord ) { - while ( *s1 == '0' ) s1++; - while ( *s2 == '0' ) s2++; - } - - char * e1 = (char*)s1; - char * e2 = (char*)s2; - - // find length - // if end of string, will break immediately ('\0') - while ( isNumber (*e1) ) e1++; - while ( isNumber (*e2) ) e2++; - - int len1 = (int)(e1-s1); - int len2 = (int)(e2-s2); - - int result; - // if one is longer than the other, return - if ( len1 > len2 ) { + bool startWord = true; + + while( *s1 && *s2 ) { + + bool d1 = ( *s1 == '.' ); + bool d2 = ( *s2 == '.' ); + if ( d1 && !d2 ) + return -1; + if ( d2 && !d1 ) return 1; + if ( d1 && d2 ) { + ++s1; ++s2; + startWord = true; + continue; } - else if ( len2 > len1 ) { + + bool p1 = ( *s1 == (char)255 ); + bool p2 = ( *s2 == (char)255 ); + //cout << "\t\t " << p1 << "\t" << p2 << endl; + if ( p1 && !p2 ) + return 1; + if ( p2 && !p1 ) return -1; + + bool n1 = isNumber( *s1 ); + bool n2 = isNumber( *s2 ); + + if ( n1 && n2 ) { + // get rid of leading 0s + if ( startWord ) { + while ( *s1 == '0' ) s1++; + while ( *s2 == '0' ) s2++; + } + + char * e1 = (char*)s1; + char * e2 = (char*)s2; + + // find length + // if end of string, will break immediately ('\0') + while ( isNumber (*e1) ) e1++; + while ( isNumber (*e2) ) e2++; + + int len1 = (int)(e1-s1); + int len2 = (int)(e2-s2); + + int result; + // if one is longer than the other, return + if ( len1 > len2 ) { + return 1; + } + else if ( len2 > len1 ) { + return -1; + } + // if the lengths are equal, just strcmp + else if ( (result = strncmp(s1, s2, len1)) != 0 ) { + return result; + } + + // otherwise, the numbers are equal + s1 = e1; + s2 = e2; + startWord = false; + continue; } - // if the lengths are equal, just strcmp - else if ( (result = strncmp(s1, s2, len1)) != 0 ) { - return result; - } - - // otherwise, the numbers are equal - s1 = e1; - s2 = e2; + + if ( n1 ) + return 1; + + if ( n2 ) + return -1; + + if ( *s1 > *s2 ) + return 1; + + if ( *s2 > *s1 ) + return -1; + + s1++; s2++; startWord = false; - continue; } - if ( n1 ) - return 1; - - if ( n2 ) - return -1; - - if ( *s1 > *s2 ) + if ( *s1 ) return 1; - - if ( *s2 > *s1 ) + if ( *s2 ) return -1; - - s1++; s2++; - startWord = false; + return 0; } - - if ( *s1 ) - return 1; - if ( *s2 ) - return -1; - return 0; - } - + bool operator()( const char *s1, const char *s2 ) const { + return cmp( s1, s2 ) < 0; + } + bool operator()( const string &s1, const string &s2 ) const { + return (*this)( s1.c_str(), s2.c_str() ); + } + }; + } // namespace mongo diff --git a/src/mongo/util/version.cpp b/src/mongo/util/version.cpp index f189d5dca64..1c8dbc9a2fe 100644 --- a/src/mongo/util/version.cpp +++ b/src/mongo/util/version.cpp @@ -252,7 +252,7 @@ namespace mongo { return -1; } - return lexNumCmp(rhs.data(), lhs.data()); + return LexNumCmp::cmp(rhs.data(), lhs.data()); } class VersionCmpTest : public UnitTest { |