diff options
Diffstat (limited to 'src/mongo/util/stringutils.h')
-rw-r--r-- | src/mongo/util/stringutils.h | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/src/mongo/util/stringutils.h b/src/mongo/util/stringutils.h new file mode 100644 index 00000000000..93598aa520b --- /dev/null +++ b/src/mongo/util/stringutils.h @@ -0,0 +1,139 @@ +// stringutils.h + +/* Copyright 2010 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace mongo { + + // see also mongoutils/str.h - perhaps move these there? + // see also text.h + + void splitStringDelim( const string& str , vector<string>* res , char delim ); + + void joinStringDelim( const vector<string>& strs , string* res , char delim ); + + inline string tolowerString( const string& input ) { + string::size_type sz = input.size(); + + boost::scoped_array<char> line(new char[sz+1]); + char * copy = line.get(); + + for ( string::size_type i=0; i<sz; i++ ) { + char c = input[i]; + copy[i] = (char)tolower( (int)c ); + } + copy[sz] = 0; + return string(copy); + } + + /** + * Non numeric characters are compared lexicographically; numeric substrings + * are compared numerically; dots separate ordered comparable subunits. + * For convenience, character 255 is greater than anything else. + */ + inline int lexNumCmp( const char *s1, const char *s2 ) { + //cout << "START : " << s1 << "\t" << s2 << endl; + + bool startWord = true; + + while( *s1 && *s2 ) { + + bool d1 = ( *s1 == '.' ); + bool d2 = ( *s2 == '.' ); + if ( d1 && !d2 ) + return -1; + if ( d2 && !d1 ) + return 1; + if ( d1 && d2 ) { + ++s1; ++s2; + startWord = true; + continue; + } + + bool p1 = ( *s1 == (char)255 ); + bool p2 = ( *s2 == (char)255 ); + //cout << "\t\t " << p1 << "\t" << p2 << endl; + if ( p1 && !p2 ) + return 1; + if ( p2 && !p1 ) + return -1; + + bool n1 = isNumber( *s1 ); + bool n2 = isNumber( *s2 ); + + if ( n1 && n2 ) { + // get rid of leading 0s + if ( startWord ) { + while ( *s1 == '0' ) s1++; + while ( *s2 == '0' ) s2++; + } + + char * e1 = (char*)s1; + char * e2 = (char*)s2; + + // find length + // if end of string, will break immediately ('\0') + while ( isNumber (*e1) ) e1++; + while ( isNumber (*e2) ) e2++; + + int len1 = (int)(e1-s1); + int len2 = (int)(e2-s2); + + int result; + // if one is longer than the other, return + if ( len1 > len2 ) { + return 1; + } + else if ( len2 > len1 ) { + return -1; + } + // if the lengths are equal, just strcmp + else if ( (result = strncmp(s1, s2, len1)) != 0 ) { + return result; + } + + // otherwise, the numbers are equal + s1 = e1; + s2 = e2; + startWord = false; + continue; + } + + if ( n1 ) + return 1; + + if ( n2 ) + return -1; + + if ( *s1 > *s2 ) + return 1; + + if ( *s2 > *s1 ) + return -1; + + s1++; s2++; + startWord = false; + } + + if ( *s1 ) + return 1; + if ( *s2 ) + return -1; + return 0; + } + +} // namespace mongo |