diff options
-rw-r--r-- | src/mongo/SConscript | 6 | ||||
-rw-r--r-- | src/mongo/util/string_map.h | 52 | ||||
-rw-r--r-- | src/mongo/util/string_map_internal.h | 49 | ||||
-rw-r--r-- | src/mongo/util/string_map_test.cpp | 139 | ||||
-rw-r--r-- | src/mongo/util/unordered_fast_key_table.h | 140 | ||||
-rw-r--r-- | src/mongo/util/unordered_fast_key_table_internal.h | 127 |
6 files changed, 512 insertions, 1 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 91d3155f61d..ce77c654dc9 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -32,6 +32,7 @@ env.StaticLibrary('foundation', 'util/signal_handlers.cpp', 'util/text.cpp', 'util/time_support.cpp', + 'util/timer.cpp' ], LIBDEPS=['stacktrace', '$BUILD_DIR/mongo/base/base', @@ -76,6 +77,10 @@ env.CppUnitTest('mutable_bson_builder_test', ['bson/mutable/mutable_bson_builder env.CppUnitTest('safe_num_test', ['util/safe_num_test.cpp'], LIBDEPS=['bson']) +env.CppUnitTest('string_map_test', ['util/string_map_test.cpp'], + LIBDEPS=['bson','foundation']) + + env.CppUnitTest('bson_field_test', ['bson/bson_field_test.cpp'], LIBDEPS=['bson']) @@ -95,7 +100,6 @@ commonFiles = [ "pch.cpp", "shell/mongo.cpp", "util/background.cpp", "util/intrusive_counter.cpp", - "util/timer.cpp", "util/util.cpp", "util/file_allocator.cpp", "util/trace.cpp", diff --git a/src/mongo/util/string_map.h b/src/mongo/util/string_map.h new file mode 100644 index 00000000000..e2fd7e01bc7 --- /dev/null +++ b/src/mongo/util/string_map.h @@ -0,0 +1,52 @@ +// string_map.h + +/* Copyright 2012 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <string> +#include "mongo/util/unordered_fast_key_table.h" + +namespace mongo { + + struct StringMapDefaultHash { + size_t operator()( const char* k ) const; + }; + + struct StringMapDefaultEqual { + bool operator()( const char* a, const char* b ) const { + return strcmp( a,b ) == 0; + } + }; + + struct StringMapDefaultConvertor { + const char* operator()( const std::string& s ) const { + return s.c_str(); + } + }; + + template< typename V > + class StringMap : public UnorderedFastKeyTable< const char*, // K_L + std::string, // K_S + V, // V + StringMapDefaultHash, + StringMapDefaultEqual, + StringMapDefaultConvertor > { + }; +} + +#include "mongo/util/string_map_internal.h" + diff --git a/src/mongo/util/string_map_internal.h b/src/mongo/util/string_map_internal.h new file mode 100644 index 00000000000..58d84436046 --- /dev/null +++ b/src/mongo/util/string_map_internal.h @@ -0,0 +1,49 @@ +// string_map_internal.h + + +/* Copyright 2012 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace mongo { + + size_t StringMapDefaultHash::operator()( const char* origKey ) const { + const char* key = origKey; + size_t hash = 7; + while ( *key ) { + hash += ( 517 * static_cast<int>(*key) ); + hash *= 13; + key++; + } + if ( hash == 0 ) + hash = -1; + return hash; + } + + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + typename UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::const_iterator UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::find( const K_L& key ) const { + if ( _size == 0 ) + return const_iterator(); + int pos = _area.find( key, _hash(key), 0, *this ); + if ( pos < 0 ) + return const_iterator(); + return const_iterator( &_area._entries[pos] ); + } + + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + typename UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::const_iterator UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::end() const { + return const_iterator(); + } + +} diff --git a/src/mongo/util/string_map_test.cpp b/src/mongo/util/string_map_test.cpp new file mode 100644 index 00000000000..20fa9e67ec3 --- /dev/null +++ b/src/mongo/util/string_map_test.cpp @@ -0,0 +1,139 @@ +// string_map_test.cpp + +/* Copyright 2012 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mongo/unittest/unittest.h" + +#include "mongo/platform/random.h" +#include "mongo/platform/unordered_map.h" +#include "mongo/util/string_map.h" +#include "mongo/util/log.h" +#include "mongo/util/timer.h" + +namespace { + using namespace mongo; + + TEST(StringMapTest, Hash1) { + StringMapDefaultHash h; + ASSERT_EQUALS( h(""), h("") ); + ASSERT_EQUALS( h("a"), h("a") ); + ASSERT_EQUALS( h("abc"), h("abc") ); + + ASSERT_NOT_EQUALS( h(""), h("a") ); + ASSERT_NOT_EQUALS( h("a"), h("ab") ); + + ASSERT_NOT_EQUALS( h("foo28"), h("foo35") ); + } + +#define equalsBothWays(a,b) \ + ASSERT_TRUE( e( (a), (b) ) ); \ + ASSERT_TRUE( e( (b), (a) ) ); + +#define notEqualsBothWays(a,b) \ + ASSERT_FALSE( e( (a), (b) ) ); \ + ASSERT_FALSE( e( (b), (a) ) ); + + TEST(StringMapTest, Equals1 ){ + StringMapDefaultEqual e; + + equalsBothWays( "", "" ); + equalsBothWays( "a", "a" ); + equalsBothWays( "bbbbb", "bbbbb" ); + + notEqualsBothWays( "", "a" ); + notEqualsBothWays( "a", "b" ); + notEqualsBothWays( "abc", "def" ); + notEqualsBothWays( "abc", "defasdasd" ); + } + + TEST(StringMapTest, Basic1) { + StringMap<int> m; + ASSERT_EQUALS( 0U, m.size() ); + m["eliot"] = 5; + ASSERT_EQUALS( 5, m["eliot"] ); + ASSERT_EQUALS( 1U, m.size() ); + } + + TEST(StringMapTest, Big1) { + StringMap<int> m; + char buf[64]; + + for ( int i=0; i<10000; i++ ) { + sprintf( buf, "foo%d", i ); + m[buf] = i; + } + + for ( int i=0; i<10000; i++ ) { + sprintf( buf, "foo%d", i ); + ASSERT_EQUALS( m[buf], i ); + } + } + + TEST(StringMapTest, find1 ) { + StringMap<int> m; + + ASSERT_TRUE( m.end() == m.find( "foo" ) ); + + m["foo"] = 5; + StringMap<int>::const_iterator i = m.find( "foo" ); + ASSERT_TRUE( i != m.end() ); + ASSERT_EQUALS( 5, i->second ); + ASSERT_EQUALS( "foo", i->first ); + i++; + ASSERT_TRUE( i == m.end() ); + } + + + template<typename M> + unsigned long long test_perf( M& m ) { + Timer t; + + PseudoRandom r(17); + char buf[64]; + for ( int i = 0; i< 100000; i++ ) { + sprintf( buf, "%dfoo%d", r.nextInt32(), r.nextInt32() ); + m[buf] = i; + ASSERT_EQUALS( i, m[buf] ); + } + + return t.micros(); + } + + TEST( StringMapTest, perf1 ) { + unsigned long long standard = 0; + unsigned long long unordered = 0; + unsigned long long custom = 0; + for ( int i = 0; i < 5; i++ ) { + { + std::map<string,int> m; + standard += test_perf( m ); + } + { + unordered_map<string,int> m; + unordered += test_perf( m ); + } + { + StringMap<int> m; + custom += test_perf( m ); + } + } + + log() << "std::map :\t" << standard << std::endl; + log() << "unordered:\t" << unordered << std::endl; + log() << "StringMap:\t" << custom << std::endl; + ASSERT_LESS_THAN( custom, standard ); + } +} diff --git a/src/mongo/util/unordered_fast_key_table.h b/src/mongo/util/unordered_fast_key_table.h new file mode 100644 index 00000000000..d914c6cd59c --- /dev/null +++ b/src/mongo/util/unordered_fast_key_table.h @@ -0,0 +1,140 @@ +// unordered_fast_key_table.h + +/* Copyright 2012 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <boost/smart_ptr/scoped_array.hpp> + +#include "mongo/base/disallow_copying.h" + +namespace mongo { + + template< typename K_L, // key lookup + typename K_S, // key storage + typename V, // value + typename H , // hash of K_L + typename E, // equal of K_L + typename C // convertor from K_S -> K_L + > + class UnorderedFastKeyTable { + MONGO_DISALLOW_COPYING(UnorderedFastKeyTable); + public: + typedef std::pair<K_S, V> value_type; + typedef V mapped_type; + + private: + struct Entry { + Entry() + : used( false ), everUsed( false ) { + } + + bool used; + bool everUsed; + size_t curHash; + value_type data; + }; + + struct Area { + Area( unsigned capacity, double maxProbeRatio ); + + int find( const K_L& key, size_t hash, int* firstEmpty, const UnorderedFastKeyTable& sm ) const; + + void transfer( Area* newArea, const UnorderedFastKeyTable& sm ) const; + + void swap( Area* other ) { + std::swap( _capacity, other->_capacity ); + std::swap( _maxProbe, other->_maxProbe ); + _entries.swap( other->_entries ); + } + + unsigned _capacity; + unsigned _maxProbe; + boost::scoped_array<Entry> _entries; + }; + + public: + static const unsigned DEFAULT_STARTING_CAPACITY = 20; + static const double DEFAULT_MAX_PROBE_RATIO = 0.05; + + UnorderedFastKeyTable( unsigned startingCapacity = DEFAULT_STARTING_CAPACITY, + double maxProbeRatio = DEFAULT_MAX_PROBE_RATIO ); + + /** + * @return number of elements in map + */ + size_t size() const { return _size; } + + bool empty() const { return _size == 0; } + + /* + * @return storage space + */ + size_t capacity() const { return _area._capacity; } + + V& operator[]( const K_L& key ) { return get( key ); } + + V& get( const K_L& key ); + + class const_iterator { + public: + const_iterator() { _theEntry = NULL; } + const_iterator( const Entry* entry ) { _theEntry = entry; } + + const value_type* operator->() const { return &_theEntry->data; } + + const_iterator operator++( int n ) { _theEntry = NULL; return *this; } + + bool operator==( const const_iterator& other ) const { + return _theEntry == other._theEntry; + } + bool operator!=( const const_iterator& other ) const { + return _theEntry != other._theEntry; + } + + private: + const Entry* _theEntry; + }; + + const_iterator find( const K_L& key ) const; + + const_iterator end() const; + + private: + /* + * @param firstEmpty, if we return -1, and firstEmpty != NULL, + * this will be set to the first empty bucket we found + * @retrun offset into _entries or -1 if not there + */ + int _find( const K_L& key, int hash, int* firstEmpty ) const; + + void _grow(); + + // ---- + + size_t _size; + const double _maxProbeRatio; + Area _area; + + H _hash; + E _equals; + C _convertor; + }; + +} + +#include "mongo/util/unordered_fast_key_table_internal.h" + diff --git a/src/mongo/util/unordered_fast_key_table_internal.h b/src/mongo/util/unordered_fast_key_table_internal.h new file mode 100644 index 00000000000..78f1e2f4a7d --- /dev/null +++ b/src/mongo/util/unordered_fast_key_table_internal.h @@ -0,0 +1,127 @@ +// unordered_fast_key_table_internal.h + + +/* Copyright 2012 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace mongo { + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::Area::Area( unsigned capacity, double maxProbeRatio ) + : _capacity( capacity ), + _maxProbe( static_cast<unsigned>( capacity * maxProbeRatio ) ), + _entries( new Entry[_capacity] ) { + } + + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + int UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::Area::find( const K_L& key, + size_t hash, + int* firstEmpty, + const UnorderedFastKeyTable& sm ) const { + if ( firstEmpty ) + *firstEmpty = -1; + + for ( unsigned probe = 0; probe < _maxProbe; probe++ ) { + unsigned pos = abs( hash + probe ) % _capacity; + + if ( ! _entries[pos].used ) { + // space is empty + if ( firstEmpty && *firstEmpty == -1 ) + *firstEmpty = pos; + if ( ! _entries[pos].everUsed ) + return -1; + continue; + } + + if ( _entries[pos].curHash != hash ) { + // space has something else + continue; + } + + if ( ! sm._equals(key, sm._convertor( _entries[pos].data.first ) ) ) { + // hashes match + // strings are not equals + continue; + } + + // hashes and strings are equal + // yay! + return pos; + } + return -1; + } + + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + void UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::Area::transfer( Area* newArea, + const UnorderedFastKeyTable& sm ) const { + for ( unsigned i = 0; i < _capacity; i++ ) { + if ( ! _entries[i].used ) + continue; + + int firstEmpty = -1; + int loc = newArea->find( sm._convertor( _entries[i].data.first ), + _entries[i].curHash, + &firstEmpty, + sm ); + + verify( loc == -1 ); + verify( firstEmpty >= 0 ); + + newArea->_entries[firstEmpty] = _entries[i]; + } + } + + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::UnorderedFastKeyTable( unsigned startingCapacity, + double maxProbeRatio ) + : _maxProbeRatio( maxProbeRatio ), _area( startingCapacity, maxProbeRatio ) { + _size = 0; + } + + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + V& UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::get( const K_L& key ) { + + const size_t hash = _hash( key ); + + for ( int numGrowTries = 0; numGrowTries < 10; numGrowTries++ ) { + int firstEmpty = -1; + int pos = _area.find( key, hash, &firstEmpty, *this ); + if ( pos >= 0 ) + return _area._entries[pos].data.second; + + // key not in map + // need to add + if ( firstEmpty >= 0 ) { + _size++; + _area._entries[firstEmpty].used = true; + _area._entries[firstEmpty].everUsed = true; + _area._entries[firstEmpty].curHash = hash; + _area._entries[firstEmpty].data.first = key; + return _area._entries[firstEmpty].data.second; + } + + // no space left in map + _grow(); + } + msgasserted( 16471, "UnorderedFastKeyTable couldn't add entry after growing many times" ); + } + + template< typename K_L, typename K_S, typename V, typename H, typename E, typename C > + void UnorderedFastKeyTable<K_L, K_S, V, H, E, C>::_grow() { + Area newArea( _area._capacity * 2, _maxProbeRatio ); + _area.transfer( &newArea, *this ); + _area.swap( &newArea ); + } + +} |