summaryrefslogtreecommitdiff
path: root/src/mongo/db/hasher.cpp
diff options
context:
space:
mode:
authorKevin Matulef <matulef@gmail.com>2012-05-03 17:37:28 -0400
committerKevin Matulef <matulef@gmail.com>2012-05-09 10:30:23 -0400
commitea82f5b5be6f73634c73ce031434ae32c4c66c59 (patch)
treee689fa6caffa7b1b421066614d78d114a609a6bd /src/mongo/db/hasher.cpp
parentd07386582932b442c9bab58dec1e44d2b67c0d6b (diff)
downloadmongo-ea82f5b5be6f73634c73ce031434ae32c4c66c59.tar.gz
SERVER-2001 part 1: hashing BSONElements
Diffstat (limited to 'src/mongo/db/hasher.cpp')
-rw-r--r--src/mongo/db/hasher.cpp94
1 files changed, 94 insertions, 0 deletions
diff --git a/src/mongo/db/hasher.cpp b/src/mongo/db/hasher.cpp
new file mode 100644
index 00000000000..b6603523f5f
--- /dev/null
+++ b/src/mongo/db/hasher.cpp
@@ -0,0 +1,94 @@
+/* hasher.cpp
+ *
+ * Defines a simple hash function class
+ */
+
+
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "mongo/db/hasher.h"
+#include "mongo/db/jsobj.h"
+
+namespace mongo {
+
+ Hasher::Hasher( HashSeed seed ) : _seed( seed ) {
+ md5_init( &_md5State );
+ md5_append( &_md5State , reinterpret_cast< const md5_byte_t * >( & _seed ) , sizeof( _seed ) );
+ }
+
+ void Hasher::addData( const void * keyData , size_t numBytes ) {
+ md5_append( &_md5State , static_cast< const md5_byte_t * >( keyData ), numBytes );
+ }
+
+ void Hasher::finish( HashDigest out ) {
+ md5_finish( &_md5State , out );
+ }
+
+ long long int BSONElementHasher::hash64( const BSONElement& e , HashSeed seed ){
+ scoped_ptr<Hasher> h( HasherFactory::createHasher( seed ) );
+ recursiveHash( h.get() , e , false );
+ HashDigest d;
+ h->finish(d);
+ //HashDigest is actually 16 bytes, but we just get 8 via truncation
+ // NOTE: assumes little-endian
+ return *reinterpret_cast< long long int * >( d );
+ }
+
+ void BSONElementHasher::recursiveHash( Hasher* h ,
+ const BSONElement& e ,
+ bool includeFieldName ) {
+
+ int canonicalType = e.canonicalType();
+ h->addData( &canonicalType , sizeof( canonicalType ) );
+
+ if ( includeFieldName ){
+ h->addData( e.fieldName() , e.fieldNameSize() );
+ }
+
+ if ( !e.mayEncapsulate() ){
+ //if there are no embedded objects (subobjects or arrays),
+ //compute the hash, squashing numeric types to 64-bit ints
+ if ( e.isNumber() ){
+ long long int i = e.safeNumberLong(); //well-defined for troublesome doubles
+ h->addData( &i , sizeof( i ) );
+ }
+ else {
+ h->addData( e.value() , e.valuesize() );
+ }
+ }
+ else {
+ //else identify the subobject.
+ //hash any preceding stuff (in the case of codeWscope)
+ //then each sub-element
+ //then finish with the EOO element.
+ BSONObj b;
+ if ( e.type() == CodeWScope ) {
+ h->addData( e.codeWScopeCode() , e.codeWScopeCodeLen() );
+ b = e.codeWScopeObject();
+ }
+ else {
+ b = e.embeddedObject();
+ }
+ BSONObjIterator i(b);
+ while( i.moreWithEOO() ) {
+ BSONElement el = i.next();
+ recursiveHash( h , el , true );
+ }
+ }
+ }
+
+}