SERVER-2001 part 1: hashing BSONElements

author: Kevin Matulef <matulef@gmail.com> 2012-05-03 17:37:28 -0400
committer: Kevin Matulef <matulef@gmail.com> 2012-05-09 10:30:23 -0400
commit: ea82f5b5be6f73634c73ce031434ae32c4c66c59 (patch)
tree: e689fa6caffa7b1b421066614d78d114a609a6bd /src/mongo/db/hasher.cpp
parent: d07386582932b442c9bab58dec1e44d2b67c0d6b (diff)
download: mongo-ea82f5b5be6f73634c73ce031434ae32c4c66c59.tar.gz
1 files changed, 94 insertions, 0 deletions
diff --git a/src/mongo/db/hasher.cpp b/src/mongo/db/hasher.cpp
new file mode 100644
index 00000000000..b6603523f5f
--- /dev/null
+++ b/src/mongo/db/hasher.cpp
@@ -0,0 +1,94 @@
+/* hasher.cpp
+ *
+ * Defines a simple hash function class
+ */
+
+
+/**
+*    Copyright (C) 2012 10gen Inc.
+*
+*    This program is free software: you can redistribute it and/or  modify
+*    it under the terms of the GNU Affero General Public License, version 3,
+*    as published by the Free Software Foundation.
+*
+*    This program is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+*    GNU Affero General Public License for more details.
+*
+*    You should have received a copy of the GNU Affero General Public License
+*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "mongo/db/hasher.h"
+#include "mongo/db/jsobj.h"
+
+namespace mongo {
+
+    Hasher::Hasher( HashSeed seed ) : _seed( seed ) {
+        md5_init( &_md5State );
+        md5_append( &_md5State , reinterpret_cast< const md5_byte_t * >( & _seed ) , sizeof( _seed ) );
+    }
+
+    void Hasher::addData( const void * keyData , size_t numBytes ) {
+        md5_append( &_md5State , static_cast< const md5_byte_t * >( keyData ), numBytes );
+    }
+
+    void Hasher::finish( HashDigest out ) {
+        md5_finish( &_md5State , out );
+    }
+
+    long long int BSONElementHasher::hash64( const BSONElement& e , HashSeed seed ){
+        scoped_ptr<Hasher> h( HasherFactory::createHasher( seed ) );
+        recursiveHash( h.get() , e , false );
+        HashDigest d;
+        h->finish(d);
+        //HashDigest is actually 16 bytes, but we just get 8 via truncation
+        // NOTE: assumes little-endian
+        return *reinterpret_cast< long long int * >( d );
+    }
+
+    void BSONElementHasher::recursiveHash( Hasher* h ,
+                                           const BSONElement& e ,
+                                           bool includeFieldName ) {
+
+        int canonicalType = e.canonicalType();
+        h->addData( &canonicalType , sizeof( canonicalType ) );
+
+        if ( includeFieldName ){
+            h->addData( e.fieldName() , e.fieldNameSize() );
+        }
+
+        if ( !e.mayEncapsulate() ){
+            //if there are no embedded objects (subobjects or arrays),
+            //compute the hash, squashing numeric types to 64-bit ints
+            if ( e.isNumber() ){
+                long long int i = e.safeNumberLong(); //well-defined for troublesome doubles
+                h->addData( &i , sizeof( i ) );
+            }
+            else {
+                h->addData( e.value() , e.valuesize() );
+            }
+        }
+        else {
+            //else identify the subobject.
+            //hash any preceding stuff (in the case of codeWscope)
+            //then each sub-element
+            //then finish with the EOO element.
+            BSONObj b;
+            if ( e.type() == CodeWScope ) {
+                h->addData( e.codeWScopeCode() , e.codeWScopeCodeLen() );
+                b = e.codeWScopeObject();
+            }
+            else {
+                b = e.embeddedObject();
+            }
+            BSONObjIterator i(b);
+            while( i.moreWithEOO() ) {
+                BSONElement el = i.next();
+                recursiveHash( h , el ,  true );
+            }
+        }
+    }
+
+}
author	Kevin Matulef <matulef@gmail.com>	2012-05-03 17:37:28 -0400
committer	Kevin Matulef <matulef@gmail.com>	2012-05-09 10:30:23 -0400
commit	ea82f5b5be6f73634c73ce031434ae32c4c66c59 (patch)
tree	e689fa6caffa7b1b421066614d78d114a609a6bd /src/mongo/db/hasher.cpp
parent	d07386582932b442c9bab58dec1e44d2b67c0d6b (diff)
download	mongo-ea82f5b5be6f73634c73ce031434ae32c4c66c59.tar.gz