summaryrefslogtreecommitdiff
path: root/src/mongo/db/hasher.h
diff options
context:
space:
mode:
authorKevin Matulef <matulef@gmail.com>2012-05-03 17:37:28 -0400
committerKevin Matulef <matulef@gmail.com>2012-05-09 10:30:23 -0400
commitea82f5b5be6f73634c73ce031434ae32c4c66c59 (patch)
treee689fa6caffa7b1b421066614d78d114a609a6bd /src/mongo/db/hasher.h
parentd07386582932b442c9bab58dec1e44d2b67c0d6b (diff)
downloadmongo-ea82f5b5be6f73634c73ce031434ae32c4c66c59.tar.gz
SERVER-2001 part 1: hashing BSONElements
Diffstat (limited to 'src/mongo/db/hasher.h')
-rw-r--r--src/mongo/db/hasher.h96
1 files changed, 96 insertions, 0 deletions
diff --git a/src/mongo/db/hasher.h b/src/mongo/db/hasher.h
new file mode 100644
index 00000000000..a303460d442
--- /dev/null
+++ b/src/mongo/db/hasher.h
@@ -0,0 +1,96 @@
+/* hasher.h
+ *
+ * Defines a simple hash function class
+ */
+
+
+/**
+* Copyright (C) 2012 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include "pch.h"
+#include "mongo/bson/bsonelement.h"
+#include "mongo/util/md5.hpp"
+
+namespace mongo {
+
+ typedef int HashSeed;
+ typedef unsigned char HashDigest[16];
+
+ class Hasher : private boost::noncopyable {
+ public:
+
+ explicit Hasher( HashSeed seed );
+ ~Hasher() { };
+
+ //pointer to next part of input key, length in bytes to read
+ void addData( const void * keyData , size_t numBytes );
+
+ //finish computing the hash, put the result in the digest
+ //only call this once per Hasher
+ void finish( HashDigest out );
+
+ private:
+ md5_state_t _md5State;
+ HashSeed _seed;
+ };
+
+ class HasherFactory : private boost::noncopyable {
+ public:
+ /* Eventually this may be a more sophisticated factory
+ * for creating other hashers, but for now use MD5.
+ */
+ static Hasher* createHasher( HashSeed seed ) {
+ return new Hasher( seed );
+ }
+
+ private:
+ HasherFactory();
+ };
+
+ class BSONElementHasher : private boost::noncopyable {
+ public:
+ /* This computes a 64-bit hash of the value part of BSONElement "e",
+ * preceded by the seed "seed". Squashes element (and any sub-elements)
+ * of the same canonical type, so hash({a:{b:4}}) will be the same
+ * as hash({a:{b:4.1}}). In particular, this squashes doubles to 64-bit long
+ * ints via truncation, so floating point values round towards 0 to the
+ * nearest int representable as a 64-bit long.
+ *
+ * This function is used in the computation of hashed indexes
+ * and hashed shard keys, and thus should not be changed unless
+ * the associated "getKeys" and "makeSingleKey" method in the
+ * hashindex type is changed accordingly.
+ */
+ static long long int hash64( const BSONElement& e , HashSeed seed );
+
+ private:
+ BSONElementHasher();
+
+ /* This incrementally computes the hash of BSONElement "e"
+ * using hash function "h". If "includeFieldName" is true,
+ * then the name of the field is hashed in between the type of
+ * the element and the element value. The hash function "h"
+ * is applied recursively to any sub-elements (arrays/sub-documents),
+ * squashing elements of the same canonical type.
+ * Used as a helper for hash64 above.
+ */
+ static void recursiveHash( Hasher* h , const BSONElement& e , bool includeFieldName );
+
+ };
+
+}