summaryrefslogtreecommitdiff
path: root/src/mongo/db/hashindex.h
blob: 51bf82e1c0f7cdf6dce3ecfd52c404a490a55ff3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// hashindex.h

/**
*    Copyright (C) 2012 10gen Inc.
*
*    This program is free software: you can redistribute it and/or  modify
*    it under the terms of the GNU Affero General Public License, version 3,
*    as published by the Free Software Foundation.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU Affero General Public License for more details.
*
*    You should have received a copy of the GNU Affero General Public License
*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "mongo/db/hasher.h"
#include "mongo/db/index.h"
#include "mongo/db/keypattern.h"
#include "mongo/db/matcher.h"
#include "mongo/db/namespace-inl.h"
#include "mongo/db/pdfile.h"

namespace mongo {

    /* This is an index where the keys are hashes of a given field.
     *
     * Optional arguments:
     *  "seed" : int (default = 0, a seed for the hash function)
     *  "hashVersion : int (default = 0, determines which hash function to use)
     *
     * Example use in the mongo shell:
     * > db.foo.ensureIndex({a : "hashed"}, {seed : 3, hashVersion : 0})
     *
     * LIMITATION: Only works with a single field. The HashedIndexType
     * constructor uses uassert to ensure that the spec has the form
     * {<fieldname> : "hashed"}, and not, for example,
     * { a : "hashed" , b : 1}
     *
     * LIMITATION: Cannot be used as a unique index.
     * The HashedIndexType constructor uses uassert to ensure that
     * the spec does not contain {"unique" : true}
     *
     * LIMITATION: Cannot be used to index arrays.
     * The getKeys function uasserts that value being inserted
     * is not an array.  This index will not be built if any
     * array values of the hashed field exist.
     *
     */
    class HashedIndexType : public IndexType{
    public:

        static const string HASHED_INDEX_TYPE_IDENTIFIER;
        typedef int HashVersion;

        /* Creates a new HashedIndex around a HashedIndexPlugin
         * and an IndexSpec. New HashedIndexTypes are created via
         * a factory method in the HashedIndexPlugin class.
         */
        HashedIndexType( const IndexPlugin* plugin , const IndexSpec* spec );
        virtual ~HashedIndexType();

        /* This index is only considered "HELPFUL" for a query
         * if it's the union of at least one equality constraint on the
         * hashed field.  Otherwise it's considered USELESS.
         * Example queries (supposing the indexKey is {a : "hashed"}):
         *   {a : 3}  HELPFUL
         *   {a : 3 , b : 3} HELPFUL
         *   {a : {$in : [3,4]}} HELPFUL
         *   {a : {$gte : 3, $lte : 3}} HELPFUL
         *   {} USELESS
         *   {b : 3} USELESS
         *   {a : {$gt : 3}} USELESS
         */
        IndexSuitability suitability( const FieldRangeSet& queryConstraints ,
                                      const BSONObj& order ) const;

        /* The input is "obj" which should have a field corresponding to the hashedfield.
         * The output is a BSONObj with a single BSONElement whose value is the hash
         * Eg if this is an index on "a" we have
         *   obj is {a : 45} --> key becomes {"" : hash(45) }
         *
         * Limitation: arrays values are not currently supported.  This function uasserts
         * that the value is not an array, and errors out in that case.
         */
        void getKeys( const BSONObj &obj, BSONObjSet &keys ) const;

        /* The newCursor method works for suitable queries by generating a BtreeCursor
         * using the hash of point-intervals parsed by FieldRangeSet.
         * For unsuitable queries it just instantiates a btree cursor over the whole tree
         */
        shared_ptr<Cursor> newCursor( const BSONObj& query ,
                const BSONObj& order , int numWanted ) const;

        /* Takes a BSONElement, seed and hashVersion, and outputs the
         * 64-bit hash used for this index
         * E.g. if the element is {a : 3} this outputs v1-hash(3)
         * */
        static long long int makeSingleKey( const BSONElement& e ,
                                            HashSeed seed ,
                                            HashVersion v = 0 );

        /* Since the keys for this index are hashes, documents are not stored in order,
         * thus we will need to perform scanAndOrder whenever the "order" is non-empty.
         */
        bool scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const {
            return ! order.isEmpty();
        }

    private:
        string _hashedField;
        KeyPattern _keyPattern;
        HashSeed _seed; //defaults to zero if not in the IndexSpec
        HashVersion _hashVersion; //defaults to zero if not in the IndexSpec
        bool _isSparse;
    };

}