summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts/fts_index_format.cpp
blob: 56df0e219e53281d8482d9002f217e7670dd1868 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// fts_index_format.cpp

/**
*    Copyright (C) 2012 10gen Inc.
*
*    This program is free software: you can redistribute it and/or  modify
*    it under the terms of the GNU Affero General Public License, version 3,
*    as published by the Free Software Foundation.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU Affero General Public License for more details.
*
*    You should have received a copy of the GNU Affero General Public License
*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "mongo/pch.h"

#include "mongo/base/init.h"
#include "mongo/db/fts/fts_index_format.h"

namespace mongo {

    namespace fts {

        namespace {
            BSONObj nullObj;
            BSONElement nullElt;
        }

        MONGO_INITIALIZER( FTSIndexFormat )( InitializerContext* context ) {
            BSONObjBuilder b;
            b.appendNull( "" );
            nullObj = b.obj();
            nullElt = nullObj.firstElement();
            return Status::OK();
        }

        void FTSIndexFormat::getKeys( const FTSSpec& spec,
                                      const BSONObj& obj,
                                      BSONObjSet* keys ) {

            int extraSize = 0;
            vector<BSONElement> extrasBefore;
            vector<BSONElement> extrasAfter;

            // compute the non FTS key elements
            for ( unsigned i = 0; i < spec.numExtraBefore(); i++ ) {
                BSONElement e = obj.getFieldDotted(spec.extraBefore(i));
                if ( e.eoo() )
                    e = nullElt;
                uassert( 16675, "cannot have a multi-key as a prefix to a text index",
                         e.type() != Array );
                extrasBefore.push_back(e);
                extraSize += e.size();
            }
            for ( unsigned i = 0; i < spec.numExtraAfter(); i++ ) {
                BSONElement e = obj.getFieldDotted(spec.extraAfter(i));
                if ( e.eoo() )
                    e = nullElt;
                extrasAfter.push_back(e);
                extraSize += e.size();
            }


            TermFrequencyMap term_freqs;
            spec.scoreDocument( obj, &term_freqs );

            // create index keys from raw scores
            // only 1 per string
            for ( TermFrequencyMap::const_iterator i = term_freqs.begin();
                  i != term_freqs.end();
                  ++i ) {

                const string& term = i->first;
                double weight = i->second;

                // guess the total size of the btree entry based on the size of the weight, term tuple
                int guess =
                    5 /* bson overhead */ +
                    10 /* weight */ +
                    8 /* term overhead */ +
                    term.size() +
                    extraSize;

                BSONObjBuilder b(guess); // builds a BSON object with guess length.
                for ( unsigned k = 0; k < extrasBefore.size(); k++ )
                    b.appendAs( extrasBefore[k], "" );
                _appendIndexKey( b, weight, term );
                for ( unsigned k = 0; k < extrasAfter.size(); k++ )
                    b.appendAs( extrasAfter[k], "" );
                BSONObj res = b.obj();

                verify( guess >= res.objsize() );

                keys->insert( res );
            }
        }

        BSONObj FTSIndexFormat::getIndexKey( double weight,
                                             const string& term,
                                             const BSONObj& indexPrefix ) {
            BSONObjBuilder b;

            BSONObjIterator i( indexPrefix );
            while ( i.more() )
                b.appendAs( i.next(), "" );

            _appendIndexKey( b, weight, term );
            return b.obj();
        }

        void FTSIndexFormat::_appendIndexKey( BSONObjBuilder& b, double weight, const string& term ) {
            verify( weight >= 0 && weight <= MAX_WEIGHT ); // FTSmaxweight =  defined in fts_header
            b.append( "", term );
            b.append( "", weight );
        }
    }
}