summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts/fts_spec.h
blob: e867dc042468f5acb2813794bb46626560d930be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// fts_spec.h

/**
*    Copyright (C) 2012 10gen Inc.
*
*    This program is free software: you can redistribute it and/or  modify
*    it under the terms of the GNU Affero General Public License, version 3,
*    as published by the Free Software Foundation.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU Affero General Public License for more details.
*
*    You should have received a copy of the GNU Affero General Public License
*    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*
*    As a special exception, the copyright holders give permission to link the
*    code of portions of this program with the OpenSSL library under certain
*    conditions as described in each individual source file and distribute
*    linked combinations including the program with the OpenSSL library. You
*    must comply with the GNU Affero General Public License in all respects for
*    all of the code used other than as permitted herein. If you modify file(s)
*    with this exception, you may extend this exception to your version of the
*    file(s), but you are not obligated to do so. If you do not wish to do so,
*    delete this exception statement from your version. If you delete this
*    exception statement from all source files in the program, then also delete
*    it in the license file.
*/

#pragma once

#include <cstddef>
#include <map>
#include <string>
#include <vector>

#include "mongo/db/fts/fts_util.h"
#include "mongo/db/fts/stemmer.h"
#include "mongo/db/fts/stop_words.h"
#include "mongo/db/fts/tokenizer.h"
#include "mongo/platform/unordered_map.h"

namespace mongo {

    namespace fts {

        // Defined in another translation unit.
        // NOTE(review): presumably the largest weight a field may be given -- confirm
        // against the definition.
        extern const double MAX_WEIGHT;

        // Ordered map from a name to a weight; FTSSpec uses it to hold per-field weights.
        // TODO cool map
        typedef std::map<std::string,double> Weights;

        // Hash map from a string to a double; FTSSpec::scoreDocument fills one of these.
        // NOTE(review): keys look like they are terms and values their scores -- confirm
        // against the implementation.
        typedef unordered_map<std::string,double> TermFrequencyMap;


        class FTSSpec {

            struct Tools {
                Tools( string language )
                    : language( language ){}
                const std::string& language;
                const Stemmer* stemmer;
                const StopWords* stopwords;
            };

        public:
            FTSSpec( const BSONObj& indexInfo );

            bool wildcard() const { return _wildcard; }
            const string& defaultLanguage() const { return _defaultLanguage; }
            const string& languageOverrideField() const { return _languageOverrideField; }

            size_t numExtraBefore() const { return _extraBefore.size(); }
            const std::string& extraBefore( unsigned i ) const { return _extraBefore[i]; }

            size_t numExtraAfter() const { return _extraAfter.size(); }
            const std::string& extraAfter( unsigned i ) const { return _extraAfter[i]; }

            string getLanguageToUse( const BSONObj& userDoc ) const;

            void scoreDocument( const BSONObj& obj, TermFrequencyMap* scores ) const;

            /**
             * given a query, pulls out the pieces (in order) that go in the index first
             */
            Status getIndexPrefix( const BSONObj& filter, BSONObj* out ) const;

            const Weights& weights() const { return _weights; }

            /**
             * @param out - untouched if field isn't present
             * @return if field is here
             */
            bool weight( const StringData& field, double* out ) const;


            static BSONObj fixSpec( const BSONObj& spec );
        private:
            void _scoreRecurse(const Tools& tools,
                               const BSONObj& obj,
                               TermFrequencyMap* term_freqs ) const;

            void _scoreString( const Tools& tools,
                               const StringData& raw,
                               TermFrequencyMap* term_freqs,
                               double weight ) const;

            string _defaultLanguage;
            string _languageOverrideField;
            bool _wildcard;

            // _weights stores a mapping between the fields and the value as a double
            // basically, how much should an occurence of (query term) in (field) be worth
            Weights _weights;

            // other fields to index
            std::vector<string> _extraBefore;
            std::vector<string> _extraAfter;
        };

    }
}