summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts/fts_language.h
blob: 3a9acbbdd94d8ffdc4a542233b410b1f3cdef340 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// fts_language.h

/**
 *    Copyright (C) 2013 MongoDB Inc.
 *
 *    This program is free software: you can redistribute it and/or  modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the GNU Affero General Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include "mongo/db/fts/fts_util.h"
#include "mongo/base/status_with.h"

#include <string>

namespace mongo {

    namespace fts {

        /**
         * Defines an FTSLanguage object named 'language' together with an initializer function
         * (declared through MONGO_INITIALIZER_GENERAL) whose body calls
         * FTSLanguage::registerLanguage( name, version, &language ) and returns Status::OK().
         *
         * Arguments:
         * - language: identifier of the FTSLanguage object to define; it is also passed as the
         *   first argument of MONGO_INITIALIZER_GENERAL.
         * - name: language name forwarded to FTSLanguage::registerLanguage().
         * - version: text index version forwarded to FTSLanguage::registerLanguage().
         *
         * FTSLanguage and Status are spelled unqualified in the expansion, so the macro must be
         * expanded in a scope where both names resolve (for example inside namespace mongo::fts).
         *
         * NOTE(review): MONGO_INITIALIZER_GENERAL is not defined by anything visible in this
         * header, so the expanding file presumably has to include the initializer header itself.
         * The ( "FTSAllLanguagesRegistered" ) argument presumably makes this initializer run
         * before the one of that name -- confirm against the initializer macro's documentation.
         */
        #define MONGO_FTS_LANGUAGE_DECLARE( language, name, version ) \
            FTSLanguage language; \
            MONGO_INITIALIZER_GENERAL( language, MONGO_NO_PREREQUISITES, \
                                       ( "FTSAllLanguagesRegistered" ) ) \
                                     ( ::mongo::InitializerContext* context ) { \
                FTSLanguage::registerLanguage( name, version, &language ); \
                return Status::OK(); \
            }

        /**
         * FTSLanguage identifies the language of a text-indexed document or of a text search.
         * Instances cannot be copied; look a language up through make() instead.
         *
         * Typical usage:
         *
         *     StatusWithFTSLanguage swl = FTSLanguage::make( "en", TEXT_INDEX_VERSION_2 );
         *     if ( swl.getStatus().isOK() ) {
         *         const FTSLanguage* language = swl.getValue();
         *         // Use language.
         *     }
         *     else {
         *         // Error.
         *     }
         */
        class FTSLanguage {
            // Copying is disabled; callers obtain languages from make().
            MONGO_DISALLOW_COPYING( FTSLanguage );

        public:
            /** Constructs a language that has not been initialized yet. */
            FTSLanguage();

            /**
             * Canonical form of this language as a std::string: its lowercased English name.
             * Calling str() on an uninitialized language is an error.
             */
            const std::string& str() const;

            /**
             * Looks up the FTSLanguage that corresponds to the language string 'langName'.
             * An error Status is returned when the language string is invalid.
             *
             * With textIndexVersion=TEXT_INDEX_VERSION_2, language strings are matched
             * case-insensitively and must take one of these two forms:
             * - the English name, e.g. "spanish";
             * - the two-letter code, e.g. "es".
             *
             * With textIndexVersion=TEXT_INDEX_VERSION_1, language strings are neither validated
             * nor normalized.  That keeps indexing behavior unchanged for documents whose
             * language string is something like "en": for compatibility, the text in those
             * documents has to be processed with the English stemmer and the empty stopword list
             * (because "en" is recognized by Snowball but not by the stopword processing logic).
             */
            static StatusWith<const FTSLanguage*> make(
                    StringData langName,
                    TextIndexVersion textIndexVersion );

            /**
             * Registers the string 'languageName' as a new language for text index version
             * 'textIndexVersion' and stores the resulting language in the out-argument
             * 'languageOut'.  Once registered, the language string is recognized by later calls
             * to FTSLanguage::make().
             */
            static void registerLanguage(
                    StringData languageName,
                    TextIndexVersion textIndexVersion,
                    FTSLanguage *languageOut );

            /**
             * Registers 'alias' as an alternative name for 'language' under text index version
             * 'textIndexVersion'.  Once registered, the alias is recognized by later calls to
             * FTSLanguage::make().
             */
            static void registerLanguageAlias(
                    const FTSLanguage* language,
                    StringData alias,
                    TextIndexVersion textIndexVersion );

        private:
            // Name of this language in canonical form (the value returned by str() -- see its
            // comment for the canonical-form definition).
            std::string _canonicalName;
        };

        // Shorthand for the return type of FTSLanguage::make().
        typedef StatusWith<const FTSLanguage*> StatusWithFTSLanguage;

        // FTSLanguage objects declared here and defined outside this header.
        // NOTE(review): presumably each one is defined with MONGO_FTS_LANGUAGE_DECLARE in the
        // corresponding .cpp, and the V1/V2 suffix names the text index version it is
        // registered for -- confirm against the definitions.
        extern FTSLanguage languagePorterV1;
        extern FTSLanguage languageEnglishV2;
        extern FTSLanguage languageFrenchV2;

    }
}