summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts/generate_stop_words.py
blob: 6ca7b44316a8e138e95a9333bcd8c6b994397507 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import sys

def generate( header, source, language_files ):
    out = open( header, "w" )
    out.write( """
#pragma once
#include <set>
#include <string>
#include "mongo/util/string_map.h"
namespace mongo {
namespace fts {

  void loadStopWordMap( StringMap< std::set< std::string > >* m );
}
}
""" )
    out.close()



    out = open( source, "w", encoding='utf-8')
    out.write( '#include "{}"'.format(header.rpartition( "/" )[2].rpartition( "\\" )[2]) )
    out.write( """
namespace mongo {
namespace fts {

  void loadStopWordMap( StringMap< std::set< std::string > >* m ) {

    m->insert({
""" )

    for l_file in language_files:
        l = l_file.rpartition( "_" )[2].partition( "." )[0]

        out.write( '  // %s\n' % l_file )
        out.write( '  {\n' )
        out.write( '    "%s", {\n' % l )
        for word in open( l_file, "rb" ):
            out.write( '       "%s",\n' % word.decode('utf-8').strip() )
        out.write( '  }},\n' )

    out.write( """
    });
  }
} // namespace fts
} // namespace mongo
""" )


if __name__ == "__main__":
    generate( sys.argv[ len(sys.argv) - 2],
              sys.argv[ len(sys.argv) - 1],
              sys.argv[1:-2] )