summaryrefslogtreecommitdiff
path: root/src/mongo/db/fts/generate_stop_words.py
blob: e0dc801ca922ea2c46bbb42286e5ce4cf738e811 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import sys

def _escape_cpp_string(text):
    """Return text with backslashes and double quotes escaped for a C++ string literal."""
    # Backslashes first, so the ones added for quotes are not doubled again.
    return text.replace("\\", "\\\\").replace('"', '\\"')


def generate(header, source, language_files):
    """Write the C++ header and source that define ``loadStopWordMap``.

    The header declares ``mongo::fts::loadStopWordMap``; the source defines
    it, emitting one ``const char* const words[]`` array per language file
    and inserting that array into the map entry keyed by the language name.

    Args:
        header: Path of the C++ header file to (over)write.
        source: Path of the C++ source file to (over)write.
        language_files: Iterable of stop-word file paths, one word per line
            (read as UTF-8). The language name is the part of the path after
            the last ``_`` and before the following ``.``.

    Raises:
        OSError: If any of the files cannot be opened.
        UnicodeDecodeError: If a language file is not valid UTF-8.
    """
    print("header: %s" % header)
    print("source: %s" % source)
    print("language_files:")
    for x in language_files:
        print("\t%s" % x)

    # Text mode with newline="\n" keeps the LF-only output the original
    # binary-mode writes produced, while accepting str on Python 3.
    with open(header, "w", encoding="utf-8", newline="\n") as out:
        out.write("""
#pragma once
#include <set>
#include <string>
#include "mongo/util/string_map.h"
namespace mongo {
namespace fts {

  void loadStopWordMap( StringMap< std::set< std::string > >* m );
}
}
""")

    # The source includes the header by bare file name, so strip any
    # directory part written with either "/" or "\\" separators.
    header_name = header.rpartition("/")[2].rpartition("\\")[2]

    with open(source, "w", encoding="utf-8", newline="\n") as out:
        out.write('#include "%s"' % header_name)
        out.write("""
namespace mongo {
namespace fts {

  void loadStopWordMap( StringMap< std::set< std::string > >* m ) {

""")

        for l_file in language_files:
            language = l_file.rpartition("_")[2].partition(".")[0]

            out.write('  // %s\n' % l_file)
            out.write('  {\n')
            out.write('   const char* const words[] = {\n')
            # Read bytes and decode explicitly so that only ASCII whitespace
            # is stripped, exactly as the original bytes-based strip() did.
            with open(l_file, "rb") as words:
                for raw_word in words:
                    word = raw_word.strip().decode("utf-8")
                    out.write('       "%s",\n' % _escape_cpp_string(word))
            out.write('   };\n')
            out.write('   const size_t wordcnt = sizeof(words) / sizeof(words[0]);\n')
            out.write('   std::set< std::string >& l = (*m)["%s"];\n' % language)
            out.write('   l.insert(&words[0], &words[wordcnt]);\n')
            out.write('  }\n')
        out.write("""
  }
} // namespace fts
} // namespace mongo
""")


if __name__ == "__main__":
    # Expected command line: [language_file ...] <header> <source>.
    # With fewer than two arguments the trailing indices would resolve to
    # sys.argv[0], and generate() would overwrite this script itself.
    if len(sys.argv) < 3:
        sys.exit("usage: %s [language_file ...] <header> <source>" % sys.argv[0])

    generate(sys.argv[-2], sys.argv[-1], sys.argv[1:-2])