summaryrefslogtreecommitdiff
path: root/sphinx/websupport/search/whooshsearch.py
blob: 6d1f9de4a8998b7a4c941030d0fcc7edbba9c126 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# -*- coding: utf-8 -*-
"""
    sphinx.websupport.search.whooshsearch
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Whoosh search adapter.

    :copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from whoosh import index
from whoosh.fields import Schema, ID, TEXT
from whoosh.qparser import QueryParser
from whoosh.analysis import StemmingAnalyzer

from sphinx.util.osutil import ensuredir
from sphinx.websupport.search import BaseSearch


class WhooshSearch(BaseSearch):
    """The whoosh search adapter for sphinx web support.

    Every indexed page is stored as one Whoosh document made of its
    ``path``, ``title`` and ``text``.
    """

    # Define the Whoosh Schema for the search index.  ``path`` is the unique
    # key of a document, ``title`` is boosted, and ``text`` is stemmed.  All
    # fields are stored so that hits can be turned back into results.
    schema = Schema(path=ID(stored=True, unique=True),
                    title=TEXT(field_boost=2.0, stored=True),
                    text=TEXT(analyzer=StemmingAnalyzer(), stored=True))

    def __init__(self, db_path):
        """Open the index found in *db_path*, or create a new one there.

        :param db_path: directory holding the Whoosh index; ``ensuredir``
                        is called on it before the index is opened.
        """
        ensuredir(db_path)
        if index.exists_in(db_path):
            self.index = index.open_dir(db_path)
        else:
            self.index = index.create_in(db_path, schema=self.schema)
        # Queries are parsed against the ``text`` field by default.
        self.qparser = QueryParser('text', self.schema)

    def init_indexing(self, changed=None):
        """Open an index writer and drop the documents that will be re-added.

        :param changed: iterable of page paths whose existing documents are
                        to be removed from the index; ``None`` (the default)
                        or an empty iterable removes nothing.
        """
        # ``None`` replaces the former mutable ``[]`` default argument.
        self.index_writer = self.index.writer()
        # Delete through the writer so that the removals are committed
        # together with the new documents in finish_indexing().
        for changed_path in changed or ():
            self.index_writer.delete_by_term('path', changed_path)

    def finish_indexing(self):
        """Commit everything done on the writer since init_indexing()."""
        self.index_writer.commit()

    def add_document(self, pagename, title, text):
        """Queue one page on the writer opened by init_indexing().

        :param pagename: name of the page, stored in the ``path`` field
        :param title: title of the page
        :param text: full text of the page
        """
        self.index_writer.add_document(path=unicode(pagename),
                                       title=title,
                                       text=text)

    def handle_query(self, q):
        """Search the index for *q*.

        :param q: the query string, parsed with ``self.qparser``
        :returns: a list of ``(path, title, context)`` tuples, one per hit,
                  where *context* is what ``self.extract_context`` returns
                  for the stored text of the hit and *title* falls back to
                  ``''`` when the hit has none.
        """
        searcher = self.index.searcher()
        try:
            whoosh_results = searcher.search(self.qparser.parse(q))
            results = []
            # Stored fields must be read while the searcher is still open.
            for result in whoosh_results:
                context = self.extract_context(result['text'])
                results.append((result['path'],
                                result.get('title', ''),
                                context))
            return results
        finally:
            # Release the searcher even when parsing or searching raises;
            # it used to be left open after every query.
            searcher.close()