summaryrefslogtreecommitdiff
path: root/sandbox/py-rest-doc/sphinx/search.py
diff options
context:
space:
mode:
authorblackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2007-05-22 21:11:58 +0000
committerblackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>2007-05-22 21:11:58 +0000
commit57b6a2e9aaf13a745407bcf77ec182ec16b58e20 (patch)
treecb7263f3015af199e5048513f99a843958ccaa4f /sandbox/py-rest-doc/sphinx/search.py
parent6c34caabbe1eb96f3ddcf6900d63526b639df924 (diff)
downloaddocutils-57b6a2e9aaf13a745407bcf77ec182ec16b58e20.tar.gz
implemented online search
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@5094 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'sandbox/py-rest-doc/sphinx/search.py')
-rw-r--r--sandbox/py-rest-doc/sphinx/search.py67
1 files changed, 64 insertions, 3 deletions
diff --git a/sandbox/py-rest-doc/sphinx/search.py b/sandbox/py-rest-doc/sphinx/search.py
index 93e2744a0..4507bcbca 100644
--- a/sandbox/py-rest-doc/sphinx/search.py
+++ b/sandbox/py-rest-doc/sphinx/search.py
@@ -9,7 +9,9 @@
:license: Python license.
"""
import re
+import pickle
+from collections import defaultdict
from docutils.nodes import Text, NodeVisitor
from .stemmer import PorterStemmer
from .json import dump_json
@@ -29,6 +31,9 @@ class Stemmer(PorterStemmer):
class WordCollector(NodeVisitor):
+ """
+ A special visitor that collects words for the `IndexBuilder`.
+ """
def __init__(self, document):
NodeVisitor.__init__(self, document)
@@ -40,6 +45,14 @@ class WordCollector(NodeVisitor):
class IndexBuilder(object):
+ """
+ Helper class that creates a searchindex based on the doctrees
+ passed to the `feed` method.
+ """
+ formats = {
+ 'json': dump_json,
+ 'pickle': pickle.dumps
+ }
def __init__(self):
self._filenames = {}
@@ -48,8 +61,16 @@ class IndexBuilder(object):
self._categories = {}
self._stemmer = Stemmer()
- def dump(self, stream):
- stream.write(dump_json([
+ def dump(self, stream, format):
+ """Dump the freezed index to a stream."""
+ stream.write(self.formats[format](self.freeze()))
+
+ def freeze(self):
+ """
+ Create a useable data structure. You can pass this output
+ to the `SearchFrontend` to search the index.
+ """
+ return [
[k for k, v in sorted(self._filenames.items(),
key=lambda x: x[1])],
dict(item for item in sorted(self._categories.items(),
@@ -58,9 +79,10 @@ class IndexBuilder(object):
key=lambda x: x[0])],
dict(item for item in sorted(self._mapping.items(),
key=lambda x: x[0])),
- ]))
+ ]
def feed(self, filename, category, title, doctree):
+ """Feed a doctree to the index."""
file_id = self._filenames.setdefault(filename, len(self._filenames))
self._titles[file_id] = title
visitor = WordCollector(doctree)
@@ -69,3 +91,42 @@ class IndexBuilder(object):
for word in word_re.findall(title) + visitor.found_words:
self._mapping.setdefault(self._stemmer.stem(word.lower()),
set()).add(file_id)
+
+
class SearchFrontend(object):
    """
    This class acts as a frontend for the search index.  It can search
    a searchindex as provided by `IndexBuilder`.
    """

    def __init__(self, index):
        # `index` is the frozen structure produced by `IndexBuilder.freeze`:
        # (filenames, areas, titles, words) where `words` maps a stem to
        # the set of file ids containing it.
        self.filenames, self.areas, self.titles, self.words = index
        self._stemmer = Stemmer()

    def query(self, required, excluded, areas):
        """Return sorted ``(filename, title)`` pairs for every file that
        contains all stems in *required*, none of the stems in *excluded*,
        and belongs to at least one area in *areas*.

        Results are sorted case-insensitively by title.
        """
        file_map = defaultdict(set)
        for word in required:
            if word not in self.words:
                # A required stem that is not indexed at all means no
                # file can possibly match -- bail out explicitly.
                # (The original relied on `break` plus the
                # len(words) == len(required) filter below.)
                return []
            for fid in self.words[word]:
                file_map[fid].add(word)

        # Keep only files that matched *every* required stem, lie in one
        # of the requested areas, and contain no excluded stem.
        # NOTE: was `iteritems()` (Python 2 only); `items()` is
        # equivalent here and portable.
        return sorted(((self.filenames[fid], self.titles[fid])
                       for fid, words in file_map.items()
                       if len(words) == len(required) and
                          any(fid in self.areas.get(area, ()) for area in areas) and not
                          any(fid in self.words.get(word, ()) for word in excluded)
                      ), key=lambda x: x[1].lower())

    def search(self, searchstring, areas):
        """Parse *searchstring* and run a query over *areas*.

        Whitespace-separated words are stemmed and lowercased; a word
        prefixed with ``-`` is treated as excluded, all others as
        required.
        """
        required = set()
        excluded = set()
        for word in searchstring.split():
            if word.startswith('-'):
                storage = excluded
                word = word[1:]
            else:
                storage = required
            storage.add(self._stemmer.stem(word.lower()))

        return self.query(required, excluded, areas)