diff options
author | blackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2007-05-22 21:11:58 +0000 |
---|---|---|
committer | blackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> | 2007-05-22 21:11:58 +0000 |
commit | 57b6a2e9aaf13a745407bcf77ec182ec16b58e20 (patch) | |
tree | cb7263f3015af199e5048513f99a843958ccaa4f /sandbox/py-rest-doc/sphinx/search.py | |
parent | 6c34caabbe1eb96f3ddcf6900d63526b639df924 (diff) | |
download | docutils-57b6a2e9aaf13a745407bcf77ec182ec16b58e20.tar.gz |
implemented online search
git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@5094 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
Diffstat (limited to 'sandbox/py-rest-doc/sphinx/search.py')
-rw-r--r-- | sandbox/py-rest-doc/sphinx/search.py | 67 |
1 files changed, 64 insertions, 3 deletions
diff --git a/sandbox/py-rest-doc/sphinx/search.py b/sandbox/py-rest-doc/sphinx/search.py index 93e2744a0..4507bcbca 100644 --- a/sandbox/py-rest-doc/sphinx/search.py +++ b/sandbox/py-rest-doc/sphinx/search.py @@ -9,7 +9,9 @@ :license: Python license. """ import re +import pickle +from collections import defaultdict from docutils.nodes import Text, NodeVisitor from .stemmer import PorterStemmer from .json import dump_json @@ -29,6 +31,9 @@ class Stemmer(PorterStemmer): class WordCollector(NodeVisitor): + """ + A special visitor that collects words for the `IndexBuilder`. + """ def __init__(self, document): NodeVisitor.__init__(self, document) @@ -40,6 +45,14 @@ class WordCollector(NodeVisitor): class IndexBuilder(object): + """ + Helper class that creates a searchindex based on the doctrees + passed to the `feed` method. + """ + formats = { + 'json': dump_json, + 'pickle': pickle.dumps + } def __init__(self): self._filenames = {} @@ -48,8 +61,16 @@ class IndexBuilder(object): self._categories = {} self._stemmer = Stemmer() - def dump(self, stream): - stream.write(dump_json([ + def dump(self, stream, format): + """Dump the freezed index to a stream.""" + stream.write(self.formats[format](self.freeze())) + + def freeze(self): + """ + Create a useable data structure. You can pass this output + to the `SearchFrontend` to search the index. + """ + return [ [k for k, v in sorted(self._filenames.items(), key=lambda x: x[1])], dict(item for item in sorted(self._categories.items(), @@ -58,9 +79,10 @@ class IndexBuilder(object): key=lambda x: x[0])], dict(item for item in sorted(self._mapping.items(), key=lambda x: x[0])), - ])) + ] def feed(self, filename, category, title, doctree): + """Feed a doctree to the index.""" file_id = self._filenames.setdefault(filename, len(self._filenames)) self._titles[file_id] = title visitor = WordCollector(doctree) @@ -69,3 +91,42 @@ class IndexBuilder(object): for word in word_re.findall(title) + visitor.found_words: self._mapping.setdefault(self._stemmer.stem(word.lower()), set()).add(file_id) + + +class SearchFrontend(object): + """ + This class acts as a frontend for the search index. It can search + a searchindex as provided by `IndexBuilder`. + """ + + def __init__(self, index): + self.filenames, self.areas, self.titles, self.words = index + self._stemmer = Stemmer() + + def query(self, required, excluded, areas): + file_map = defaultdict(set) + for word in required: + if word not in self.words: + break + for fid in self.words[word]: + file_map[fid].add(word) + + return sorted(((self.filenames[fid], self.titles[fid]) + for fid, words in file_map.iteritems() + if len(words) == len(required) and + any(fid in self.areas.get(area, ()) for area in areas) and not + any(fid in self.words.get(word, ()) for word in excluded) + ), key=lambda x: x[1].lower()) + + def search(self, searchstring, areas): + required = set() + excluded = set() + for word in searchstring.split(): + if word.startswith('-'): + storage = excluded + word = word[1:] + else: + storage = required + storage.add(self._stemmer.stem(word.lower())) + + return self.query(required, excluded, areas) |