implemented online search

git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk@5094 929543f6-e4f2-0310-98a6-ba3bd3dd1d04
author: blackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2007-05-22 21:11:58 +0000
committer: blackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04> 2007-05-22 21:11:58 +0000
commit: 57b6a2e9aaf13a745407bcf77ec182ec16b58e20 (patch)
tree: cb7263f3015af199e5048513f99a843958ccaa4f /sandbox/py-rest-doc/sphinx/search.py
parent: 6c34caabbe1eb96f3ddcf6900d63526b639df924 (diff)
download: docutils-57b6a2e9aaf13a745407bcf77ec182ec16b58e20.tar.gz
1 files changed, 64 insertions, 3 deletions
diff --git a/sandbox/py-rest-doc/sphinx/search.py b/sandbox/py-rest-doc/sphinx/search.py
index 93e2744a0..4507bcbca 100644
--- a/sandbox/py-rest-doc/sphinx/search.py
+++ b/sandbox/py-rest-doc/sphinx/search.py
@@ -9,7 +9,9 @@
     :license: Python license.
 """
 import re
+import pickle
 
+from collections import defaultdict
 from docutils.nodes import Text, NodeVisitor
 from .stemmer import PorterStemmer
 from .json import dump_json
@@ -29,6 +31,9 @@ class Stemmer(PorterStemmer):
 
 
 class WordCollector(NodeVisitor):
+    """
+    A special visitor that collects words for the `IndexBuilder`.
+    """
 
     def __init__(self, document):
         NodeVisitor.__init__(self, document)
@@ -40,6 +45,14 @@ class WordCollector(NodeVisitor):
 
 
 class IndexBuilder(object):
+    """
+    Helper class that creates a searchindex based on the doctrees
+    passed to the `feed` method.
+    """
+    formats = {
+        'json':     dump_json,
+        'pickle':   pickle.dumps
+    }
 
     def __init__(self):
         self._filenames = {}
@@ -48,8 +61,16 @@ class IndexBuilder(object):
         self._categories = {}
         self._stemmer = Stemmer()
 
-    def dump(self, stream):
-        stream.write(dump_json([
+    def dump(self, stream, format):
+        """Dump the frozen index to a stream."""
+        stream.write(self.formats[format](self.freeze()))
+
+    def freeze(self):
+        """
+        Create a usable data structure. You can pass this output
+        to the `SearchFrontend` to search the index.
+        """
+        return [
             [k for k, v in sorted(self._filenames.items(),
                                   key=lambda x: x[1])],
             dict(item for item in sorted(self._categories.items(),
@@ -58,9 +79,10 @@ class IndexBuilder(object):
                                   key=lambda x: x[0])],
             dict(item for item in sorted(self._mapping.items(),
                                          key=lambda x: x[0])),
-        ]))
+        ]
 
     def feed(self, filename, category, title, doctree):
+        """Feed a doctree to the index."""
         file_id = self._filenames.setdefault(filename, len(self._filenames))
         self._titles[file_id] = title
         visitor = WordCollector(doctree)
@@ -69,3 +91,42 @@ class IndexBuilder(object):
         for word in word_re.findall(title) + visitor.found_words:
             self._mapping.setdefault(self._stemmer.stem(word.lower()),
                                      set()).add(file_id)
+
+
+class SearchFrontend(object):
+    """
+    This class acts as a frontend for the search index. It can search
+    a searchindex as provided by `IndexBuilder`.
+    """
+
+    def __init__(self, index):
+        self.filenames, self.areas, self.titles, self.words = index
+        self._stemmer = Stemmer()
+
+    def query(self, required, excluded, areas):
+        file_map = defaultdict(set)
+        for word in required:
+            if word not in self.words:
+                break
+            for fid in self.words[word]:
+                file_map[fid].add(word)
+
+        return sorted(((self.filenames[fid], self.titles[fid])
+            for fid, words in file_map.iteritems()
+            if len(words) == len(required) and
+               any(fid in self.areas.get(area, ()) for area in areas) and not
+               any(fid in self.words.get(word, ()) for word in excluded)
+        ), key=lambda x: x[1].lower())
+
+    def search(self, searchstring, areas):
+        required = set()
+        excluded = set()
+        for word in searchstring.split():
+            if word.startswith('-'):
+                storage = excluded
+                word = word[1:]
+            else:
+                storage = required
+            storage.add(self._stemmer.stem(word.lower()))
+
+        return self.query(required, excluded, areas)
author	blackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2007-05-22 21:11:58 +0000
committer	blackbird <blackbird@929543f6-e4f2-0310-98a6-ba3bd3dd1d04>	2007-05-22 21:11:58 +0000
commit	57b6a2e9aaf13a745407bcf77ec182ec16b58e20 (patch)
tree	cb7263f3015af199e5048513f99a843958ccaa4f /sandbox/py-rest-doc/sphinx/search.py
parent	6c34caabbe1eb96f3ddcf6900d63526b639df924 (diff)
download	docutils-57b6a2e9aaf13a745407bcf77ec182ec16b58e20.tar.gz