summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorblackbird <devnull@localhost>2007-03-25 21:36:54 +0200
committerblackbird <devnull@localhost>2007-03-25 21:36:54 +0200
commit491c85e250a8c2d988059d9aa7aaf44a5340accf (patch)
treef8714529c45303477c5c240496cd9e27c1d187b3
parentf0f5db8a817d7927126609c0f7c18af232c77bb2 (diff)
downloadpygments-491c85e250a8c2d988059d9aa7aaf44a5340accf.tar.gz
[svn] improved do_insertions and added support for nested lexers in rst lexer (.. sourcecode:: python etc)
-rw-r--r--pygments/lexer.py29
-rw-r--r--pygments/lexers/text.py55
2 files changed, 74 insertions, 10 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index bffec0f1..2648b07b 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -588,7 +588,7 @@ def do_insertions(insertions, tokens):
The result is a combined token stream.
- FIXME: The indices yielded by this function are not correct!
+ TODO: clean up the code here.
"""
insertions = iter(insertions)
try:
@@ -599,21 +599,34 @@ def do_insertions(insertions, tokens):
yield item
return
+ realpos = None
insleft = True
+
+ # iterate over the token stream where we want to insert
+ # the tokens from the insertion list.
for i, t, v in tokens:
+        # first iteration: store the position of the first item
+ if realpos is None:
+ realpos = i
oldi = 0
while insleft and i + len(v) >= index:
- yield i, t, v[oldi:index-i]
- for item in itokens:
- yield item
- oldi = index-i
+ tmpval = v[oldi:index - i]
+ yield realpos, t, tmpval
+ realpos += len(tmpval)
+ for it_index, it_token, it_value in itokens:
+ yield realpos, it_token, it_value
+ realpos += len(it_value)
+ oldi = index - i
try:
index, itokens = insertions.next()
except StopIteration:
insleft = False
break # not strictly necessary
- yield i, t, v[oldi:]
+ yield realpos, t, v[oldi:]
+ realpos += len(v) - oldi
+
# leftover tokens
if insleft:
- for item in itokens:
- yield item
+ for p, t, v in itokens:
+ yield realpos, t, v
+ realpos += len(v)
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py
index 09ae3ed5..806a32b3 100644
--- a/pygments/lexers/text.py
+++ b/pygments/lexers/text.py
@@ -18,10 +18,11 @@ try:
except NameError:
from sets import Set as set
-from pygments.lexer import RegexLexer, bygroups, include, using, this
+from pygments.lexer import RegexLexer, bygroups, include, using, this, \
+ do_insertions
from pygments.token import Punctuation, \
Text, Comment, Keyword, Name, String, Generic, Operator, Number
-from pygments.util import ClassNotFound
+from pygments.util import get_bool_opt
__all__ = ['IniLexer', 'SourcesListLexer', 'MakefileLexer', 'DiffLexer',
@@ -459,6 +460,48 @@ class RstLexer(RegexLexer):
mimetypes = ["text/x-rst"]
flags = re.MULTILINE
+ def _handle_sourcecode(self, match):
+ from pygments.lexers import get_lexer_by_name
+ from pygments.util import ClassNotFound
+
+ # section header
+ yield match.start(1), Punctuation, match.group(1)
+ yield match.start(2), Text, match.group(2)
+ yield match.start(3), Operator.Word, match.group(3)
+ yield match.start(4), Punctuation, match.group(4)
+ yield match.start(5), Text, match.group(5)
+ yield match.start(6), Keyword, match.group(6)
+ yield match.start(7), Text, match.group(7)
+
+        # look up the lexer if code-block handling is enabled and the lexer exists
+ lexer = None
+ if self.handlecodeblocks:
+ try:
+ lexer = get_lexer_by_name(match.group(6).strip())
+ except ClassNotFound:
+ pass
+ indention = match.group(8)
+ indention_size = len(indention)
+ code = (indention + match.group(9) + match.group(10) + match.group(11))
+
+ # no lexer for this language. handle it like it was a code block
+ if lexer is None:
+ yield match.start(8), String, code
+ return
+
+ # highlight the lines with the lexer.
+ ins = []
+ codelines = code.splitlines(True)
+ code = ''
+ for line in codelines:
+ if len(line) > indention_size:
+ ins.append((len(code), [(0, Text, line[:indention_size])]))
+ code += line[indention_size:]
+ else:
+ code += line
+ for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
+ yield item
+
tokens = {
'root': [
# Heading with overline
@@ -482,6 +525,10 @@ class RstLexer(RegexLexer):
bygroups(Text, Number, using(this, state='inline'))),
(r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
bygroups(Text, Number, using(this, state='inline'))),
+ # sourcecode sections
+ (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
+ r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
+ _handle_sourcecode),
# Introducing a section
(r'^( *\.\.)(\s*)(\w+)(::)(?:([ \t]*)(.+))?',
bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
@@ -527,3 +574,7 @@ class RstLexer(RegexLexer):
(r'[`\\]', String),
]
}
+
+ def __init__(self, **options):
+ self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
+ RegexLexer.__init__(self, **options)