summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorblackbird <devnull@localhost>2007-03-25 21:36:54 +0200
committerblackbird <devnull@localhost>2007-03-25 21:36:54 +0200
commit491c85e250a8c2d988059d9aa7aaf44a5340accf (patch)
treef8714529c45303477c5c240496cd9e27c1d187b3
parentf0f5db8a817d7927126609c0f7c18af232c77bb2 (diff)
downloadpygments-491c85e250a8c2d988059d9aa7aaf44a5340accf.tar.gz
[svn] improved do_insertions and added support for nested lexers in rst lexer (.. sourcecode:: python etc)
-rw-r--r--pygments/lexer.py29
-rw-r--r--pygments/lexers/text.py55
2 files changed, 74 insertions, 10 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index bffec0f1..2648b07b 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -588,7 +588,7 @@ def do_insertions(insertions, tokens):
The result is a combined token stream.
- FIXME: The indices yielded by this function are not correct!
+ TODO: clean up the code here.
"""
insertions = iter(insertions)
try:
@@ -599,21 +599,34 @@ def do_insertions(insertions, tokens):
yield item
return
+ realpos = None
insleft = True
+
+ # iterate over the token stream where we want to insert
+ # the tokens from the insertion list.
for i, t, v in tokens:
+        # first iteration: store the position of the first item
+ if realpos is None:
+ realpos = i
oldi = 0
while insleft and i + len(v) >= index:
- yield i, t, v[oldi:index-i]
- for item in itokens:
- yield item
- oldi = index-i
+ tmpval = v[oldi:index - i]
+ yield realpos, t, tmpval
+ realpos += len(tmpval)
+ for it_index, it_token, it_value in itokens:
+ yield realpos, it_token, it_value
+ realpos += len(it_value)
+ oldi = index - i
try:
index, itokens = insertions.next()
except StopIteration:
insleft = False
break # not strictly necessary
- yield i, t, v[oldi:]
+ yield realpos, t, v[oldi:]
+ realpos += len(v) - oldi
+
# leftover tokens
if insleft:
- for item in itokens:
- yield item
+ for p, t, v in itokens:
+ yield realpos, t, v
+ realpos += len(v)
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py
index 09ae3ed5..806a32b3 100644
--- a/pygments/lexers/text.py
+++ b/pygments/lexers/text.py
@@ -18,10 +18,11 @@ try:
except NameError:
from sets import Set as set
-from pygments.lexer import RegexLexer, bygroups, include, using, this
+from pygments.lexer import RegexLexer, bygroups, include, using, this, \
+ do_insertions
from pygments.token import Punctuation, \
Text, Comment, Keyword, Name, String, Generic, Operator, Number
-from pygments.util import ClassNotFound
+from pygments.util import get_bool_opt
__all__ = ['IniLexer', 'SourcesListLexer', 'MakefileLexer', 'DiffLexer',
@@ -459,6 +460,48 @@ class RstLexer(RegexLexer):
mimetypes = ["text/x-rst"]
flags = re.MULTILINE
+ def _handle_sourcecode(self, match):
+ from pygments.lexers import get_lexer_by_name
+ from pygments.util import ClassNotFound
+
+ # section header
+ yield match.start(1), Punctuation, match.group(1)
+ yield match.start(2), Text, match.group(2)
+ yield match.start(3), Operator.Word, match.group(3)
+ yield match.start(4), Punctuation, match.group(4)
+ yield match.start(5), Text, match.group(5)
+ yield match.start(6), Keyword, match.group(6)
+ yield match.start(7), Text, match.group(7)
+
+        # look up the lexer if code-block handling is enabled and the lexer exists
+ lexer = None
+ if self.handlecodeblocks:
+ try:
+ lexer = get_lexer_by_name(match.group(6).strip())
+ except ClassNotFound:
+ pass
+ indention = match.group(8)
+ indention_size = len(indention)
+ code = (indention + match.group(9) + match.group(10) + match.group(11))
+
+ # no lexer for this language. handle it like it was a code block
+ if lexer is None:
+ yield match.start(8), String, code
+ return
+
+ # highlight the lines with the lexer.
+ ins = []
+ codelines = code.splitlines(True)
+ code = ''
+ for line in codelines:
+ if len(line) > indention_size:
+ ins.append((len(code), [(0, Text, line[:indention_size])]))
+ code += line[indention_size:]
+ else:
+ code += line
+ for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
+ yield item
+
tokens = {
'root': [
# Heading with overline
@@ -482,6 +525,10 @@ class RstLexer(RegexLexer):
bygroups(Text, Number, using(this, state='inline'))),
(r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
bygroups(Text, Number, using(this, state='inline'))),
+ # sourcecode sections
+ (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
+ r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
+ _handle_sourcecode),
# Introducing a section
(r'^( *\.\.)(\s*)(\w+)(::)(?:([ \t]*)(.+))?',
bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, Keyword)),
@@ -527,3 +574,7 @@ class RstLexer(RegexLexer):
(r'[`\\]', String),
]
}
+
+ def __init__(self, **options):
+ self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
+ RegexLexer.__init__(self, **options)