author     mitsuhiko <devnull@localhost>    2008-09-20 14:13:15 +0200
committer  mitsuhiko <devnull@localhost>    2008-09-20 14:13:15 +0200
commit     003d7c25dd8c13a8bd86a79f1cd1ae1454f89c92 (patch)
tree       a3cf26e92e22fd47ef280d9da3182463bab1c01a
parent     cd27ebdc05f0faf8067d86f1aeaea5c42e3568b8 (diff)
parent     f217fdc4eefa21b66a8767bc74e7d89edc67f927 (diff)
download   pygments-003d7c25dd8c13a8bd86a79f1cd1ae1454f89c92.tar.gz
Automated merge with ssh://team@pocoo.org/pygments-main
-rw-r--r--  CHANGES                       |  6
-rw-r--r--  pygments/formatters/latex.py  | 22
-rw-r--r--  pygments/lexer.py             |  7
-rw-r--r--  pygments/lexers/functional.py |  9
-rw-r--r--  pygments/token.py             | 18
-rw-r--r--  tests/test_token.py           | 20

6 files changed, 38 insertions, 44 deletions
diff --git a/CHANGES b/CHANGES
index fd74c95e..c0054ea8 100644
--- a/CHANGES
+++ b/CHANGES
@@ -5,11 +5,17 @@ Version 0.12
------------
(codename not selected, release XXX XX, 2008)
+- Don't use join(splitlines()) when converting newlines to ``\n``,
+ because that doesn't keep all newlines at the end when the
+ ``stripnl`` lexer option is False.
+
- Add Tango style, written by Andre Roberge for the Crunchy project.
- Add Python3TracebackLexer and ``python3`` option to
PythonConsoleLexer.
+- Fix a few bugs in the Haskell lexer.
+
- Fix PythonTracebackLexer to be able to recognize SyntaxError and
KeyboardInterrupt (#360).
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index 529fa8f9..af8da1bb 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -5,7 +5,7 @@
Formatter for LaTeX fancyvrb output.
- :copyright: 2006-2007 by Georg Brandl.
+ :copyright: 2006-2008 by Georg Brandl.
:license: BSD, see LICENSE for more details.
"""
import StringIO
@@ -18,13 +18,13 @@ from pygments.util import get_bool_opt, get_int_opt
__all__ = ['LatexFormatter']
-def escape_tex(text):
+def escape_tex(text, commandprefix):
return text.replace('@', '\x00'). \
replace('[', '\x01'). \
replace(']', '\x02'). \
- replace('\x00', '@at[]').\
- replace('\x01', '@lb[]').\
- replace('\x02', '@rb[]')
+ replace('\x00', '@%sZat[]' % commandprefix).\
+ replace('\x01', '@%sZlb[]' % commandprefix).\
+ replace('\x02', '@%sZrb[]' % commandprefix)
DOC_TEMPLATE = r'''
@@ -194,10 +194,12 @@ class LatexFormatter(Formatter):
used to format text in the verbatim environment. ``arg`` is ignored.
"""
nc = '\\newcommand'
- return '%s\\at{@}\n%s\\lb{[}\n%s\\rb{]}\n' % (nc, nc, nc) + \
- '\n'.join(['\\newcommand\\%s[1]{%s}' % (alias, cmndef)
- for alias, cmndef in self.cmd2def.iteritems()
- if cmndef != '#1'])
+ cp = self.commandprefix
+ return (
+ '%s\\%sZat{@}\n%s\\%sZlb{[}\n%s\\%sZrb{]}\n' % (nc, cp, nc, cp, nc, cp) +
+ '\n'.join(['\\newcommand\\%s[1]{%s}' % (alias, cmndef)
+ for alias, cmndef in self.cmd2def.iteritems()
+ if cmndef != '#1']))
def format(self, tokensource, outfile):
# TODO: add support for background colors
@@ -220,7 +222,7 @@ class LatexFormatter(Formatter):
for ttype, value in tokensource:
if enc:
value = value.encode(enc)
- value = escape_tex(value)
+ value = escape_tex(value, self.commandprefix)
cmd = self.ttype2cmd.get(ttype)
while cmd is None:
ttype = ttype.parent
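
[Editor's note] The latex.py hunks above thread the formatter's ``commandprefix`` through ``escape_tex`` so that the generated helper macros (``...Zat``, ``...Zlb``, ``...Zrb``) carry the same prefix as the other style commands. A minimal standalone sketch of the escaping round-trip, not part of the commit; "PY" is used only as an assumed example value for ``commandprefix``:

    # Sketch of the new escaping scheme; the placeholder bytes keep text
    # that is already escaped from being escaped again on later passes.
    def escape_tex(text, commandprefix):
        return text.replace('@', '\x00'). \
                    replace('[', '\x01'). \
                    replace(']', '\x02'). \
                    replace('\x00', '@%sZat[]' % commandprefix).\
                    replace('\x01', '@%sZlb[]' % commandprefix).\
                    replace('\x02', '@%sZrb[]' % commandprefix)

    print(escape_tex('a[0] @ b', 'PY'))
    # -> a@PYZlb[]0@PYZrb[] @PYZat[] b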
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 5c41d4a2..cba93e4f 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -127,10 +127,9 @@ class Lexer(object):
Also preprocess the text, i.e. expand tabs and strip it if
wanted and applies registered filters.
"""
- if isinstance(text, unicode):
- text = u'\n'.join(text.splitlines())
- else:
- text = '\n'.join(text.splitlines())
+ text = text.replace('\r\n', '\n')
+ text = text.replace('\r', '\n')
+ if not isinstance(text, unicode):
if self.encoding == 'guess':
try:
text = text.decode('utf-8')
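
[Editor's note] The lexer.py change replaces the old ``'\n'.join(text.splitlines())`` normalization with two plain ``replace()`` calls, matching the CHANGES entry: ``splitlines()`` drops newlines at the end of the input, which broke lexers run with ``stripnl=False``. A quick illustration, not part of the commit:

    text = 'print 1\r\n\r\n'

    old = '\n'.join(text.splitlines())                    # 'print 1\n'   -- trailing newline lost
    new = text.replace('\r\n', '\n').replace('\r', '\n')  # 'print 1\n\n' -- both newlines kept

    assert old == 'print 1\n'
    assert new == 'print 1\n\n'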
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index 18432f9a..c885cffd 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -5,7 +5,7 @@
Lexers for functional languages.
- :copyright: 2006-2007 by Georg Brandl, Marek Kubica,
+ :copyright: 2006-2008 by Georg Brandl, Marek Kubica,
Adam Blinkinsop <blinks@acm.org>, Matteo Sasso.
:license: BSD, see LICENSE for more details.
"""
@@ -353,6 +353,7 @@ class HaskellLexer(RegexLexer):
'root': [
# Whitespace:
(r'\s+', Text),
+ #(r'--\s*|.*$', Comment.Doc),
(r'--.*$', Comment.Single),
(r'{-', Comment.Multiline, 'comment'),
# Lexemes:
@@ -360,7 +361,7 @@ class HaskellLexer(RegexLexer):
(r'\bimport\b', Keyword.Reserved, 'import'),
(r'\bmodule\b', Keyword.Reserved, 'module'),
(r'\berror\b', Name.Exception),
- (r'\b(%s)\b' % '|'.join(reserved), Keyword.Reserved),
+ (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
(r'^[_a-z][\w\']*', Name.Function),
(r'[_a-z][\w\']*', Name),
(r'[A-Z][\w\']*', Keyword.Type),
@@ -410,7 +411,9 @@ class HaskellLexer(RegexLexer):
'funclist': [
(r'\s+', Text),
(r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
- (r'[a-zA-Z0-9_]+', Name.Function),
+ (r'[_a-z][\w\']+', Name.Function),
+ (r'--.*$', Comment.Single),
+ (r'{-', Comment.Multiline, 'comment'),
(r',', Punctuation),
(r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
# (HACK, but it makes sense to push two instances, believe me)
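
[Editor's note] The reserved-word rule in the Haskell lexer now carries a ``(?!\')`` negative lookahead so that identifiers such as ``data'`` or ``case'`` (legal in Haskell, where the apostrophe is part of the name) are no longer lexed as keywords. A small demonstration with an abbreviated reserved list, illustrative only and not part of the commit:

    import re

    reserved = ['case', 'data', 'let', 'where']   # abbreviated for the demo
    old = re.compile(r'\b(%s)\b' % '|'.join(reserved))
    new = re.compile(r'\b(%s)(?!\')\b' % '|'.join(reserved))

    assert old.match("data'")               # old rule wrongly treats data' as a keyword
    assert new.match("data'") is None       # lookahead rejects the primed identifier
    assert new.match('data Maybe = Just')   # plain keyword use still matches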
diff --git a/pygments/token.py b/pygments/token.py
index 6db41a56..4fc50919 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -201,21 +201,3 @@ STANDARD_TYPES = {
Generic.Subheading: 'gu',
Generic.Traceback: 'gt',
}
-
-
-
-if __name__ == '__main__':
- import sys
- # sanity check for token name dict: no duplicate entries!
- stp = STANDARD_TYPES.copy()
- stp[Token] = '---' # Token and Text do conflict, that is okay
- t = {}
- for k, v in stp.iteritems():
- t.setdefault(v, []).append(k)
- if len(t) == len(stp):
- print 'Okay!'
- sys.exit()
-
- for k, v in t.iteritems():
- if len(v) > 1:
- print "%r has more than one key: %r" % (k, v)
diff --git a/tests/test_token.py b/tests/test_token.py
index 8cf779f7..4c83b9ea 100644
--- a/tests/test_token.py
+++ b/tests/test_token.py
@@ -36,15 +36,17 @@ class TokenTest(unittest.TestCase):
self.assert_(token.string_to_tokentype('String') is token.String)
def test_sanity_check(self):
- try:
- try:
- old_stdout = sys.stdout
- sys.stdout = StringIO.StringIO()
- execfile(token.__file__.rstrip('c'), {'__name__': '__main__'})
- finally:
- sys.stdout = old_stdout
- except SystemExit:
- pass
+ stp = token.STANDARD_TYPES.copy()
+ stp[token.Token] = '---' # Token and Text do conflict, that is okay
+ t = {}
+ for k, v in stp.iteritems():
+ t.setdefault(v, []).append(k)
+ if len(t) == len(stp):
+ return # Okay
+
+ for k, v in t.iteritems():
+ if len(v) > 1:
+ self.fail("%r has more than one key: %r" % (k, v))
if __name__ == '__main__':
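
[Editor's note] The sanity check that used to sit in a ``__main__`` block of pygments/token.py now runs as a proper unit test: it inverts ``STANDARD_TYPES`` and fails if two token types claim the same short class name. The same idea as a standalone sketch; ``find_duplicates`` and the mapping below are made-up stand-ins, not the real STANDARD_TYPES:

    def find_duplicates(standard_types):
        # Invert the ttype -> shortname mapping and report any shortname
        # that is claimed by more than one token type.
        by_name = {}
        for ttype, shortname in standard_types.items():
            by_name.setdefault(shortname, []).append(ttype)
        dupes = {}
        for name, ttypes in by_name.items():
            if len(ttypes) > 1:
                dupes[name] = ttypes
        return dupes

    print(find_duplicates({'Keyword': 'k', 'Name': 'n', 'Other': 'k'}))
    # -> {'k': ['Keyword', 'Other']}  (ordering may vary)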