Diffstat (limited to 'pygments')
-rw-r--r--   pygments/cmdline.py           | 10
-rw-r--r--   pygments/formatters/latex.py  | 92
-rw-r--r--   pygments/lexers/_mapping.py   |  4
-rw-r--r--   pygments/lexers/compiled.py   |  6
-rw-r--r--   pygments/lexers/functional.py |  2
-rw-r--r--   pygments/lexers/jvm.py        |  2
-rw-r--r--   pygments/lexers/math.py       | 52
-rw-r--r--   pygments/lexers/other.py      | 91
-rw-r--r--   pygments/token.py             |  2
9 files changed, 247 insertions(+), 14 deletions(-)
diff --git a/pygments/cmdline.py b/pygments/cmdline.py
index af8d48ea..a4ed83fa 100644
--- a/pygments/cmdline.py
+++ b/pygments/cmdline.py
@@ -19,6 +19,7 @@ from pygments import __version__, highlight
 from pygments.util import ClassNotFound, OptionError, docstring_headline
 from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \
     find_lexer_class, guess_lexer, TextLexer
+from pygments.formatters.latex import LatexEmbededLexer, LatexFormatter
 from pygments.formatters import get_all_formatters, get_formatter_by_name, \
     get_formatter_for_filename, find_formatter_class, \
     TerminalFormatter  # pylint:disable-msg=E0611
@@ -405,6 +406,15 @@ def main(args=sys.argv):
         else:
             code = sys.stdin.read()
 
+    # When using the LaTeX formatter and the option `escapeinside` is
+    # specified, we need a special lexer which collects escaped text
+    # before running the chosen language lexer.
+    escapeinside = parsed_opts.get('escapeinside', '')
+    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
+        left = escapeinside[0]
+        right = escapeinside[1]
+        lexer = LatexEmbededLexer(left, right, lexer)
+
     # No encoding given? Use latin1 if output file given,
     # stdin/stdout encoding otherwise.
     # (This is a compromise, I'm not too happy with it...)
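Note: a minimal sketch (not part of the diff) of the same wiring done from Python instead of the command line; the `|` delimiters and the sample snippet are arbitrary choices for illustration:

    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters.latex import LatexEmbededLexer, LatexFormatter

    # Wrap the language lexer the same way the new cmdline branch does
    # when `escapeinside` is two characters and the formatter is LaTeX.
    fmter = LatexFormatter(escapeinside='||')
    lexer = LatexEmbededLexer('|', '|', PythonLexer())
    print(highlight('x = 1  # |\\emph{escaped to LaTeX}|', lexer, fmter))

The equivalent invocation would be something like `pygmentize -f latex -P escapeinside="||" example.py` (file name hypothetical).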
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index 0f2397eb..c70dba38 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -12,12 +12,13 @@ from __future__ import division
 
 from pygments.formatter import Formatter
+from pygments.lexer import Lexer
 from pygments.token import Token, STANDARD_TYPES
 from pygments.util import get_bool_opt, get_int_opt, StringIO, xrange, \
     iteritems
 
-__all__ = ['LatexFormatter']
+__all__ = ['LatexEmbededLexer', 'LatexFormatter']
 
 
 def escape_tex(text, commandprefix):
@@ -226,6 +227,15 @@ class LatexFormatter(Formatter):
         ``False``).
 
         .. versionadded:: 1.2
+
+    `escapeinside`
+        If set to a string of length 2, enables escaping to LaTeX. Text
+        delimited by these 2 characters is read as LaTeX code and
+        typeset accordingly. It has no effect in string literals. It has
+        no effect in comments if `texcomments` or `mathescape` is
+        set. (default: ``''``).
+
+        .. versionadded:: 2.0
     """
     name = 'LaTeX'
     aliases = ['latex', 'tex']
@@ -243,6 +253,13 @@ class LatexFormatter(Formatter):
         self.commandprefix = options.get('commandprefix', 'PY')
         self.texcomments = get_bool_opt(options, 'texcomments', False)
         self.mathescape = get_bool_opt(options, 'mathescape', False)
+        self.escapeinside = options.get('escapeinside', '')
+
+        if len(self.escapeinside) == 2:
+            self.left = self.escapeinside[0]
+            self.right = self.escapeinside[1]
+        else:
+            self.escapeinside = ''
 
         self._create_stylesheet()
 
@@ -320,8 +337,8 @@ class LatexFormatter(Formatter):
             outfile.write(u',numbers=left' +
                           (start and u',firstnumber=%d' % start or u'') +
                           (step and u',stepnumber=%d' % step or u''))
-        if self.mathescape or self.texcomments:
-            outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
+        if self.mathescape or self.texcomments or self.escapeinside:
+            outfile.write(ur',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
         if self.verboptions:
             outfile.write(u',' + self.verboptions)
         outfile.write(u']\n')
@@ -350,9 +367,22 @@ class LatexFormatter(Formatter):
                         parts[i] = escape_tex(part, self.commandprefix)
                         in_math = not in_math
                     value = '$'.join(parts)
+                elif self.escapeinside:
+                    text = value
+                    value = ''
+                    while len(text) > 0:
+                        a, sep1, text = text.partition(self.left)
+                        if len(sep1) > 0:
+                            b, sep2, text = text.partition(self.right)
+                            if len(sep2) > 0:
+                                value = value + escape_tex(a, self.commandprefix) + b
+                            else:
+                                value = value + escape_tex(a + sep1 + b, self.commandprefix)
+                        else:
+                            value = value + escape_tex(a, self.commandprefix)
                 else:
                     value = escape_tex(value, self.commandprefix)
-            else:
+            elif not (ttype in Token.Escape):
                 value = escape_tex(value, self.commandprefix)
             styles = []
             while ttype is not Token:
@@ -384,3 +414,57 @@ class LatexFormatter(Formatter):
               encoding  = self.encoding or 'latin1',
               styledefs = self.get_style_defs(),
               code      = outfile.getvalue()))
+
+
+class LatexEmbededLexer(Lexer):
+    r"""
+    This lexer takes one lexer as argument, the lexer for the language
+    being formatted, and the left and right delimiters for escaped text.
+
+    First everything is scanned using the language lexer to obtain
+    strings and comments. All other consecutive tokens are merged and
+    the resulting text is scanned for escaped segments, which are given
+    the Token.Escape type. Finally text that is not escaped is scanned
+    again with the language lexer.
+    """
+    def __init__(self, left, right, lang, **options):
+        self.left = left
+        self.right = right
+        self.lang = lang
+        Lexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text):
+        buf = ''
+        for i, t, v in self.lang.get_tokens_unprocessed(text):
+            if t in Token.Comment or t in Token.String:
+                if buf:
+                    for x in self.get_tokens_aux(idx, buf):
+                        yield x
+                    buf = ''
+                yield i, t, v
+            else:
+                if not buf:
+                    idx = i
+                buf += v
+        if buf:
+            for x in self.get_tokens_aux(idx, buf):
+                yield x
+
+    def get_tokens_aux(self, index, text):
+        while text:
+            a, sep1, text = text.partition(self.left)
+            if a:
+                for i, t, v in self.lang.get_tokens_unprocessed(a):
+                    yield index + i, t, v
+                index += len(a)
+            if sep1:
+                b, sep2, text = text.partition(self.right)
+                if sep2:
+                    yield index + len(sep1), Token.Escape, b
+                    index += len(sep1) + len(b) + len(sep2)
+                else:
+                    yield index, Token.Error, sep1
+                    index += len(sep1)
+                    text = b
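Note: to make the merge-and-rescan behaviour described in the docstring concrete, a small sketch (assuming the names introduced above) of the expected token stream; only the delimited span comes back as Token.Escape, everything else is re-lexed by the language lexer:

    from pygments.lexers import PythonLexer
    from pygments.formatters.latex import LatexEmbededLexer

    lexer = LatexEmbededLexer('|', '|', PythonLexer())
    for index, tokentype, value in lexer.get_tokens_unprocessed('a = |\\alpha| + 1'):
        print(index, tokentype, repr(value))
    # 'a = ' and ' + 1' arrive as ordinary Python tokens;
    # '\alpha' is yielded once as Token.Escape.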
+ """ + def __init__(self, left, right, lang, **options): + self.left = left + self.right = right + self.lang = lang + Lexer.__init__(self, **options) + + def get_tokens_unprocessed(self, text): + buf = '' + for i, t, v in self.lang.get_tokens_unprocessed(text): + if t in Token.Comment or t in Token.String: + if buf: + for x in self.get_tokens_aux(idx, buf): + yield x + buf = '' + yield i, t, v + else: + if not buf: + idx = i; + buf += v + if buf: + for x in self.get_tokens_aux(idx, buf): + yield x + + def get_tokens_aux(self, index, text): + while text: + a, sep1, text = text.partition(self.left) + if a: + for i, t, v in self.lang.get_tokens_unprocessed(a): + yield index + i, t, v + index += len(a) + if sep1: + b, sep2, text = text.partition(self.right) + if sep2: + yield index + len(sep1), Token.Escape, b + index += len(sep1) + len(b) + len(sep2) + else: + yield index, Token.Error, sep1 + index += len(sep1) + text = b + diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 43b41c3a..e4b1e38c 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -17,6 +17,7 @@ from __future__ import print_function LEXERS = { 'ABAPLexer': ('pygments.lexers.other', 'ABAP', ('abap',), ('*.abap',), ('text/x-abap',)), + 'APLLexer': ('pygments.lexers.other', 'APL', ('apl',), ('*.apl',), ()), 'ActionScript3Lexer': ('pygments.lexers.web', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')), 'ActionScriptLexer': ('pygments.lexers.web', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3')), 'AdaLexer': ('pygments.lexers.compiled', 'Ada', ('ada', 'ada95ada2005'), ('*.adb', '*.ads', '*.ada'), ('text/x-ada',)), @@ -72,7 +73,7 @@ LEXERS = { 'ColdfusionCFCLexer': ('pygments.lexers.templates', 'Coldfusion CFC', ('cfc',), ('*.cfc',), ()), 'ColdfusionHtmlLexer': ('pygments.lexers.templates', 'Coldfusion HTML', ('cfm',), ('*.cfm', '*.cfml'), ('application/x-coldfusion',)), 'ColdfusionLexer': ('pygments.lexers.templates', 'cfstatement', ('cfs',), (), ()), - 'CommonLispLexer': ('pygments.lexers.functional', 'Common Lisp', ('common-lisp', 'cl', 'lisp'), ('*.cl', '*.lisp', '*.el'), ('text/x-common-lisp',)), + 'CommonLispLexer': ('pygments.lexers.functional', 'Common Lisp', ('common-lisp', 'cl', 'lisp', 'elisp', 'emacs'), ('*.cl', '*.lisp', '*.el'), ('text/x-common-lisp',)), 'CoqLexer': ('pygments.lexers.functional', 'Coq', ('coq',), ('*.v',), ('text/x-coq',)), 'CppLexer': ('pygments.lexers.compiled', 'C++', ('cpp', 'c++'), ('*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx', '*.C', '*.H', '*.cp', '*.CPP'), ('text/x-c++hdr', 'text/x-c++src')), 'CppObjdumpLexer': ('pygments.lexers.asm', 'cpp-objdump', ('cpp-objdump', 'c++-objdumb', 'cxx-objdump'), ('*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump'), ('text/x-cpp-objdump',)), @@ -118,6 +119,7 @@ LEXERS = { 'FelixLexer': ('pygments.lexers.compiled', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)), 'FortranLexer': ('pygments.lexers.compiled', 'Fortran', ('fortran',), ('*.f', '*.f90', '*.F', '*.F90'), ('text/x-fortran',)), 'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()), + 'GAPLexer': ('pygments.lexers.math', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()), 'GLShaderLexer': ('pygments.lexers.compiled', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)), 
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 9a242d61..2737859b 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -1423,7 +1423,7 @@ def objective(baselexer):
             (r'^([-+])(\s*)'                         # method marker
              r'(\(.*?\))?(\s*)'                      # return type
              r'([a-zA-Z$_][a-zA-Z0-9$_]*:?)',        # begin of method name
-             bygroups(Keyword, Text, using(this),
+             bygroups(Punctuation, Text, using(this),
                       Text, Name.Function),
              'method'),
             inherit,
@@ -1434,8 +1434,8 @@ def objective(baselexer):
             # discussion in Issue 789
             (r',', Punctuation),
             (r'\.\.\.', Punctuation),
-            (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this),
-                                                              Name.Variable)),
+            (r'(\(.*?\))(\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)',
+             bygroups(using(this), Text, Name.Variable)),
             (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function),
             (';', Punctuation, '#pop'),
             ('{', Punctuation, 'function'),
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index 55ac1937..122114fa 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -723,7 +723,7 @@ class CommonLispLexer(RegexLexer):
     .. versionadded:: 0.9
     """
     name = 'Common Lisp'
-    aliases = ['common-lisp', 'cl', 'lisp']
+    aliases = ['common-lisp', 'cl', 'lisp', 'elisp', 'emacs']
     filenames = ['*.cl', '*.lisp', '*.el']  # use for Elisp too
    mimetypes = ['text/x-common-lisp']
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 3360ced3..12d9890c 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -66,7 +66,7 @@ class JavaLexer(RegexLexer):
             (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator),
             (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
             (r'0x[0-9a-fA-F]+', Number.Hex),
-            (r'[0-9]+L?', Number.Integer),
+            (r'[0-9]+(_+[0-9]+)*L?', Number.Integer),
             (r'\n', Text)
         ],
         'class': [
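Note: the JavaLexer change admits Java 7 underscore-separated integer literals; a sketch of how to observe it (the literal is an arbitrary example):

    from pygments.lexers.jvm import JavaLexer
    from pygments.token import Number

    # With the new pattern, '7_000_000_000L' is a single Number.Integer
    # token instead of being split at each underscore.
    for tokentype, value in JavaLexer().get_tokens('long x = 7_000_000_000L;'):
        if tokentype is Number.Integer:
            print(repr(value))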
diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py
index 1bce106c..e7a8948b 100644
--- a/pygments/lexers/math.py
+++ b/pygments/lexers/math.py
@@ -26,7 +26,7 @@ from pygments.lexers import _stan_builtins
 __all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer',
            'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer',
            'RConsoleLexer', 'SLexer', 'JagsLexer', 'BugsLexer', 'StanLexer',
-           'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer']
+           'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer', 'GAPLexer']
 
 
 class JuliaLexer(RegexLexer):
@@ -1972,3 +1972,53 @@ class MathematicaLexer(RegexLexer):
             (r'\s+', Text.Whitespace),
         ],
     }
+
+class GAPLexer(RegexLexer):
+    """
+    For `GAP <http://www.gap-system.org>`_ source code.
+
+    .. versionadded:: 2.0
+    """
+    name = 'GAP'
+    aliases = ['gap']
+    filenames = ['*.g', '*.gd', '*.gi', '*.gap']
+
+    tokens = {
+        'root': [
+            (r'#.*$', Comment.Single),
+            (r'"(?:[^"\\]|\\.)*"', String),
+            (r'\(|\)|\[|\]|\{|\}', Punctuation),
+            (r'''(?x)\b(?:
+                if|then|elif|else|fi|
+                for|while|do|od|
+                repeat|until|
+                break|continue|
+                function|local|return|end|
+                rec|
+                quit|QUIT|
+                IsBound|Unbind|
+                TryNextMethod|
+                Info|Assert
+               )\b''', Keyword),
+            (r'''(?x)\b(?:
+                true|false|fail|infinity
+               )\b''',
+             Name.Constant),
+            (r'''(?x)\b(?:
+                (Declare|Install)([A-Z][A-Za-z]+)|
+                BindGlobal|BIND_GLOBAL
+               )\b''',
+             Name.Builtin),
+            (r'\.|,|:=|;|=|\+|-|\*|/|\^|>|<', Operator),
+            (r'''(?x)\b(?:
+                and|or|not|mod|in
+               )\b''',
+             Operator.Word),
+            (r'''(?x)
+              (?:[a-zA-Z_0-9]+|`[^`]*`)
+              (?:::[a-zA-Z_0-9]+|`[^`]*`)*''', Name.Variable),
+            (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number),
+            (r'\.[0-9]+(?:e[0-9]+)?', Number),
+            (r'.', Text)
+        ]
+    }
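Note: a sketch exercising the new GAP lexer on a representative fragment (the GAP code itself is an arbitrary example):

    from pygments import highlight
    from pygments.lexers.math import GAPLexer
    from pygments.formatters import TerminalFormatter

    code = '''
    # greatest common divisor, iteratively
    gcd := function(a, b)
      local t;
      while b <> 0 do
        t := b; b := a mod b; a := t;
      od;
      return a;
    end;
    '''
    print(highlight(code, GAPLexer(), TerminalFormatter()))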
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index d9e7440a..80b06b28 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -36,7 +36,7 @@ __all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'MOOCodeLexer',
            'ECLLexer', 'UrbiscriptLexer', 'OpenEdgeLexer', 'BroLexer',
            'MscgenLexer', 'KconfigLexer', 'VGLLexer', 'SourcePawnLexer',
            'RobotFrameworkLexer', 'PuppetLexer', 'NSISLexer', 'RPMSpecLexer',
-           'CbmBasicV2Lexer', 'AutoItLexer', 'RexxLexer']
+           'CbmBasicV2Lexer', 'AutoItLexer', 'RexxLexer', 'APLLexer']
 
 
 class ECLLexer(RegexLexer):
@@ -2528,11 +2528,11 @@ class AwkLexer(RegexLexer):
         'root': [
             (r'^(?=\s|/)', Text, 'slashstartsregex'),
             include('commentsandwhitespace'),
-            (r'\+\+|--|\|\||&&|in|\$|!?~|'
+            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
             r'(\*\*|[-<>+*%\^/!=])=?', Operator, 'slashstartsregex'),
             (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
             (r'[})\].]', Punctuation),
-            (r'(break|continue|do|while|exit|for|if|'
+            (r'(break|continue|do|while|exit|for|if|else|'
             r'return)\b', Keyword, 'slashstartsregex'),
             (r'function\b', Keyword.Declaration, 'slashstartsregex'),
             (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
@@ -3799,3 +3799,88 @@ class RexxLexer(RegexLexer):
                 for (pattern, weight) in RexxLexer.PATTERNS_AND_WEIGHTS
                 if pattern.search(lowerText)) + 0.01
         return min(result, 1.0)
+
+
+class APLLexer(RegexLexer):
+    """
+    A simple APL lexer.
+
+    .. versionadded:: 2.0
+    """
+    name = 'APL'
+    aliases = ['apl']
+    filenames = ['*.apl']
+
+    tokens = {
+        'root': [
+            # Whitespace
+            # ==========
+            (r'\s+', Text),
+            #
+            # Comment
+            # =======
+            # '⍝' is traditional; '#' is supported by GNU APL and NGN (but not Dyalog)
+            (u'[⍝#].*$', Comment.Single),
+            #
+            # Strings
+            # =======
+            (r'\'((\'\')|[^\'])*\'', String.Single),
+            (r'"(("")|[^"])*"', String.Double),  # supported by NGN APL
+            #
+            # Punctuation
+            # ===========
+            # This token type is used for diamond and parenthesis
+            # but not for bracket and ; (see below)
+            (u'[⋄◇()]', Punctuation),
+            #
+            # Array indexing
+            # ==============
+            # Since this token type is very important in APL, it is not included in
+            # the punctuation token type but rather in the following one
+            (r'[\[\];]', String.Regex),
+            #
+            # Distinguished names
+            # ===================
+            # following IBM APL2 standard
+            (u'⎕[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Function),
+            #
+            # Labels
+            # ======
+            # following IBM APL2 standard
+            # (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*:', Name.Label),
+            #
+            # Variables
+            # =========
+            # following IBM APL2 standard
+            (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Variable),
+            #
+            # Numbers
+            # =======
+            (u'¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞)'
+             u'([Jj]¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞))?',
+             Number),
+            #
+            # Operators
+            # =========
+            (u'[\.\\\/⌿⍀¨⍣⍨⍠⍤∘]', Name.Attribute),  # closest token type
+            (u'[+\-×÷⌈⌊∣|⍳?*⍟○!⌹<≤=>≥≠≡≢∊⍷∪∩~∨∧⍱⍲⍴,⍪⌽⊖⍉↑↓⊂⊃⌷⍋⍒⊤⊥⍕⍎⊣⊢⍁⍂≈⌸⍯↗]',
+             Operator),
+            #
+            # Constant
+            # ========
+            (u'⍬', Name.Constant),
+            #
+            # Quad symbol
+            # ===========
+            (u'[⎕⍞]', Name.Variable.Global),
+            #
+            # Arrows left/right
+            # =================
+            (u'[←→]', Keyword.Declaration),
+            #
+            # D-Fn
+            # ====
+            (u'[⍺⍵⍶⍹∇:]', Name.Builtin.Pseudo),
+            (r'[{}]', Keyword.Type),
+        ],
+    }
diff --git a/pygments/token.py b/pygments/token.py
index f6c3066d..c40ffd33 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -49,6 +49,7 @@ Token = _TokenType()
 # Special token types
 Text = Token.Text
 Whitespace = Text.Whitespace
+Escape = Token.Escape
 Error = Token.Error
 # Text that doesn't belong to this lexer (e.g. HTML in PHP)
 Other = Token.Other
@@ -116,6 +117,7 @@ STANDARD_TYPES = {
     Text:                          '',
     Whitespace:                    'w',
+    Escape:                        'esc',
     Error:                         'err',
     Other:                         'x',
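Note: finally, a sketch of how the new Token.Escape type surfaces through the public token API; 'esc' is the short name registered in STANDARD_TYPES above:

    from pygments.token import Escape, Token, STANDARD_TYPES, string_to_tokentype

    print(Escape is Token.Escape)          # True -- module-level shortcut
    print(STANDARD_TYPES[Token.Escape])    # 'esc'
    print(string_to_tokentype('Escape'))   # Token.Escape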