summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES1
-rw-r--r--pygments/lexers/jvm.py217
-rw-r--r--pygments/lexers/math.py99
-rw-r--r--pygments/lexers/python.py97
4 files changed, 209 insertions, 205 deletions
diff --git a/CHANGES b/CHANGES
index b6b80f1d..057835b3 100644
--- a/CHANGES
+++ b/CHANGES
@@ -46,6 +46,7 @@ Version 2.0
* Alloy (PR#355)
* Docker config files
* Todo.txt todo lists
+ * Pig (PR#304)
- Added a helper to "optimize" regular expressions that match one of many
literal words; this can save 20% and more lexing time with lexers that
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index d422f5fd..19f0ded9 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -12,12 +12,12 @@
import re
from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
- this, combined, default
+ this, combined, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation
+from pygments.util import shebang_matches
from pygments import unistring as uni
-
__all__ = ['JavaLexer', 'ScalaLexer', 'GosuLexer', 'GosuTemplateLexer',
'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'ClojureScriptLexer',
'KotlinLexer', 'XtendLexer', 'AspectJLexer', 'CeylonLexer',
@@ -42,9 +42,9 @@ class JavaLexer(RegexLexer):
(r'//.*?\n', Comment.Single),
(r'/\*.*?\*/', Comment.Multiline),
# method names
- (r'((?:(?:[^\W\d]|\$)[\w\.\[\]\$<>]*\s+)+?)' # return arguments
- r'((?:[^\W\d]|\$)[\w\$]*)' # method name
- r'(\s*)(\()', # signature start
+ (r'((?:(?:[^\W\d]|\$)[\w\.\[\]\$<>]*\s+)+?)' # return arguments
+ r'((?:[^\W\d]|\$)[\w\$]*)' # method name
+ r'(\s*)(\()', # signature start
bygroups(using(this), Name.Function, Text, Operator)),
(r'@[^\W\d][\w\.]*', Name.Decorator),
(r'(assert|break|case|catch|continue|default|do|else|finally|for|'
@@ -91,7 +91,7 @@ class AspectJLexer(JavaLexer):
filenames = ['*.aj']
mimetypes = ['text/x-aspectj']
- aj_keywords = [
+ aj_keywords = set((
'aspect', 'pointcut', 'privileged', 'call', 'execution',
'initialization', 'preinitialization', 'handler', 'get', 'set',
'staticinitialization', 'target', 'args', 'within', 'withincode',
@@ -101,9 +101,9 @@ class AspectJLexer(JavaLexer):
'thisJoinPointStaticPart', 'thisEnclosingJoinPointStaticPart',
'issingleton', 'perthis', 'pertarget', 'percflow', 'percflowbelow',
'pertypewithin', 'lock', 'unlock', 'thisAspectInstance'
- ]
- aj_inter_type = ['parents:', 'warning:', 'error:', 'soft:', 'precedence:']
- aj_inter_type_annotation = ['@type', '@method', '@constructor', '@field']
+ ))
+ aj_inter_type = set(('parents:', 'warning:', 'error:', 'soft:', 'precedence:'))
+ aj_inter_type_annotation = set(('@type', '@method', '@constructor', '@field'))
def get_tokens_unprocessed(self, text):
for index, token, value in JavaLexer.get_tokens_unprocessed(self, text):
@@ -268,8 +268,8 @@ class ScalaLexer(RegexLexer):
(r'""".*?"""(?!")', String),
(r'"(\\\\|\\"|[^"])*"', String),
(r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char),
-# (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator,
-# Name.Attribute)),
+ # (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator,
+ # Name.Attribute)),
(idrest, Name),
(r'`[^`]+`', Name),
(r'\[', Operator, 'typeparam'),
@@ -340,9 +340,9 @@ class GosuLexer(RegexLexer):
tokens = {
'root': [
# method names
- (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # modifiers etc.
- r'([a-zA-Z_]\w*)' # method name
- r'(\s*)(\()', # signature start
+ (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # modifiers etc.
+ r'([a-zA-Z_]\w*)' # method name
+ r'(\s*)(\()', # signature start
bygroups(using(this), Name.Function, Text, Operator)),
(r'[^\S\n]+', Text),
(r'//.*?\n', Comment.Single),
@@ -376,28 +376,28 @@ class GosuLexer(RegexLexer):
(r'\n', Text)
],
'templateText': [
- (r'(\\<)|(\\\$)', String),
- (r'(<%@\s+)(extends|params)',
- bygroups(Operator, Name.Decorator), 'stringTemplate'),
- (r'<%!--.*?--%>', Comment.Multiline),
- (r'(<%)|(<%=)', Operator, 'stringTemplate'),
- (r'\$\{', Operator, 'stringTemplateShorthand'),
- (r'.', String)
+ (r'(\\<)|(\\\$)', String),
+ (r'(<%@\s+)(extends|params)',
+ bygroups(Operator, Name.Decorator), 'stringTemplate'),
+ (r'<%!--.*?--%>', Comment.Multiline),
+ (r'(<%)|(<%=)', Operator, 'stringTemplate'),
+ (r'\$\{', Operator, 'stringTemplateShorthand'),
+ (r'.', String)
],
'string': [
- (r'"', String, '#pop'),
- include('templateText')
+ (r'"', String, '#pop'),
+ include('templateText')
],
'stringTemplate': [
- (r'"', String, 'string'),
- (r'%>', Operator, '#pop'),
- include('root')
+ (r'"', String, 'string'),
+ (r'%>', Operator, '#pop'),
+ include('root')
],
'stringTemplateShorthand': [
- (r'"', String, 'string'),
- (r'\{', Operator, 'stringTemplateShorthand'),
- (r'\}', Operator, '#pop'),
- include('root')
+ (r'"', String, 'string'),
+ (r'\{', Operator, 'stringTemplateShorthand'),
+ (r'\}', Operator, '#pop'),
+ include('root')
],
}
@@ -443,9 +443,9 @@ class GroovyLexer(RegexLexer):
],
'base': [
# method names
- (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
- r'([a-zA-Z_]\w*)' # method name
- r'(\s*)(\()', # signature start
+ (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
+ r'([a-zA-Z_]\w*)' # method name
+ r'(\s*)(\()', # signature start
bygroups(using(this), Name.Function, Text, Operator)),
(r'[^\S\n]+', Text),
(r'//.*?\n', Comment.Single),
@@ -506,44 +506,44 @@ class IokeLexer(RegexLexer):
(r'(\\b|\\e|\\t|\\n|\\f|\\r|\\"|\\\\|\\#|\\\Z|\\u[0-9a-fA-F]{1,4}'
r'|\\[0-3]?[0-7]?[0-7])', String.Escape),
(r'#{', Punctuation, 'textInterpolationRoot')
- ],
+ ],
'text': [
(r'(?<!\\)"', String, '#pop'),
include('interpolatableText'),
(r'[^"]', String)
- ],
+ ],
'documentation': [
(r'(?<!\\)"', String.Doc, '#pop'),
include('interpolatableText'),
(r'[^"]', String.Doc)
- ],
+ ],
'textInterpolationRoot': [
(r'}', Punctuation, '#pop'),
include('root')
- ],
+ ],
'slashRegexp': [
(r'(?<!\\)/[oxpniums]*', String.Regex, '#pop'),
include('interpolatableText'),
(r'\\/', String.Regex),
(r'[^/]', String.Regex)
- ],
+ ],
'squareRegexp': [
(r'(?<!\\)][oxpniums]*', String.Regex, '#pop'),
include('interpolatableText'),
(r'\\]', String.Regex),
(r'[^\]]', String.Regex)
- ],
+ ],
'squareText': [
(r'(?<!\\)]', String, '#pop'),
include('interpolatableText'),
(r'[^\]]', String)
- ],
+ ],
'root': [
(r'\n', Text),
@@ -553,28 +553,28 @@ class IokeLexer(RegexLexer):
(r';(.*?)\n', Comment),
(r'\A#!(.*?)\n', Comment),
- #Regexps
+ # Regexps
(r'#/', String.Regex, 'slashRegexp'),
(r'#r\[', String.Regex, 'squareRegexp'),
- #Symbols
+ # Symbols
(r':[\w!:?]+', String.Symbol),
(r'[\w!:?]+:(?![\w!?])', String.Other),
(r':"(\\\\|\\"|[^"])*"', String.Symbol),
- #Documentation
+ # Documentation
(r'((?<=fn\()|(?<=fnx\()|(?<=method\()|(?<=macro\()|(?<=lecro\()'
r'|(?<=syntax\()|(?<=dmacro\()|(?<=dlecro\()|(?<=dlecrox\()'
r'|(?<=dsyntax\())\s*"', String.Doc, 'documentation'),
- #Text
+ # Text
(r'"', String, 'text'),
(r'#\[', String, 'squareText'),
- #Mimic
+ # Mimic
(r'\w[a-zA-Z0-9!?_:]+(?=\s*=.*mimic\s)', Name.Entity),
- #Assignment
+ # Assignment
(r'[a-zA-Z_][\w!:?]*(?=[\s]*[+*/-]?=[^=].*($|\.))',
Name.Variable),
@@ -594,20 +594,20 @@ class IokeLexer(RegexLexer):
# Ground
(r'(stackTraceAsText)(?![a-zA-Z0-9!:_?])', Keyword),
- #DefaultBehaviour Literals
+ # DefaultBehaviour Literals
(r'(dict|list|message|set)(?![a-zA-Z0-9!:_?])', Keyword.Reserved),
- #DefaultBehaviour Case
+ # DefaultBehaviour Case
(r'(case|case:and|case:else|case:nand|case:nor|case:not|case:or|'
r'case:otherwise|case:xor)(?![a-zA-Z0-9!:_?])', Keyword.Reserved),
- #DefaultBehaviour Reflection
+ # DefaultBehaviour Reflection
(r'(asText|become\!|derive|freeze\!|frozen\?|in\?|is\?|kind\?|'
r'mimic\!|mimics|mimics\?|prependMimic\!|removeAllMimics\!|'
r'removeMimic\!|same\?|send|thaw\!|uniqueHexId)'
r'(?![a-zA-Z0-9!:_?])', Keyword),
- #DefaultBehaviour Aspects
+ # DefaultBehaviour Aspects
(r'(after|around|before)(?![a-zA-Z0-9!:_?])', Keyword.Reserved),
# DefaultBehaviour
@@ -615,18 +615,18 @@ class IokeLexer(RegexLexer):
r'(?![a-zA-Z0-9!:_?])', Keyword),
(r'(use|destructuring)', Keyword.Reserved),
- #DefaultBehavior BaseBehavior
+ # DefaultBehavior BaseBehavior
(r'(cell\?|cellOwner\?|cellOwner|cellNames|cells|cell|'
r'documentation|identity|removeCell!|undefineCell)'
r'(?![a-zA-Z0-9!:_?])', Keyword),
- #DefaultBehavior Internal
+ # DefaultBehavior Internal
(r'(internal:compositeRegexp|internal:concatenateText|'
r'internal:createDecimal|internal:createNumber|'
r'internal:createRegexp|internal:createText)'
r'(?![a-zA-Z0-9!:_?])', Keyword.Reserved),
- #DefaultBehaviour Conditions
+ # DefaultBehaviour Conditions
(r'(availableRestarts|bind|error\!|findRestart|handle|'
r'invokeRestart|rescue|restart|signal\!|warn\!)'
r'(?![a-zA-Z0-9!:_?])', Keyword.Reserved),
@@ -658,7 +658,7 @@ class IokeLexer(RegexLexer):
(r'#\(', Punctuation),
- # Operators
+ # Operators
(r'(&&>>|\|\|>>|\*\*>>|:::|::|\.\.\.|===|\*\*>|\*\*=|&&>|&&=|'
r'\|\|>|\|\|=|\->>|\+>>|!>>|<>>>|<>>|&>>|%>>|#>>|@>>|/>>|\*>>|'
r'\?>>|\|>>|\^>>|~>>|\$>>|=>>|<<=|>>=|<=>|<\->|=~|!~|=>|\+\+|'
@@ -672,10 +672,10 @@ class IokeLexer(RegexLexer):
# Punctuation
(r'(\`\`|\`|\'\'|\'|\.|\,|@@|@|\[|\]|\(|\)|{|})', Punctuation),
- #kinds
+ # kinds
(r'[A-Z][\w!:?]*', Name.Class),
- #default cellnames
+ # default cellnames
(r'[a-z_][\w!:?]*', Name)
]
}
@@ -692,19 +692,19 @@ class ClojureLexer(RegexLexer):
filenames = ['*.clj']
mimetypes = ['text/x-clojure', 'application/x-clojure']
- special_forms = [
+ special_forms = (
'.', 'def', 'do', 'fn', 'if', 'let', 'new', 'quote', 'var', 'loop'
- ]
+ )
# It's safe to consider 'ns' a declaration thing because it defines a new
# namespace.
- declarations = [
+ declarations = (
'def-', 'defn', 'defn-', 'defmacro', 'defmulti', 'defmethod',
'defstruct', 'defonce', 'declare', 'definline', 'definterface',
'defprotocol', 'defrecord', 'deftype', 'defproject', 'ns'
- ]
+ )
- builtins = [
+ builtins = (
'*', '+', '-', '->', '/', '<', '<=', '=', '==', '>', '>=', '..',
'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
@@ -753,7 +753,7 @@ class ClojureLexer(RegexLexer):
'val', 'vals', 'var-get', 'var-set', 'var?', 'vector', 'vector-zip',
'vector?', 'when', 'when-first', 'when-let', 'when-not',
'with-local-vars', 'with-meta', 'with-open', 'with-out-str',
- 'xml-seq', 'xml-zip', 'zero?', 'zipmap', 'zipper']
+ 'xml-seq', 'xml-zip', 'zero?', 'zipmap', 'zipper')
# valid names for identifiers
# well, names can only not consist fully of numbers
@@ -763,9 +763,6 @@ class ClojureLexer(RegexLexer):
# but that's hard, so just pretend / is part of the name
valid_name = r'(?!#)[\w!$%*+<=>?/.#-]+'
- def _multi_escape(entries):
- return '(%s)' % ('|'.join(re.escape(entry) + ' ' for entry in entries))
-
tokens = {
'root': [
# the comments - always starting with semicolon
@@ -792,17 +789,17 @@ class ClojureLexer(RegexLexer):
(r'~@|[`\'#^~&@]', Operator),
# highlight the special forms
- (_multi_escape(special_forms), Keyword),
+ (words(special_forms, suffix=' '), Keyword),
# Technically, only the special forms are 'keywords'. The problem
# is that only treating them as keywords means that things like
# 'defn' and 'ns' need to be highlighted as builtins. This is ugly
# and weird for most styles. So, as a compromise we're going to
# highlight them as Keyword.Declarations.
- (_multi_escape(declarations), Keyword.Declaration),
+ (words(declarations, suffix=' '), Keyword.Declaration),
# highlight the builtins
- (_multi_escape(builtins), Name.Builtin),
+ (words(builtins, suffix=' '), Name.Builtin),
# the remaining functions
(r'(?<=\()' + valid_name, Name.Function),
@@ -848,9 +845,9 @@ class TeaLangLexer(RegexLexer):
tokens = {
'root': [
# method names
- (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
- r'([a-zA-Z_]\w*)' # method name
- r'(\s*)(\()', # signature start
+ (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
+ r'([a-zA-Z_]\w*)' # method name
+ r'(\s*)(\()', # signature start
bygroups(using(this), Name.Function, Text, Operator)),
(r'[^\S\n]+', Text),
(r'//.*?\n', Comment.Single),
@@ -902,9 +899,9 @@ class CeylonLexer(RegexLexer):
tokens = {
'root': [
# method names
- (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
- r'([a-zA-Z_]\w*)' # method name
- r'(\s*)(\()', # signature start
+ (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
+ r'([a-zA-Z_]\w*)' # method name
+ r'(\s*)(\()', # signature start
bygroups(using(this), Name.Function, Text, Operator)),
(r'[^\S\n]+', Text),
(r'//.*?\n', Comment.Single),
@@ -986,7 +983,7 @@ class KotlinLexer(RegexLexer):
'root': [
(r'^\s*\[.*?\]', Name.Attribute),
(r'[^\S\n]+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\\\n', Text), # line continuation
(r'//.*?\n', Comment.Single),
(r'/[*].*?[*]/', Comment.Multiline),
(r'\n', Text),
@@ -1042,9 +1039,9 @@ class XtendLexer(RegexLexer):
tokens = {
'root': [
# method names
- (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
- r'([a-zA-Z_$][\w$]*)' # method name
- r'(\s*)(\()', # signature start
+ (r'^(\s*(?:[a-zA-Z_][\w\.\[\]]*\s+)+?)' # return arguments
+ r'([a-zA-Z_$][\w$]*)' # method name
+ r'(\s*)(\()', # signature start
bygroups(using(this), Name.Function, Text, Operator)),
(r'[^\S\n]+', Text),
(r'//.*?\n', Comment.Single),
@@ -1089,6 +1086,7 @@ class XtendLexer(RegexLexer):
],
}
+
class PigLexer(RegexLexer):
"""
For `Pig Latin <https://pig.apache.org/>`_ source code.
@@ -1124,7 +1122,7 @@ class PigLexer(RegexLexer):
bygroups(Name.Function, Text, Punctuation)),
(r'[()#:]', Text),
(r'[^(:#\'\")\s]+', Text),
- (r'\S+\s+', Text) # TODO: make tests pass without \s+
+ (r'\S+\s+', Text) # TODO: make tests pass without \s+
],
'keywords': [
(r'(assert|and|any|all|arrange|as|asc|bag|by|cache|CASE|cat|cd|cp|'
@@ -1328,41 +1326,44 @@ class JasminLexer(RegexLexer):
(r'method%s' % _break, Keyword.Reserved, 'enclosing-method'),
# Instructions
- (r'(aaload|aastore|aconst_null|aload|aload_0|aload_1|aload_2|'
- r'aload_3|aload_w|areturn|arraylength|astore|astore_0|astore_1|'
- r'astore_2|astore_3|astore_w|athrow|baload|bastore|bipush|'
- r'breakpoint|caload|castore|d2f|d2i|d2l|dadd|daload|dastore|'
- r'dcmpg|dcmpl|dconst_0|dconst_1|ddiv|dload|dload_0|dload_1|'
- r'dload_2|dload_3|dload_w|dmul|dneg|drem|dreturn|dstore|dstore_0|'
- r'dstore_1|dstore_2|dstore_3|dstore_w|dsub|dup|dup2|dup2_x1|'
- r'dup2_x2|dup_x1|dup_x2|f2d|f2i|f2l|fadd|faload|fastore|fcmpg|'
- r'fcmpl|fconst_0|fconst_1|fconst_2|fdiv|fload|fload_0|fload_1|'
- r'fload_2|fload_3|fload_w|fmul|fneg|frem|freturn|fstore|fstore_0|'
- r'fstore_1|fstore_2|fstore_3|fstore_w|fsub|i2b|i2c|i2d|i2f|i2l|'
- r'i2s|iadd|iaload|iand|iastore|iconst_0|iconst_1|iconst_2|'
- r'iconst_3|iconst_4|iconst_5|iconst_m1|idiv|iinc|iinc_w|iload|'
- r'iload_0|iload_1|iload_2|iload_3|iload_w|imul|ineg|int2byte|'
- r'int2char|int2short|ior|irem|ireturn|ishl|ishr|istore|istore_0|'
- r'istore_1|istore_2|istore_3|istore_w|isub|iushr|ixor|l2d|l2f|'
- r'l2i|ladd|laload|land|lastore|lcmp|lconst_0|lconst_1|ldc2_w|'
- r'ldiv|lload|lload_0|lload_1|lload_2|lload_3|lload_w|lmul|lneg|'
- r'lookupswitch|lor|lrem|lreturn|lshl|lshr|lstore|lstore_0|'
- r'lstore_1|lstore_2|lstore_3|lstore_w|lsub|lushr|lxor|'
- r'monitorenter|monitorexit|nop|pop|pop2|ret|ret_w|return|saload|'
- r'sastore|sipush|swap)%s' % _break, Keyword.Reserved),
+ (words((
+ 'aaload', 'aastore', 'aconst_null', 'aload', 'aload_0', 'aload_1', 'aload_2',
+ 'aload_3', 'aload_w', 'areturn', 'arraylength', 'astore', 'astore_0', 'astore_1',
+ 'astore_2', 'astore_3', 'astore_w', 'athrow', 'baload', 'bastore', 'bipush',
+ 'breakpoint', 'caload', 'castore', 'd2f', 'd2i', 'd2l', 'dadd', 'daload', 'dastore',
+ 'dcmpg', 'dcmpl', 'dconst_0', 'dconst_1', 'ddiv', 'dload', 'dload_0', 'dload_1',
+ 'dload_2', 'dload_3', 'dload_w', 'dmul', 'dneg', 'drem', 'dreturn', 'dstore', 'dstore_0',
+ 'dstore_1', 'dstore_2', 'dstore_3', 'dstore_w', 'dsub', 'dup', 'dup2', 'dup2_x1',
+ 'dup2_x2', 'dup_x1', 'dup_x2', 'f2d', 'f2i', 'f2l', 'fadd', 'faload', 'fastore', 'fcmpg',
+ 'fcmpl', 'fconst_0', 'fconst_1', 'fconst_2', 'fdiv', 'fload', 'fload_0', 'fload_1',
+ 'fload_2', 'fload_3', 'fload_w', 'fmul', 'fneg', 'frem', 'freturn', 'fstore', 'fstore_0',
+ 'fstore_1', 'fstore_2', 'fstore_3', 'fstore_w', 'fsub', 'i2b', 'i2c', 'i2d', 'i2f', 'i2l',
+ 'i2s', 'iadd', 'iaload', 'iand', 'iastore', 'iconst_0', 'iconst_1', 'iconst_2',
+ 'iconst_3', 'iconst_4', 'iconst_5', 'iconst_m1', 'idiv', 'iinc', 'iinc_w', 'iload',
+ 'iload_0', 'iload_1', 'iload_2', 'iload_3', 'iload_w', 'imul', 'ineg', 'int2byte',
+ 'int2char', 'int2short', 'ior', 'irem', 'ireturn', 'ishl', 'ishr', 'istore', 'istore_0',
+ 'istore_1', 'istore_2', 'istore_3', 'istore_w', 'isub', 'iushr', 'ixor', 'l2d', 'l2f',
+ 'l2i', 'ladd', 'laload', 'land', 'lastore', 'lcmp', 'lconst_0', 'lconst_1', 'ldc2_w',
+ 'ldiv', 'lload', 'lload_0', 'lload_1', 'lload_2', 'lload_3', 'lload_w', 'lmul', 'lneg',
+ 'lookupswitch', 'lor', 'lrem', 'lreturn', 'lshl', 'lshr', 'lstore', 'lstore_0',
+ 'lstore_1', 'lstore_2', 'lstore_3', 'lstore_w', 'lsub', 'lushr', 'lxor',
+ 'monitorenter', 'monitorexit', 'nop', 'pop', 'pop2', 'ret', 'ret_w', 'return', 'saload',
+ 'sastore', 'sipush', 'swap'), suffix=_break), Keyword.Reserved),
(r'(anewarray|checkcast|instanceof|ldc|ldc_w|new)%s' % _break,
Keyword.Reserved, 'class/no-dots'),
- (r'(invokedynamic|invokeinterface|invokenonvirtual|invokespecial|'
- r'invokestatic|invokevirtual)%s' % _break, Keyword.Reserved,
+ (r'invoke(dynamic|interface|nonvirtual|special|'
+ r'static|virtual)%s' % _break, Keyword.Reserved,
'invocation'),
(r'(getfield|putfield)%s' % _break, Keyword.Reserved,
('descriptor/no-dots', 'field')),
(r'(getstatic|putstatic)%s' % _break, Keyword.Reserved,
('descriptor/no-dots', 'static')),
- (r'(goto|goto_w|if_acmpeq|if_acmpne|if_icmpeq|if_icmpge|if_icmpgt|'
- r'if_icmple|if_icmplt|if_icmpne|ifeq|ifge|ifgt|ifle|iflt|ifne|'
- r'ifnonnull|ifnull|jsr|jsr_w)%s' % _break, Keyword.Reserved,
- 'label'),
+ (words((
+ 'goto', 'goto_w', 'if_acmpeq', 'if_acmpne', 'if_icmpeq',
+ 'if_icmpge', 'if_icmpgt', 'if_icmple', 'if_icmplt', 'if_icmpne',
+ 'ifeq', 'ifge', 'ifgt', 'ifle', 'iflt', 'ifne', 'ifnonnull',
+ 'ifnull', 'jsr', 'jsr_w'), suffix=_break),
+ Keyword.Reserved, 'label'),
(r'(multianewarray|newarray)%s' % _break, Keyword.Reserved,
'descriptor/convert-dots'),
(r'tableswitch%s' % _break, Keyword.Reserved, 'table')
diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py
index 01ffc84d..c287a717 100644
--- a/pygments/lexers/math.py
+++ b/pygments/lexers/math.py
@@ -19,10 +19,12 @@ from pygments.lexer import Lexer, RegexLexer, bygroups, include, \
from pygments.token import Comment, String, Punctuation, Keyword, Name, \
Operator, Number, Text, Generic
-from pygments.lexers.python import PythonLexer
from pygments.lexers import _scilab_builtins
from pygments.lexers import _stan_builtins
+
+from pygments.lexers.python import NumPyLexer
+
__all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer',
'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer',
'RConsoleLexer', 'SLexer', 'JagsLexer', 'BugsLexer', 'StanLexer',
@@ -909,101 +911,6 @@ class ScilabLexer(RegexLexer):
}
-class NumPyLexer(PythonLexer):
- """
- A Python lexer recognizing Numerical Python builtins.
-
- .. versionadded:: 0.10
- """
-
- name = 'NumPy'
- aliases = ['numpy']
-
- # override the mimetypes to not inherit them from python
- mimetypes = []
- filenames = []
-
- EXTRA_KEYWORDS = set((
- 'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
- 'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
- 'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
- 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
- 'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
- 'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
- 'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
- 'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
- 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
- 'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
- 'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
- 'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
- 'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
- 'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
- 'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
- 'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
- 'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
- 'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
- 'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
- 'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
- 'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
- 'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
- 'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
- 'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
- 'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
- 'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
- 'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
- 'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
- 'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
- 'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
- 'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
- 'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
- 'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
- 'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
- 'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
- 'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
- 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
- 'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
- 'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
- 'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
- 'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
- 'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
- 'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
- 'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
- 'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
- 'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
- 'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
- 'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
- 'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
- 'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
- 'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
- 'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
- 'set_numeric_ops', 'set_printoptions', 'set_string_function',
- 'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
- 'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
- 'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
- 'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
- 'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
- 'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
- 'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
- 'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
- 'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
- 'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
- 'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
- ))
-
- def get_tokens_unprocessed(self, text):
- for index, token, value in \
- PythonLexer.get_tokens_unprocessed(self, text):
- if token is Name and value in self.EXTRA_KEYWORDS:
- yield index, Keyword.Pseudo, value
- else:
- yield index, token, value
-
- def analyse_text(text):
- return (shebang_matches(text, r'pythonw?(2(\.\d)?)?') or
- 'import ' in text[:1000]) \
- and ('import numpy' in text or 'from numpy import' in text)
-
-
class RConsoleLexer(Lexer):
"""
For R console transcripts or R CMD BATCH output files.
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index db747d2e..6cd0a6c8 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -20,7 +20,7 @@ from pygments import unistring as uni
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
'Python3Lexer', 'Python3TracebackLexer', 'CythonLexer',
- 'DgLexer']
+ 'DgLexer', 'NumPyLexer']
line_re = re.compile('.*?\n')
@@ -733,3 +733,98 @@ class DgLexer(RegexLexer):
(r"'''", String, '#pop')
],
}
+
+
+class NumPyLexer(PythonLexer):
+ """
+ A Python lexer recognizing Numerical Python builtins.
+
+ .. versionadded:: 0.10
+ """
+
+ name = 'NumPy'
+ aliases = ['numpy']
+
+ # override the mimetypes to not inherit them from python
+ mimetypes = []
+ filenames = []
+
+ EXTRA_KEYWORDS = set((
+ 'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
+ 'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
+ 'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
+ 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
+ 'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
+ 'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
+ 'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
+ 'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
+ 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
+ 'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
+ 'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
+ 'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
+ 'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
+ 'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
+ 'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
+ 'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
+ 'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
+ 'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
+ 'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
+ 'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
+ 'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
+ 'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
+ 'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
+ 'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
+ 'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
+ 'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
+ 'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
+ 'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
+ 'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
+ 'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
+ 'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
+ 'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
+ 'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
+ 'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
+ 'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
+ 'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
+ 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
+ 'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
+ 'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
+ 'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
+ 'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
+ 'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
+ 'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
+ 'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
+ 'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
+ 'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
+ 'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
+ 'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
+ 'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
+ 'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
+ 'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
+ 'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
+ 'set_numeric_ops', 'set_printoptions', 'set_string_function',
+ 'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
+ 'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
+ 'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
+ 'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
+ 'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
+ 'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
+ 'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
+ 'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
+ 'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
+ 'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
+ 'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
+ ))
+
+ def get_tokens_unprocessed(self, text):
+ for index, token, value in \
+ PythonLexer.get_tokens_unprocessed(self, text):
+ if token is Name and value in self.EXTRA_KEYWORDS:
+ yield index, Keyword.Pseudo, value
+ else:
+ yield index, token, value
+
+ def analyse_text(text):
+ return (shebang_matches(text, r'pythonw?(2(\.\d)?)?') or
+ 'import ' in text[:1000]) \
+ and ('import numpy' in text or 'from numpy import' in text)