diff options
author | Tim Hatch <tim@timhatch.com> | 2012-11-12 13:56:16 -0800 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2012-11-12 13:56:16 -0800 |
commit | 5a138b415f212bc91bf8d9b99ef366618e251559 (patch) | |
tree | 8c27598b0f5b89ec870ade4bf79f38d3861c7ae0 | |
parent | e2bfc76c1dc5fec4e290612f76b50a6707505b91 (diff) | |
parent | abebcea4cfe6c3b78b0a86d311c323cd1414910e (diff) | |
download | pygments-5a138b415f212bc91bf8d9b99ef366618e251559.tar.gz |
Merged in tomstuart/pygments-main (pull request #125)
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | CHANGES | 4 | ||||
-rw-r--r-- | pygments/filters/__init__.py | 1 | ||||
-rw-r--r-- | pygments/formatters/other.py | 2 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 6 | ||||
-rw-r--r-- | pygments/lexers/_postgres_builtins.py | 1 | ||||
-rw-r--r-- | pygments/lexers/agile.py | 104 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 135 | ||||
-rw-r--r-- | pygments/lexers/dalvik.py | 104 | ||||
-rw-r--r-- | pygments/lexers/jvm.py | 22 | ||||
-rw-r--r-- | pygments/lexers/text.py | 47 | ||||
-rw-r--r-- | tests/examplefiles/example.monkey | 152 | ||||
-rw-r--r-- | tests/examplefiles/example.reg | 19 | ||||
-rw-r--r-- | tests/examplefiles/hello.smali | 40 | ||||
-rw-r--r-- | tests/examplefiles/inet_pton6.dg | 71 | ||||
-rw-r--r-- | tests/examplefiles/pytb_test3.pytb | 4 |
16 files changed, 688 insertions, 25 deletions
@@ -63,6 +63,7 @@ Other contributors, listed alphabetically, are: * Marek Kubica -- Scheme lexer * Jochen Kupperschmidt -- Markdown processor * Gerd Kurzbach -- Modelica lexer +* Jon Larimer, Google Inc. -- Smali lexer * Olov Lassus -- Dart lexer * Sylvestre Ledru -- Scilab lexer * Mark Lee -- Vala lexer @@ -26,11 +26,15 @@ Version 1.6 * VGL (PR#12) * SourcePawn (PR#39) * Ceylon (PR#86) + * Smali (Dalvik assembly) + * Windows Registry (#819) - Fix Template Haskell highlighting (PR#63) - Fix some S/R lexer errors (PR#91) +- Fix a bug in the Prolog lexer with names that start with 'is' (#810) + Version 1.5 ----------- diff --git a/pygments/filters/__init__.py b/pygments/filters/__init__.py index b816c58c..93e97563 100644 --- a/pygments/filters/__init__.py +++ b/pygments/filters/__init__.py @@ -331,7 +331,6 @@ class TokenMergeFilter(Filter): Filter.__init__(self, **options) def filter(self, lexer, stream): - output = [] current_type = None current_value = None for ttype, value in stream: diff --git a/pygments/formatters/other.py b/pygments/formatters/other.py index a19e4ce0..4b6d4c38 100644 --- a/pygments/formatters/other.py +++ b/pygments/formatters/other.py @@ -102,8 +102,6 @@ class RawTokenFormatter(Formatter): outfile.write(text.encode()) flush = outfile.flush - lasttype = None - lastval = u'' if self.error_color: for ttype, value in tokensource: line = "%s\t%r\n" % (ttype, value) diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index e3c45a47..2663b027 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -62,7 +62,7 @@ LEXERS = { 'ColdfusionLexer': ('pygments.lexers.templates', 'cfstatement', ('cfs',), (), ()), 'CommonLispLexer': ('pygments.lexers.functional', 'Common Lisp', ('common-lisp', 'cl'), ('*.cl', '*.lisp', '*.el'), ('text/x-common-lisp',)), 'CoqLexer': ('pygments.lexers.functional', 'Coq', ('coq',), ('*.v',), ('text/x-coq',)), - 'CppLexer': ('pygments.lexers.compiled', 'C++', ('cpp', 'c++'), ('*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx'), ('text/x-c++hdr', 'text/x-c++src')), + 'CppLexer': ('pygments.lexers.compiled', 'C++', ('cpp', 'c++'), ('*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx', '*.C', '*.H', '*.cp', '*.CPP'), ('text/x-c++hdr', 'text/x-c++src')), 'CppObjdumpLexer': ('pygments.lexers.asm', 'cpp-objdump', ('cpp-objdump', 'c++-objdumb', 'cxx-objdump'), ('*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump'), ('text/x-cpp-objdump',)), 'CrocLexer': ('pygments.lexers.agile', 'Croc', ('croc',), ('*.croc',), ('text/x-crocsrc',)), 'CssDjangoLexer': ('pygments.lexers.templates', 'CSS+Django/Jinja', ('css+django', 'css+jinja'), (), ('text/css+django', 'text/css+jinja')), @@ -78,6 +78,7 @@ LEXERS = { 'DartLexer': ('pygments.lexers.web', 'Dart', ('dart',), ('*.dart',), ('text/x-dart',)), 'DebianControlLexer': ('pygments.lexers.text', 'Debian Control file', ('control',), ('control',), ()), 'DelphiLexer': ('pygments.lexers.compiled', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas',), ('text/x-pascal',)), + 'DgLexer': ('pygments.lexers.agile', 'dg', ('dg',), ('*.dg',), ('text/x-dg',)), 'DiffLexer': ('pygments.lexers.text', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')), 'DjangoLexer': ('pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), (), ('application/x-django-templating', 'application/x-jinja')), 'DtdLexer': ('pygments.lexers.web', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)), @@ -168,6 +169,7 @@ LEXERS = { 'ModelicaLexer': ('pygments.lexers.other', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)), 'Modula2Lexer': ('pygments.lexers.compiled', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)), 'MoinWikiLexer': ('pygments.lexers.text', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)), + 'MonkeyLexer': ('pygments.lexers.compiled', 'Monkey', ('monkey',), ('*.monkey',), ('text/x-monkey',)), 'MoonScriptLexer': ('pygments.lexers.agile', 'MoonScript', ('moon', 'moonscript'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')), 'MscgenLexer': ('pygments.lexers.other', 'Mscgen', ('mscgen', 'msc'), ('*.msc',), ()), 'MuPADLexer': ('pygments.lexers.math', 'MuPAD', ('mupad',), ('*.mu',), ()), @@ -224,6 +226,7 @@ LEXERS = { 'RdLexer': ('pygments.lexers.math', 'Rd', ('rd',), ('*.Rd',), ('text/x-r-doc',)), 'RebolLexer': ('pygments.lexers.other', 'REBOL', ('rebol',), ('*.r', '*.r3'), ('text/x-rebol',)), 'RedcodeLexer': ('pygments.lexers.other', 'Redcode', ('redcode',), ('*.cw',), ()), + 'RegeditLexer': ('pygments.lexers.text', 'reg', (), ('*.reg',), ('text/x-windows-registry',)), 'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)), 'RstLexer': ('pygments.lexers.text', 'reStructuredText', ('rst', 'rest', 'restructuredtext'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')), 'RubyConsoleLexer': ('pygments.lexers.agile', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)), @@ -237,6 +240,7 @@ LEXERS = { 'SchemeLexer': ('pygments.lexers.functional', 'Scheme', ('scheme', 'scm'), ('*.scm', '*.ss'), ('text/x-scheme', 'application/x-scheme')), 'ScilabLexer': ('pygments.lexers.math', 'Scilab', ('scilab',), ('*.sci', '*.sce', '*.tst'), ('text/scilab',)), 'ScssLexer': ('pygments.lexers.web', 'SCSS', ('scss',), ('*.scss',), ('text/x-scss',)), + 'SmaliLexer': ('pygments.lexers.dalvik', 'Smali', ('smali',), ('*.smali',), ('text/smali',)), 'SmalltalkLexer': ('pygments.lexers.other', 'Smalltalk', ('smalltalk', 'squeak'), ('*.st',), ('text/x-smalltalk',)), 'SmartyLexer': ('pygments.lexers.templates', 'Smarty', ('smarty',), ('*.tpl',), ('application/x-smarty',)), 'SnobolLexer': ('pygments.lexers.other', 'Snobol', ('snobol',), ('*.snobol',), ('text/x-snobol',)), diff --git a/pygments/lexers/_postgres_builtins.py b/pygments/lexers/_postgres_builtins.py index 45b3f22d..1f9918fe 100644 --- a/pygments/lexers/_postgres_builtins.py +++ b/pygments/lexers/_postgres_builtins.py @@ -41,7 +41,6 @@ def parse_keywords(f): def parse_datatypes(f): dt = set() - re_entry = re.compile('\s*<entry><type>([^<]+)</type></entry>') for line in f: if '<sect1' in line: break diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py index 85e157fb..61dc3827 100644 --- a/pygments/lexers/agile.py +++ b/pygments/lexers/agile.py @@ -22,7 +22,8 @@ from pygments import unistring as uni __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer', - 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', 'FancyLexer'] + 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer', + 'FancyLexer', 'DgLexer'] # b/w compatibility from pygments.lexers.functional import SchemeLexer @@ -392,7 +393,7 @@ class PythonTracebackLexer(RegexLexer): bygroups(Text, using(PythonLexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... - (r'^(.+)(: )(.+)(\n)', + (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)', bygroups(Generic.Error, Text), '#pop') @@ -428,7 +429,7 @@ class Python3TracebackLexer(RegexLexer): bygroups(Text, using(Python3Lexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... - (r'^(.+)(: )(.+)(\n)', + (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)', bygroups(Generic.Error, Text), '#pop') @@ -520,6 +521,8 @@ class RubyLexer(ExtendedRegexLexer): (r":'(\\\\|\\'|[^'])*'", String.Symbol), (r"'(\\\\|\\'|[^'])*'", String.Single), (r':"', String.Symbol, 'simple-sym'), + (r'([a-zA-Z_][a-zA-Z0-9]*)(:)', + bygroups(String.Symbol, Punctuation)), # Since Ruby 1.9 (r'"', String.Double, 'simple-string'), (r'(?<!\.)`', String.Backtick, 'simple-backtick'), ] @@ -649,7 +652,7 @@ class RubyLexer(ExtendedRegexLexer): (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback), (r'__END__', Comment.Preproc, 'end-part'), # multiline regex (after keywords or assignments) - (r'(?:^|(?<=[=<>~!])|' + (r'(?:^|(?<=[=<>~!:])|' r'(?<=(?:\s|;)when\s)|' r'(?<=(?:\s|;)or\s)|' r'(?<=(?:\s|;)and\s)|' @@ -1815,3 +1818,96 @@ class FancyLexer(RegexLexer): (r'\d+', Number.Integer) ] } + + +class DgLexer(RegexLexer): + """ + Lexer for `dg <http://pyos.github.com/dg>`_, + a functional and object-oriented programming language + running on the CPython 3 VM. + """ + name = 'dg' + aliases = ['dg'] + filenames = ['*.dg'] + mimetypes = ['text/x-dg'] + + tokens = { + 'root': [ + # Whitespace: + (r'\s+', Text), + (r'#.*?$', Comment.Single), + # Lexemes: + # Numbers + (r'0[bB][01]+', Number.Bin), + (r'0[oO][0-7]+', Number.Oct), + (r'0[xX][\da-fA-F]+', Number.Hex), + (r'[+-]?\d+\.\d+([eE][+-]?\d+)?[jJ]?', Number.Float), + (r'[+-]?\d+[eE][+-]?\d+[jJ]?', Number.Float), + (r'[+-]?\d+[jJ]?', Number.Integer), + # Character/String Literals + (r"[br]*'''", String, combined('stringescape', 'tsqs', 'string')), + (r'[br]*"""', String, combined('stringescape', 'tdqs', 'string')), + (r"[br]*'", String, combined('stringescape', 'sqs', 'string')), + (r'[br]*"', String, combined('stringescape', 'dqs', 'string')), + # Operators + (r"`\w+'*`", Operator), # Infix links + # Reserved infix links + (r'\b(or|and|if|unless|else|where|is|in)\b', Operator.Word), + (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator), + # Identifiers + # Python 3 types + (r"(?<!\.)(bool|bytearray|bytes|classmethod|complex|dict'?|" + r"float|frozenset|int|list'?|memoryview|object|property|range|" + r"set'?|slice|staticmethod|str|super|tuple'?|type)" + r"(?!['\w])", Name.Builtin), + # Python 3 builtins + some more + (r'(?<!\.)(__import__|abs|all|any|bin|bind|chr|cmp|compile|complex|' + r'delattr|dir|divmod|drop|dropwhile|enumerate|eval|filter|flip|' + r'foldl1?|format|fst|getattr|globals|hasattr|hash|head|hex|id|' + r'init|input|isinstance|issubclass|iter|iterate|last|len|locals|' + r'map|max|min|next|oct|open|ord|pow|print|repr|reversed|round|' + r'setattr|scanl1?|snd|sorted|sum|tail|take|takewhile|vars|zip)' + r"(?!['\w])", Name.Builtin), + (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])", + Name.Builtin.Pseudo), + (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])", + Name.Exception), + (r"(?<!\.)(KeyboardInterrupt|SystemExit|StopIteration|" + r"GeneratorExit)(?!['\w])", Name.Exception), + # Compiler-defined identifiers + (r"(?<![\.\w])(import|inherit|for|while|switch|not|raise|unsafe|" + r"yield|with)(?!['\w])", Keyword.Reserved), + # Other links + (r"[A-Z_']+\b", Name), + (r"[A-Z][\w']*\b", Keyword.Type), + (r"\w+'*", Name), + # Blocks + (r'[()]', Punctuation), + ], + 'stringescape': [ + (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' + r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) + ], + 'string': [ + (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' + '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + (r'[^\\\'"%\n]+', String), + # quotes, percents and backslashes must be parsed one at a time + (r'[\'"\\]', String), + # unhandled string formatting sign + (r'%', String), + (r'\n', String) + ], + 'dqs': [ + (r'"', String, '#pop') + ], + 'sqs': [ + (r"'", String, '#pop') + ], + 'tdqs': [ + (r'"""', String, '#pop') + ], + 'tsqs': [ + (r"'''", String, '#pop') + ], + } diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index 450a9c4d..2ea74439 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -27,7 +27,7 @@ __all__ = ['CLexer', 'CppLexer', 'DLexer', 'DelphiLexer', 'ECLexer', 'DylanLexer', 'ObjectiveCLexer', 'FortranLexer', 'GLShaderLexer', 'PrologLexer', 'CythonLexer', 'ValaLexer', 'OocLexer', 'GoLexer', 'FelixLexer', 'AdaLexer', 'Modula2Lexer', 'BlitzMaxLexer', - 'NimrodLexer', 'FantomLexer', 'RustLexer', 'CUDALexer'] + 'NimrodLexer', 'FantomLexer', 'RustLexer', 'CUDALexer', 'MonkeyLexer'] class CLexer(RegexLexer): @@ -176,7 +176,8 @@ class CppLexer(RegexLexer): name = 'C++' aliases = ['cpp', 'c++'] filenames = ['*.cpp', '*.hpp', '*.c++', '*.h++', - '*.cc', '*.hh', '*.cxx', '*.hxx'] + '*.cc', '*.hh', '*.cxx', '*.hxx', + '*.C', '*.H', '*.cp', '*.CPP'] mimetypes = ['text/x-c++hdr', 'text/x-c++src'] #: optional Comment or Whitespace @@ -1226,6 +1227,10 @@ class ObjectiveCLexer(RegexLexer): ], 'method': [ include('whitespace'), + # TODO unsure if ellipses are allowed elsewhere, see discussion in + # Issue 789 + (r',', Punctuation), + (r'\.\.\.', Punctuation), (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this), Name.Variable)), (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function), @@ -1450,7 +1455,8 @@ class PrologLexer(RegexLexer): (r"'(?:''|[^'])*'", String.Atom), # quoted atom # Needs to not be followed by an atom. #(r'=(?=\s|[a-zA-Z\[])', Operator), - (r'(is|<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])', + (r'is\b', Operator), + (r'(<|>|=<|>=|==|=:=|=|/|//|\*|\+|-)(?=\s|[a-zA-Z0-9\[])', Operator), (r'(mod|div|not)\b', Operator), (r'_', Keyword), # The don't-care variable @@ -3050,3 +3056,126 @@ class CUDALexer(CLexer): elif value in self.functions: token = Name.Function yield index, token, value + + +class MonkeyLexer(RegexLexer): + """ + For + `Monkey <https://en.wikipedia.org/wiki/Monkey_(programming_language)>`_ + source code. + """ + + name = 'Monkey' + aliases = ['monkey'] + filenames = ['*.monkey'] + mimetypes = ['text/x-monkey'] + + name_variable = r'[a-z_][a-zA-Z0-9_]*' + name_function = r'[A-Z][a-zA-Z0-9_]*' + name_constant = r'[A-Z_][A-Z0-9_]*' + name_class = r'[A-Z][a-zA-Z0-9_]*' + name_module = r'[a-z0-9_]*' + + keyword_type = r'(?:Int|Float|String|Bool|Object|Array|Void)' + # ? == Bool // % == Int // # == Float // $ == String + keyword_type_special = r'[?%#$]' + + flags = re.MULTILINE + + tokens = { + 'root': [ + #Text + (r'\s+', Text), + # Comments + (r"'.*", Comment), + (r'(?i)^#rem\b', Comment.Multiline, 'comment'), + # preprocessor directives + (r'(?i)^(?:#If|#ElseIf|#Else|#EndIf|#End|#Print|#Error)\b', Comment.Preproc), + # preprocessor variable (any line starting with '#' that is not a directive) + (r'^#', Comment.Preproc, 'variables'), + # String + ('"', String.Double, 'string'), + # Numbers + (r'[0-9]+\.[0-9]*(?!\.)', Number.Float), + (r'\.[0-9]+(?!\.)', Number.Float), + (r'[0-9]+', Number.Integer), + (r'\$[0-9a-fA-Z]+', Number.Hex), + (r'\%[10]+', Number), # Binary + # Native data types + (r'\b%s\b' % keyword_type, Keyword.Type), + # Exception handling + (r'(?i)\b(?:Try|Catch|Throw)\b', Keyword.Reserved), + (r'Throwable', Name.Exception), + # Builtins + (r'(?i)\b(?:Null|True|False)\b', Name.Builtin), + (r'(?i)\b(?:Self|Super)\b', Name.Builtin.Pseudo), + (r'\b(?:HOST|LANG|TARGET|CONFIG)\b', Name.Constant), + # Keywords + (r'(?i)^(Import)(\s+)(.*)(\n)', bygroups(Keyword.Namespace, Text, Name.Namespace, Text)), + (r'(?i)^Strict\b.*\n', Keyword.Reserved), + (r'(?i)(Const|Local|Global|Field)(\s+)', bygroups(Keyword.Declaration, Text), 'variables'), + (r'(?i)(New|Class|Interface|Extends|Implements)(\s+)', bygroups(Keyword.Reserved, Text), 'classname'), + (r'(?i)(Function|Method)(\s+)', bygroups(Keyword.Reserved, Text), 'funcname'), + (r'(?i)(?:End|Return|Public|Private|Extern|Property|Final|Abstract)\b', Keyword.Reserved), + # Flow Control stuff + (r'(?i)(?:If|Then|Else|ElseIf|EndIf|' + r'Select|Case|Default|' + r'While|Wend|' + r'Repeat|Until|Forever|' + r'For|To|Until|Step|EachIn|Next|' + r'Exit|Continue)\s+', Keyword.Reserved), + # not used yet + (r'(?i)\b(?:Module|Inline)\b', Keyword.Reserved), + # Array + (r'[\[\]]', Punctuation), + # Other + (r'<=|>=|<>|\*=|/=|\+=|-=|&=|~=|\|=|[-&*/^+=<>|~]', Operator), + (r'(?i)(?:Not|Mod|Shl|Shr|And|Or)', Operator.Word), + (r'[\(\){}!#,.:]', Punctuation), + # catch the rest + (r'%s\b' % name_constant, Name.Constant), + (r'%s\b' % name_function, Name.Function), + (r'%s\b' % name_variable, Name.Variable), + ], + 'funcname': [ + (r'(?i)%s\b' % name_function, Name.Function), + (r':', Punctuation, 'classname'), + (r'\s+', Text), + (r'\(', Punctuation, 'variables'), + (r'\)', Punctuation, '#pop') + ], + 'classname': [ + (r'%s\.' % name_module, Name.Namespace), + (r'%s\b' % keyword_type, Keyword.Type), + (r'%s\b' % name_class, Name.Class), + # array (of given size) + (r'(\[)(\s*)(\d*)(\s*)(\])', + bygroups(Punctuation, Text, Number.Integer, Text, Punctuation)), + # generics + (r'\s+(?!<)', Text, '#pop'), + (r'<', Punctuation, '#push'), + (r'>', Punctuation, '#pop'), + (r'\n', Text, '#pop'), + (r'', Text, '#pop') + ], + 'variables': [ + (r'%s\b' % name_constant, Name.Constant), + (r'%s\b' % name_variable, Name.Variable), + (r'%s' % keyword_type_special, Keyword.Type), + (r'\s+', Text), + (r':', Punctuation, 'classname'), + (r',', Punctuation, '#push'), + (r'', Text, '#pop') + ], + 'string': [ + (r'[^"~]+', String.Double), + (r'~q|~n|~r|~t|~z|~~', String.Escape), + (r'"', String.Double, '#pop'), + ], + 'comment' : [ + (r'(?i)^#rem.*?', Comment.Multiline, "#push"), + (r'(?i)^#end.*?', Comment.Multiline, "#pop"), + (r'\n', Comment.Multiline), + (r'.+', Comment.Multiline), + ], + } diff --git a/pygments/lexers/dalvik.py b/pygments/lexers/dalvik.py new file mode 100644 index 00000000..861d769c --- /dev/null +++ b/pygments/lexers/dalvik.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.dalvik + ~~~~~~~~~~~~~~~~~~~~~~ + + Pygments lexers for Dalvik VM-related languages. + + :copyright: Copyright 2011-2012 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, bygroups, using +from pygments.token import Keyword, Text, Comment, Name, String, Number, \ + Punctuation + +__all__ = ['SmaliLexer'] + + +class SmaliLexer(RegexLexer): + """ + For `Smali <http://code.google.com/p/smali/>`_ (Android/Dalvik) assembly + code. + + *New in Pygments 1.6.* + """ + name = 'Smali' + aliases = ['smali'] + filenames = ['*.smali'] + mimetypes = ['text/smali'] + + tokens = { + 'root': [ + include('comment'), + include('label'), + include('field'), + include('method'), + include('class'), + include('directive'), + include('access-modifier'), + include('instruction'), + include('literal'), + include('punctuation'), + include('type'), + include('whitespace') + ], + 'directive': [ + (r'^[ \t]*\.(class|super|implements|field|subannotation|annotation|' + r'enum|method|registers|locals|array-data|packed-switch|' + r'sparse-switch|catchall|catch|line|parameter|local|prologue|' + r'epilogue|source)', Keyword), + (r'^[ \t]*\.end (field|subannotation|annotation|method|array-data|' + 'packed-switch|sparse-switch|parameter|local)', Keyword), + (r'^[ \t]*\.restart local', Keyword), + ], + 'access-modifier': [ + (r'(public|private|protected|static|final|synchronized|bridge|' + r'varargs|native|abstract|strictfp|synthetic|constructor|' + r'declared-synchronized|interface|enum|annotation|volatile|' + r'transient)', Keyword), + ], + 'whitespace': [ + (r'\n', Text), + (r'\s+', Text), + ], + 'instruction': [ + (r'\b[vp]\d+\b', Name.Builtin), # registers + (r'\b[a-z][A-Za-z0-9/-]+\s+', Text), # instructions + ], + 'literal': [ + (r'".*"', String), + (r'0x[0-9A-Fa-f]+t?', Number.Hex), + (r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'[0-9]+L?', Number.Integer), + ], + 'field': [ + (r'(\$?\b)([A-Za-z0-9_$]*)(:)', + bygroups(Punctuation, Name.Variable, Punctuation)), + ], + 'method': [ + (r'<(?:cl)?init>', Name.Function), # constructor + (r'(\$?\b)([A-Za-z0-9_$]*)(\()', + bygroups(Punctuation, Name.Function, Punctuation)), + ], + 'label': [ + (r':[A-Za-z0-9_]+', Name.Label), + ], + 'class': [ + # class names in the form Lcom/namespace/ClassName; + # I only want to color the ClassName part, so the namespace part is + # treated as 'Text' + (r'(L)((?:[A-Za-z0-9_$]+/)*)([A-Za-z0-9_$]+)(;)', + bygroups(Keyword.Type, Text, Name.Class, Text)), + ], + 'punctuation': [ + (r'->', Punctuation), + (r'[{},\(\):=\.-]', Punctuation), + ], + 'type': [ + (r'[ZBSCIJFDV\[]+', Keyword.Type), + ], + 'comment': [ + (r'#.*?\n', Comment), + ], + } diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 161a3382..83eb02f5 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -60,13 +60,13 @@ class JavaLexer(RegexLexer): (r'(class|interface)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'"(\\\\|\\"|[^"])*"', String), - (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), + (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), - (r'0x[0-9a-f]+', Number.Hex), + (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+L?', Number.Integer), (r'\n', Text) ], @@ -161,7 +161,7 @@ class ScalaLexer(RegexLexer): (r'(type)(\s+)', bygroups(Keyword, Text), 'type'), (r'""".*?"""(?!")', String), (r'"(\\\\|\\"|[^"])*"', String), - (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), + (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), # (ur'(\.)(%s|%s|`[^`]+`)' % (idrest, op), bygroups(Operator, # Name.Attribute)), (idrest, Name), @@ -171,7 +171,7 @@ class ScalaLexer(RegexLexer): (op, Operator), (r'([0-9][0-9]*\.[0-9]*|\.[0-9]+)([eE][+-]?[0-9]+)?[fFdD]?', Number.Float), - (r'0x[0-9a-f]+', Number.Hex), + (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+L?', Number.Integer), (r'\n', Text) ], @@ -357,13 +357,13 @@ class GroovyLexer(RegexLexer): (r"'(\\\\|\\'|[^'])*'", String.Single), (r'\$/((?!/\$).)*/\$', String), (r'/(\\\\|\\"|[^/])*/', String), - (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Char), + (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Char), (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), - (r'0x[0-9a-f]+', Number.Hex), + (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+L?', Number.Integer), (r'\n', Text) ], @@ -742,7 +742,7 @@ class TeaLangLexer(RegexLexer): (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), (r'(isa|[.]{3}|[.]{2}|[=#!<>+-/%&;,.\*\\\(\)\[\]\{\}])', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), - (r'0x[0-9a-f]+', Number.Hex), + (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+L?', Number.Integer), (r'\n', Text) ], @@ -795,8 +795,8 @@ class CeylonLexer(RegexLexer): (r'(class|interface|object)(\s+)', bygroups(Keyword.Declaration, Text), 'class'), (r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'"(\\\\|\\"|[^"])*"', String), - (r"'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'", String.Quoted), - (r"`\\.`|`[^\\]`|`\\u[0-9a-f]{4}`", String.Char), + (r"'\\.'|'[^\\]'|'\\u[0-9a-fA-F]{4}'", String.Quoted), + (r"`\\.`|`[^\\]`|`\\u[0-9a-fA-F]{4}`", String.Char), (r'(\.)([a-zA-Z_][a-zA-Z0-9_]*)', bygroups(Operator, Name.Attribute)), (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label), (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), @@ -805,7 +805,7 @@ class CeylonLexer(RegexLexer): (r'\d{1,3}(_\d{3})+\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?', Number.Float), (r'[0-9][0-9]*\.\d{1,3}(_\d{3})+[kMGTPmunpf]?', Number.Float), (r'[0-9][0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[kMGTPmunpf]?', Number.Float), - (r'0x[0-9a-f]+', Number.Hex), + (r'0x[0-9a-fA-F]+', Number.Hex), (r'\d{1,3}(_\d{3})+[kMGTP]?', Number.Integer), (r'[0-9]+[kMGTP]?', Number.Integer), (r'\n', Text) @@ -977,7 +977,7 @@ class XtendLexer(RegexLexer): (r'[a-zA-Z_\$][a-zA-Z0-9_]*', Name), (r'[~\^\*!%&\[\]\(\)\{\}<>\|+=:;,./?-]', Operator), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), - (r'0x[0-9a-f]+', Number.Hex), + (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+L?', Number.Integer), (r'\n', Text) ], diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index cff4ddd0..ca50665e 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -25,7 +25,7 @@ __all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer 'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer', 'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer', 'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer', 'HttpLexer', - 'PyPyLogLexer'] + 'PyPyLogLexer', 'RegeditLexer'] class IniLexer(RegexLexer): @@ -41,7 +41,7 @@ class IniLexer(RegexLexer): tokens = { 'root': [ (r'\s+', Text), - (r'[;#].*?$', Comment), + (r'[;#].*', Comment.Single), (r'\[.*?\]$', Keyword), (r'(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)', bygroups(Name.Attribute, Text, Operator, Text, String)) @@ -55,6 +55,49 @@ class IniLexer(RegexLexer): return text[0] == '[' and text[npos-1] == ']' +class RegeditLexer(RegexLexer): + """ + Lexer for `Windows Registry + <http://en.wikipedia.org/wiki/Windows_Registry#.REG_files>`_ files produced + by regedit. + + *New in Pygments 1.6.* + """ + + name = 'reg' + aliases = [] + filenames = ['*.reg'] + mimetypes = ['text/x-windows-registry'] + + tokens = { + 'root': [ + (r'Windows Registry Editor.*', Text), + (r'\s+', Text), + (r'[;#].*', Comment.Single), + (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$', + bygroups(Keyword, Operator, Name.Builtin, Keyword)), + # String keys, which obey somewhat normal escaping + (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)', + bygroups(Name.Attribute, Text, Operator, Text), + 'value'), + # Bare keys (includes @) + (r'(.*?)([ \t]*)(=)([ \t]*)', + bygroups(Name.Attribute, Text, Operator, Text), + 'value'), + ], + 'value': [ + (r'-', Operator, '#pop'), # delete value + (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)', + bygroups(Name.Variable, Punctuation, Number), '#pop'), + # As far as I know, .reg files do not support line continuation. + (r'.*', String, '#pop'), + ] + } + + def analyse_text(text): + return text.startswith('Windows Registry Editor') + + class PropertiesLexer(RegexLexer): """ Lexer for configuration files in Java's properties format. diff --git a/tests/examplefiles/example.monkey b/tests/examplefiles/example.monkey new file mode 100644 index 00000000..facd3a73 --- /dev/null +++ b/tests/examplefiles/example.monkey @@ -0,0 +1,152 @@ +Strict + +' single line comment + +#rem +multi +line +comment +#end + +#rem +nested +#rem +multi +line +#end +comment +#end + +Import mojo + +Const ONECONST:Int = 1 +Const TWOCONST := 2 +Const THREECONST := 3, FOURCONST:Int = 4 + +Global someVariable:Int = 4 + +' sample class from the documentation +Class Game Extends App + + Function New() + End + + Function DrawSpiral(clock) + Local w=DeviceWidth/2 + For Local i#=0 Until w*1.5 Step .2 + Local x#,y# + x=w+i*Sin(i*3+clock) + y=w+i*Cos(i*2+clock) + DrawRect x,y,1,1 + Next + hitbox.Collide(event.pos) + End + + Field updateCount + + Method OnCreate() + Print "spiral" + + SetUpdateRate 60 + End + + Method OnUpdate() + updateCount+=1 + End + + Method OnRender() + Cls + DrawSpiral updateCount + DrawSpiral updateCount*1.1 + End + +End + +Class Enemy + Method Die () Abstract +End + +' extending +Class Hoodlum Extends Enemy + ' field + Field testField:Bool = True + + ' naming class with modulepath + Local currentNode:list.Node<Vector2D> + + Method Die () + Print "B'oss, he-- he killed me, b'oss!" + End +End + +' extending with generics +Class VectorNode Extends Node<Vector2D> +End + +' interfaces +Interface Computer + Method Boot () + Method Process () + Method Display () +End + +Class PC Implements Computer +End + +' array syntax +Global listOfStuff:String[42] +Global lessStuff:String[5] = listOfStuff[4..8] +Global oneStuff:String = listOfStuff[23] + +'a comma separated sequence +Global scores:Int[]=[10,20,30] +'a comma separated sequence +Global text:String[]=["Hello","There","World"] +Global worstCase:worst.List<String[]> + +' string type +Global string1:String = "Hello world" +Global string2$ = "Hello world" + +' escape characers in strings +Global string3 := "Hello~zWorld" +Global string4 := "~qHello World~q" +Global string5 := "~tIndented~n" +Global string6 := "tilda is wavey... ~~" + +' string pseudofunctions +Print " Hello World ~n".Trim() ' prints "Hello World" +Print "Hello World".ToUpper() ' prints "HELLO WORLD" + +' Boolean shorttype +Global boolVariable1:Bool = True +Global boolVariable2? = False + +' number formats +Global hexNum1:Int = $3d0dead +Global hexNum2% = $CAFEBABE + +Global floatNum1:Float = 3.141516 +Global floatNum2# = 3.141516 +Global floatNum3 := .141516 + +' preprocessor keywords +#If TARGET = "android" +DoStuff() +#ElseIf TARGET = "ios" +DoOtherStuff() +#End + +' preprocessor variable +#SOMETHING = True +#Print SOMETHING +#If SOMETHING +#End + +' operators +Global a = 32 +Global b = 32 ~ 0 +b ~= 16 +b |= 16 +b &= 16 +Global c = a | b diff --git a/tests/examplefiles/example.reg b/tests/examplefiles/example.reg new file mode 100644 index 00000000..bc4e9df4 --- /dev/null +++ b/tests/examplefiles/example.reg @@ -0,0 +1,19 @@ +Windows Registry Editor Version 5.00 +; comment + +[HKEY_CURRENT_USER\SOFTWARE\Pygments] +@="Hello" +"Key With Spaces"="Something" +"Key With ="="With Quotes" +"Key With = 2"=dword:123 +"Key" = "Value" +"Hex"=hex(0):1,2,3,a,b,f +"Hex 2"=hex(5):80,00,00,ff + +[-HKEY_CURRENT_USER\SOFTWARE\Pygments\Subkey] + +[HKEY_CURRENT_USER\SOFTWARE\Pygments\Subkey2] +; comment +@=- +"Foo"=- +"Foo"="Value" diff --git a/tests/examplefiles/hello.smali b/tests/examplefiles/hello.smali new file mode 100644 index 00000000..e539f00e --- /dev/null +++ b/tests/examplefiles/hello.smali @@ -0,0 +1,40 @@ +# To Recreate: +# +# echo -e 'class hello {\n public static void main(String[] args) {\n +# System.out.println("hi");\n }\n}\n' > hello.java +# javac -target 1.4 -source 1.4 hello.java +# dx --dex --output=hello.dex hello.class +# baksmali hello.dex +# cat out/hello.smali + +.class Lhello; +.super Ljava/lang/Object; +.source "hello.java" + + +# direct methods +.method constructor <init>()V + .registers 1 + + .prologue + .line 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + + return-void +.end method + +.method public static main([Ljava/lang/String;)V + .registers 3 + .parameter + + .prologue + .line 3 + sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream; + + const-string v1, "hi" + + invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + .line 4 + return-void +.end method diff --git a/tests/examplefiles/inet_pton6.dg b/tests/examplefiles/inet_pton6.dg new file mode 100644 index 00000000..c56a66a3 --- /dev/null +++ b/tests/examplefiles/inet_pton6.dg @@ -0,0 +1,71 @@ +re = import! +sys = import! + + +# IPv6address = hexpart [ ":" IPv4address ] +# IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT +# hexpart = [ hexseq ] [ "::" [ hexseq ] ] +# hexseq = hex4 *( ":" hex4) +# hex4 = 1*4HEXDIG +hexpart = r'({0}|)(?:::({0}|)|)'.format r'(?:[\da-f]{1,4})(?::[\da-f]{1,4})*' +addrv4 = r'(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})' +addrv6 = re.compile $ r'(?i)(?:{})(?::{})?$'.format hexpart addrv4 + + +# Parse a base-N number given a list of its digits. +# +# :param q: the number of digits in that numeral system +# +# :param digits: an iterable of integers in range [0..q] +# +# :return: a decimal integer +# +base_n = (q digits) -> foldl (x y) -> (x * q + y) 0 digits + + +# Parse a sequence of hexadecimal numbers +# +# :param q: a string of colon-separated base-16 integers +# +# :return: an iterable of Python ints +# +unhex = q -> q and map p -> (int p 16) (q.split ':') + + +# Parse an IPv6 address as specified in RFC 4291. +# +# :param address: a string, obviously. +# +# :return: an integer which, written in binary form, points to the same node. +# +inet_pton6 = address -> + raise $ ValueError 'not a valid IPv6 address' unless match = addrv6.match address + start, end, *ipv4 = match.groups! + + is_ipv4 = not $ None in ipv4 + shift = (7 - start.count ':' - 2 * is_ipv4) * 16 + + raise $ ValueError 'not a valid IPv6 address' if (end is None and shift) or shift < 0 + hexaddr = (base_n 0x10000 (unhex start) << shift) + base_n 0x10000 (unhex $ end or '') + hexaddr unless is_ipv4 else (hexaddr << 32) + base_n 0x100 (map int ipv4) + + +inet6_type = q -> switch + not q = 'unspecified' + q == 1 = 'loopback' + (q >> 32) == 0x000000000000ffff = 'IPv4-mapped' + (q >> 64) == 0xfe80000000000000 = 'link-local' + (q >> 120) != 0x00000000000000ff = 'general unicast' + (q >> 112) % (1 << 4) == 0x0000000000000000 = 'multicast w/ reserved scope value' + (q >> 112) % (1 << 4) == 0x000000000000000f = 'multicast w/ reserved scope value' + (q >> 112) % (1 << 4) == 0x0000000000000001 = 'interface-local multicast' + (q >> 112) % (1 << 4) == 0x0000000000000004 = 'admin-local multicast' + (q >> 112) % (1 << 4) == 0x0000000000000005 = 'site-local multicast' + (q >> 112) % (1 << 4) == 0x0000000000000008 = 'organization-local multicast' + (q >> 112) % (1 << 4) == 0x000000000000000e = 'global multicast' + (q >> 112) % (1 << 4) != 0x0000000000000002 = 'multicast w/ unknown scope value' + (q >> 24) % (1 << 112) == 0x00000000000001ff = 'solicited-node multicast' + True = 'link-local multicast' + + +print $ (x -> (inet6_type x, hex x)) $ inet_pton6 $ sys.stdin.read!.strip! diff --git a/tests/examplefiles/pytb_test3.pytb b/tests/examplefiles/pytb_test3.pytb new file mode 100644 index 00000000..6947c1ef --- /dev/null +++ b/tests/examplefiles/pytb_test3.pytb @@ -0,0 +1,4 @@ +>>> 3/"3" +Traceback (most recent call last): + File "<stdin>", line 1, in <module> +TypeError: unsupported operand type(s) for /: 'int' and 'str' |