Merge with pygments-main

author: thatch <devnull@localhost> 2009-01-04 09:01:06 -0600
committer: thatch <devnull@localhost> 2009-01-04 09:01:06 -0600
commit: 6b17d2b85ec9cb15976f7738cdc03544f0f86e27 (patch)
tree: b6763872b9fa331b405dedc7d276f2504bfc985c
parent: 64ef7976772355f0ab4af68c4a572483dcbc15ad (diff)
parent: 5ab91c0bb87e55c356a0342020afe6e0f599e5b0 (diff)
download: pygments-6b17d2b85ec9cb15976f7738cdc03544f0f86e27.tar.gz
11 files changed, 1595 insertions, 5 deletions
diff --git a/AUTHORS b/AUTHORS
index 5ab88bdb..6005e6eb 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -32,6 +32,7 @@ Other contributors, listed alphabetically, are:
 * Kirk McDonald -- D lexer
 * Lukas Meuser -- BBCode formatter, Lua lexer
 * Paulo Moura -- Logtalk lexer
+* Ana Nelson -- Ragel, ANTLR lexers
 * Ronny Pfannschmidt -- BBCode lexer
 * Benjamin Peterson -- Test suite refactoring
 * Andre Roberge -- Tango style
diff --git a/CHANGES b/CHANGES
index 830c9013..30cf0fee 100644
--- a/CHANGES
+++ b/CHANGES
@@ -7,10 +7,11 @@ Version 1.1
 
 - Lexers added:
 
-  * GLSL (#369)
+  * Antlr/Ragel, thanks to Ana Nelson
+  * (Ba)sh shell
   * Erlang shell
-  * (Ba)sh shell (#349)
-  * Prolog (#373)
+  * GLSL
+  * Prolog
 
 - Fix a bug lexing extended Ruby strings.
 
diff --git a/Makefile b/Makefile
index 4f3b9c3e..a81376f0 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ export PYTHONPATH = $(shell echo "$$PYTHONPATH"):$(shell python -c 'import os; p
 all: clean-pyc check test
 
 check:
-	@$(PYTHON) scripts/detect_missing_analyse_text.py
+	@$(PYTHON) scripts/detect_missing_analyse_text.py || true
 	@$(PYTHON) scripts/check_sources.py -i apidocs -i pygments/lexers/_mapping.py \
 		   -i docs/build -i pygments/formatters/_mapping.py -i pygments/unistring.py \
 		   -i pygments/lexers/_vimbuiltins.py
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 2a678764..1b4b8c37 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -16,6 +16,15 @@
 LEXERS = {
     'ActionScript3Lexer': ('pygments.lexers.web', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')),
     'ActionScriptLexer': ('pygments.lexers.web', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')),
+    'AntlrActionScriptLexer': ('pygments.lexers.parsers', 'ANTLR With ActionScript Target', ('antlr-as', 'antlr-actionscript'), ('*.G', '*.g'), ()),
+    'AntlrCSharpLexer': ('pygments.lexers.parsers', 'ANTLR With C# Target', ('antlr-csharp', 'antlr-c#'), ('*.G', '*.g'), ()),
+    'AntlrCppLexer': ('pygments.lexers.parsers', 'ANTLR With CPP Target', ('antlr-cpp',), ('*.G', '*.g'), ()),
+    'AntlrJavaLexer': ('pygments.lexers.parsers', 'ANTLR With Java Target', ('antlr-java',), ('*.G', '*.g'), ()),
+    'AntlrLexer': ('pygments.lexers.parsers', 'ANTLR', ('antlr',), (), ()),
+    'AntlrObjectiveCLexer': ('pygments.lexers.parsers', 'ANTLR With ObjectiveC Target', ('antlr-objc',), ('*.G', '*.g'), ()),
+    'AntlrPerlLexer': ('pygments.lexers.parsers', 'ANTLR With Perl Target', ('antlr-perl',), ('*.G', '*.g'), ()),
+    'AntlrPythonLexer': ('pygments.lexers.parsers', 'ANTLR With Python Target', ('antlr-python',), ('*.G', '*.g'), ()),
+    'AntlrRubyLexer': ('pygments.lexers.parsers', 'ANTLR With Ruby Target', ('antlr-ruby', 'antlr-rb'), ('*.G', '*.g'), ()),
     'ApacheConfLexer': ('pygments.lexers.text', 'ApacheConf', ('apacheconf', 'aconf', 'apache'), ('.htaccess', 'apache.conf', 'apache2.conf'), ('text/x-apacheconf',)),
     'AppleScriptLexer': ('pygments.lexers.other', 'AppleScript', ('applescript',), ('*.applescript',), ()),
     'BBCodeLexer': ('pygments.lexers.text', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)),
@@ -118,6 +127,14 @@ LEXERS = {
     'PythonConsoleLexer': ('pygments.lexers.agile', 'Python console session', ('pycon',), (), ('text/x-python-doctest',)),
     'PythonLexer': ('pygments.lexers.agile', 'Python', ('python', 'py'), ('*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript'), ('text/x-python', 'application/x-python')),
     'PythonTracebackLexer': ('pygments.lexers.agile', 'Python Traceback', ('pytb',), ('*.pytb',), ('text/x-python-traceback',)),
+    'RagelCLexer': ('pygments.lexers.parsers', 'Ragel in C Host', ('ragel-c',), ('*.rl',), ()),
+    'RagelCppLexer': ('pygments.lexers.parsers', 'Ragel in CPP Host', ('ragel-cpp',), ('*.rl',), ()),
+    'RagelDLexer': ('pygments.lexers.parsers', 'Ragel in D Host', ('ragel-d',), ('*.rl',), ()),
+    'RagelEmbeddedLexer': ('pygments.lexers.parsers', 'Embedded Ragel', ('ragel-em',), ('*.rl',), ()),
+    'RagelJavaLexer': ('pygments.lexers.parsers', 'Ragel in Java Host', ('ragel-java',), ('*.rl',), ()),
+    'RagelLexer': ('pygments.lexers.parsers', 'Ragel', ('ragel',), (), ()),
+    'RagelObjectiveCLexer': ('pygments.lexers.parsers', 'Ragel in Objective C Host', ('ragel-objc',), ('*.rl',), ()),
+    'RagelRubyLexer': ('pygments.lexers.parsers', 'Ragel in Ruby Host', ('ragel-ruby', 'ragel-rb'), ('*.rl',), ()),
     'RawTokenLexer': ('pygments.lexers.special', 'Raw token data', ('raw',), (), ('application/x-pygments-tokens',)),
     'RedcodeLexer': ('pygments.lexers.other', 'Redcode', ('redcode',), ('*.cw',), ()),
     'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)),
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 3624b270..643ceaad 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -1340,6 +1340,9 @@ class GLShaderLexer(RegexLexer):
     }
 
 class PrologLexer(RegexLexer):
+    """
+    Lexer for Prolog files.
+    """
     name = 'Prolog'
     aliases = ['prolog']
     filenames = ['*.prolog', '*.pro', '*.pl']
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index 83913570..dd187130 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -347,7 +347,7 @@ class BashLexer(RegexLexer):
              r'export|false|fc|fg|getopts|hash|help|history|jobs|kill|let|'
              r'local|logout|popd|printf|pushd|pwd|read|readonly|set|shift|'
              r'shopt|source|suspend|test|time|times|trap|true|type|typeset|'
-             r'ulimit|umask|unalias|unset|wait)\s*\b',
+             r'ulimit|umask|unalias|unset|wait)\s*\b(?!\.)',
              Name.Builtin),
             (r'#.*\n', Comment),
             (r'\\[\w\W]', String.Escape),
diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py
new file mode 100644
index 00000000..00f1ee3e
--- /dev/null
+++ b/pygments/lexers/parsers.py
@@ -0,0 +1,670 @@
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.parsers
+    ~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for parser generators.
+
+    :copyright: 2008-2009 by Ana Nelson <ana@ananelson.com>, Tim Hatch.
+
+    :license: BSD, see LICENSE for more details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, DelegatingLexer, \
+    include, bygroups, using, this
+from pygments.token import Error, Punctuation, Generic, Other, \
+    Text, Comment, Operator, Keyword, Name, String, Number, Whitespace
+from pygments.lexers.compiled import JavaLexer, CLexer, CppLexer, \
+    ObjectiveCLexer, DLexer
+from pygments.lexers.dotnet import CSharpLexer
+from pygments.lexers.agile import RubyLexer, PythonLexer, PerlLexer
+from pygments.lexers.web import ActionScriptLexer
+# Use TextLexer during development to just focus on one part of a delegating
+# lexer.
+from pygments.lexers.special import TextLexer
+
+__all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
+           'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
+           'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
+           'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
+           #'AntlrCLexer',
+           'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
+           'AntlrJavaLexer', "AntlrActionScriptLexer"]
+
+class RagelLexer(RegexLexer):
+    """
+    A pure `Ragel <http://www.complang.org/ragel/>`_ lexer.  Use this for
+    fragments of Ragel.  For ``.rl`` files, use RagelEmbeddedLexer instead
+    (or one of the language-specific subclasses).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Ragel'
+    aliases = ['ragel']
+    filenames = []
+
+    tokens = {
+        'whitespace': [
+            (r'\s+', Whitespace)
+        ],
+        'comments': [
+            (r'\#.*$', Comment),
+        ],
+        'keywords': [
+            (r'(access|action|alphtype)\b', Keyword),
+            (r'(getkey|write|machine|include)\b', Keyword),
+            (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
+            (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
+        ],
+        'numbers': [
+            (r'0x[0-9A-Fa-f]+', Number.Hex),
+            (r'[+-]?[0-9]+', Number.Integer),
+        ],
+        'literals': [
+            (r'"(\\\\|\\"|[^"])*"', String), # double quote string
+            (r"'(\\\\|\\'|[^'])*'", String), # single quote string
+            (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
+            (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
+        ],
+        'identifiers': [
+            (r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
+        ],
+        'operators': [
+            (r',', Operator), # Join
+            (r'->', Operator), # Epsilon Transition (must be tried before '-')
+            (r'\||&|--|-', Operator), # Union, Intersection, Subtraction ('--' first)
+            (r'\.|<:|:>>|:>', Operator), # Concatenation (':>>' before ':>')
+            (r':', Operator), # Label
+            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
+            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
+            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
+            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
+            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
+            (r'>|@|\$|%', Operator), # Transition Actions and Priorities
+            (r'\*|\?|\+|{[0-9]*,[0-9]*}', Operator), # Repetition
+            (r'!|\^', Operator), # Negation
+            (r'\(|\)', Operator), # Grouping
+        ],
+        'root': [
+            include('literals'),
+            include('whitespace'),
+            include('comments'),
+            include('keywords'),
+            include('numbers'),
+            include('identifiers'),
+            include('operators'),
+            (r'{', Punctuation, 'host'),
+            (r'=', Operator),
+            (r';', Punctuation),
+        ],
+        'host': [
+            (r'(' + r'|'.join(( # keep host code in largest possible chunks
+                r'[^{}\'"/#]+', # exclude unsafe characters
+                r'[^\\][\\][{}]', # allow escaped { or }
+
+                # strings and comments may safely contain unsafe characters
+                r'"(\\\\|\\"|[^"])*"', # double quote string
+                r"'(\\\\|\\'|[^'])*'", # single quote string
+                r'//.*$\n?', # single line comment
+                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+                r'\#.*$\n?', # ruby comment
+
+                # regular expression: There's no reason for it to start
+                # with a * and this stops confusion with comments.
+                r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+                # / is safe now that we've handled regex and javadoc comments
+                r'/',
+            )) + r')+', Other),
+
+            (r'{', Punctuation, '#push'),
+            (r'}', Punctuation, '#pop'),
+        ],
+    }
+
+class RagelEmbeddedLexer(RegexLexer):
+    """
+    A lexer for `Ragel`_ embedded in a host language file.
+
+    This will only highlight Ragel statements. If you want host language
+    highlighting then call the language-specific Ragel lexer.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Embedded Ragel'
+    aliases = ['ragel-em']
+    filenames = ['*.rl']
+
+    tokens = {
+        'root': [
+            (r'(' + r'|'.join(( # keep host code in largest possible chunks
+                r'[^%\'"/#]+', # exclude unsafe characters
+                r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
+
+                # strings and comments may safely contain unsafe characters
+                r'"(\\\\|\\"|[^"])*"', # double quote string
+                r"'(\\\\|\\'|[^'])*'", # single quote string
+                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+                r'//.*$\n?', # single line comment
+                r'\#.*$\n?', # ruby/ragel comment
+                r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
+
+                # / is safe now that we've handled regex and javadoc comments
+                r'/',
+            )) + r')+', Other),
+
+            # Single Line FSM.
+            # Please don't put a quoted newline in a single line FSM.
+            # That's just mean. It will break this.
+            (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
+                                                     using(RagelLexer),
+                                                     Punctuation, Text)),
+
+            # Multi Line FSM.
+            (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'),
+        ],
+        'multi-line-fsm': [
+            (r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
+                r'(' + r'|'.join((
+                    r'[^}\'"\[/#]', # exclude unsafe characters
+                    r'}(?=[^%]|$)', # } is okay as long as it's not followed by %
+                    r'}%(?=[^%]|$)', # ...well, one %'s okay, just not two...
+                    r'[^\\][\\][{}]', # ...and } is okay if it's escaped
+
+                    # allow / if it's preceded with one of these symbols
+                    # (ragel EOF actions)
+                    r'(>|\$|%|<|@|<>)/',
+
+                    # specifically allow regex followed immediately by *
+                    # so it doesn't get mistaken for a comment
+                    r'/(?!\*)(\\\\|\\/|[^/])*/\*',
+
+                    # allow / as long as it's not followed by another / or by a *
+                    r'/(?=[^/\*]|$)',
+
+                    # We want to match as many of these as we can in one block.
+                    # Not sure if we need the + sign here,
+                    # does it help performance?
+                    )) + r')+',
+
+                # strings and comments may safely contain unsafe characters
+                r'"(\\\\|\\"|[^"])*"', # double quote string
+                r"'(\\\\|\\'|[^'])*'", # single quote string
+                r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
+                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+                r'//.*$\n?', # single line comment
+                r'\#.*$\n?', # ruby/ragel comment
+            )) + r')+', using(RagelLexer)),
+
+            (r'}%%', Punctuation, '#pop'),
+        ]
+    }
+
+    def analyse_text(text):
+        return '@LANG: indep' in text or 0.1
+
+class RagelRubyLexer(DelegatingLexer):
+    """
+    A lexer for `Ragel`_ in a Ruby host file.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Ragel in Ruby Host'
+    aliases = ['ragel-ruby', 'ragel-rb']
+    filenames = ['*.rl']
+
+    def __init__(self, **options):
+        super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
+                                              **options)
+
+    def analyse_text(text):
+        return '@LANG: ruby' in text
+
+class RagelCLexer(DelegatingLexer):
+    """
+    A lexer for `Ragel`_ in a C host file.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Ragel in C Host'
+    aliases = ['ragel-c']
+    filenames = ['*.rl']
+
+    def __init__(self, **options):
+        super(RagelCLexer, self).__init__(CLexer, RagelEmbeddedLexer,
+                                          **options)
+
+    def analyse_text(text):
+        return '@LANG: c' in text
+
+class RagelDLexer(DelegatingLexer):
+    """
+    A lexer for `Ragel`_ in a D host file.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Ragel in D Host'
+    aliases = ['ragel-d']
+    filenames = ['*.rl']
+
+    def __init__(self, **options):
+        super(RagelDLexer, self).__init__(DLexer, RagelEmbeddedLexer, **options)
+
+    def analyse_text(text):
+        return '@LANG: d' in text
+
+class RagelCppLexer(DelegatingLexer):
+    """
+    A lexer for `Ragel`_ in a CPP host file.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Ragel in CPP Host'
+    aliases = ['ragel-cpp']
+    filenames = ['*.rl']
+
+    def __init__(self, **options):
+        super(RagelCppLexer, self).__init__(CppLexer, RagelEmbeddedLexer, **options)
+
+    def analyse_text(text):
+        return '@LANG: c++' in text
+
+class RagelObjectiveCLexer(DelegatingLexer):
+    """
+    A lexer for `Ragel`_ in an Objective C host file.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Ragel in Objective C Host'
+    aliases = ['ragel-objc']
+    filenames = ['*.rl']
+
+    def __init__(self, **options):
+        super(RagelObjectiveCLexer, self).__init__(ObjectiveCLexer,
+                                                   RagelEmbeddedLexer,
+                                                   **options)
+
+    def analyse_text(text):
+        return '@LANG: objc' in text
+
+class RagelJavaLexer(DelegatingLexer):
+    """
+    A lexer for `Ragel`_ in a Java host file.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'Ragel in Java Host'
+    aliases = ['ragel-java']
+    filenames = ['*.rl']
+
+    def __init__(self, **options):
+        super(RagelJavaLexer, self).__init__(JavaLexer, RagelEmbeddedLexer,
+                                             **options)
+
+    def analyse_text(text):
+        return '@LANG: java' in text
+
+class AntlrLexer(RegexLexer):
+    """
+    Generic ANTLR Lexer.
+    Should not be called directly, instead
+    use DelegatingLexer for your target language.
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR'
+    aliases = ['antlr']
+    filenames = []
+
+    _id =          r'[A-Za-z][A-Za-z_0-9]*'
+    _TOKEN_REF =   r'[A-Z][A-Za-z_0-9]*'
+    _RULE_REF =    r'[a-z][A-Za-z_0-9]*'
+    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\'])*\''  # body may hold \\ and \' escapes
+    _INT = r'[0-9]+'
+
+    tokens = {
+        'whitespace': [
+            (r'\s+', Whitespace),
+        ],
+        'comments': [
+            (r'//.*$', Comment),
+            (r'/\*(.|\n)*?\*/', Comment),
+        ],
+        'root': [
+            include('whitespace'),
+            include('comments'),
+
+            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
+             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
+                      Punctuation)),
+            # optionsSpec
+            (r'options\b', Keyword, 'options'),
+            # tokensSpec
+            (r'tokens\b', Keyword, 'tokens'),
+            # attrScope
+            (r'(scope)(\s*)(' + _id + ')(\s*)({)',
+             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
+                      Punctuation), 'action'),
+            # exception
+            (r'(catch|finally)\b', Keyword, 'exception'),
+            # action
+            (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)({)',
+             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
+                      Name.Label, Whitespace, Punctuation), 'action'),
+            # rule
+            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
+             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
+             ('rule-alts', 'rule-prelims')),
+        ],
+        'exception': [
+            (r'\n', Whitespace, '#pop'),
+            (r'\s', Whitespace),
+            include('comments'),
+
+            (r'\[', Punctuation, 'nested-arg-action'),
+            (r'\{', Punctuation, 'action'),
+        ],
+        'rule-prelims': [
+            include('whitespace'),
+            include('comments'),
+
+            (r'returns\b', Keyword),
+            (r'\[', Punctuation, 'nested-arg-action'),
+            (r'\{', Punctuation, 'action'),
+            # throwsSpec
+            (r'(throws)(\s+)(' + _id + ')',
+             bygroups(Keyword, Whitespace, Name.Label)),
+            (r'(?:(,)(\s*)(' + _id + '))+',
+             bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
+            # optionsSpec
+            (r'options\b', Keyword, 'options'),
+            # ruleScopeSpec - scope followed by target language code or name of action
+            # TODO finish implementing other possibilities for scope
+            # L173 ANTLRv3.g from ANTLR book
+            (r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation),
+            'action'),
+            (r'(scope)(\s+)(' + _id + ')(\s*)(;)',
+             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
+            # ruleAction
+            (r'(@' + _id + ')(\s*)({)',
+             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
+            # finished prelims, go to rule alts!
+            (r':', Punctuation, '#pop')
+        ],
+        'rule-alts': [
+            include('whitespace'),
+            include('comments'),
+
+            # These might need to go in a separate 'block' state triggered by (
+            (r'options\b', Keyword, 'options'),
+            (r':', Punctuation),
+
+            # literals
+            (r"'(\\\\|\\'|[^'])*'", String),
+            (r'"(\\\\|\\"|[^"])*"', String),
+            (r'<<([^>]|>[^>])*>>', String),
+            # identifiers
+            # Tokens start with capital letter.
+            (r'\$?[A-Z_][A-Za-z_0-9]*', Name.Constant),
+            # Rules start with small letter.
+            (r'\$?[a-z_][A-Za-z_0-9]*', Name.Variable),
+            # operators
+            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
+            (r',', Punctuation),
+            (r'\[', Punctuation, 'nested-arg-action'),
+            (r'\{', Punctuation, 'action'),
+            (r';', Punctuation, '#pop')
+        ],
+        'tokens': [
+            include('whitespace'),
+            include('comments'),
+            (r'{', Punctuation),
+            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL + ')?(\s*)(;)',
+             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
+                      String, Whitespace, Punctuation)),
+            (r'}', Punctuation, '#pop'),
+        ],
+        'options': [
+            include('whitespace'),
+            include('comments'),
+            (r'{', Punctuation),
+            (r'(' + _id + r')(\s*)(=)(\s*)(' +
+             '|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)',
+             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
+                      Text, Whitespace, Punctuation)),
+            (r'}', Punctuation, '#pop'),
+        ],
+        'action': [
+            (r'(' + r'|'.join(( # keep host code in largest possible chunks
+                r'[^\${}\'"/\\]+', # exclude unsafe characters
+
+                # strings and comments may safely contain unsafe characters
+                r'"(\\\\|\\"|[^"])*"', # double quote string
+                r"'(\\\\|\\'|[^'])*'", # single quote string
+                r'//.*$\n?', # single line comment
+                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+
+                # regular expression: There's no reason for it to start
+                # with a * and this stops confusion with comments.
+                r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+                # backslashes are okay, as long as we are not backslashing a %
+                r'\\(?!%)',
+
+                # Now that we've handled regex and javadoc comments
+                # it's safe to let / through.
+                r'/',
+            )) + r')+', Other),
+            (r'(\\)(%)', bygroups(Punctuation, Other)),
+            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
+             bygroups(Name.Variable, Punctuation, Name.Property)),
+            (r'{', Punctuation, '#push'),
+            (r'}', Punctuation, '#pop'),
+        ],
+        'nested-arg-action': [
+            (r'(' + r'|'.join(( # keep host code in largest possible chunks.
+                r'[^\$\[\]\'"/]+', # exclude unsafe characters
+
+                # strings and comments may safely contain unsafe characters
+                r'"(\\\\|\\"|[^"])*"', # double quote string
+                r"'(\\\\|\\'|[^'])*'", # single quote string
+                r'//.*$\n?', # single line comment
+                r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+
+                # regular expression: There's no reason for it to start
+                # with a * and this stops confusion with comments.
+                r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+                # Now that we've handled regex and javadoc comments
+                # it's safe to let / through.
+                r'/',
+            )) + r')+', Other),
+
+
+            (r'\[', Punctuation, '#push'),
+            (r'\]', Punctuation, '#pop'),
+            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
+             bygroups(Name.Variable, Punctuation, Name.Property)),
+            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
+        ]
+    }
+
+# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
+
+# TH: I'm not aware of any language features of C++ that will cause
+# incorrect lexing of C files.  Antlr doesn't appear to make a distinction,
+# so just assume they're C++.  No idea how to make Objective C work in the
+# future.
+
+#class AntlrCLexer(DelegatingLexer):
+#    """
+#    ANTLR with C Target
+#
+#    *New in Pygments 1.1*
+#    """
+#
+#    name = 'ANTLR With C Target'
+#    aliases = ['antlr-c']
+#    filenames = ['*.G', '*.g']
+#
+#    def __init__(self, **options):
+#        super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options)
+#
+#    def analyse_text(text):
+#        return re.match(r'^\s*language\s*=\s*C\s*;', text)
+
+class AntlrCppLexer(DelegatingLexer):
+    """
+    ANTLR with CPP Target (also claims grammars declaring ``language = C;``).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With CPP Target'
+    aliases = ['antlr-cpp']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrCppLexer, self).__init__(CppLexer, AntlrLexer, **options)
+
+    def analyse_text(text):  # re.match only tries offset 0; search every line instead
+        return re.search(r'^\s*language\s*=\s*C\s*;', text, re.M) is not None
+
+class AntlrObjectiveCLexer(DelegatingLexer):
+    """
+    ANTLR with ObjectiveC Target (grammars declaring ``language = ObjC;``).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With ObjectiveC Target'
+    aliases = ['antlr-objc']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrObjectiveCLexer, self).__init__(ObjectiveCLexer,
+                                                   AntlrLexer, **options)
+
+    def analyse_text(text):  # look for the ObjC target option on any line of the grammar
+        return re.search(r'^\s*language\s*=\s*ObjC\s*;', text, re.M) is not None
+
+class AntlrCSharpLexer(DelegatingLexer):
+    """
+    ANTLR with C# Target (grammars declaring ``language = CSharp2;``).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With C# Target'
+    aliases = ['antlr-csharp', 'antlr-c#']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrCSharpLexer, self).__init__(CSharpLexer, AntlrLexer,
+                                               **options)
+
+    def analyse_text(text):  # re.match only tries offset 0; search every line instead
+        return re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M) is not None
+
+class AntlrPythonLexer(DelegatingLexer):
+    """
+    ANTLR with Python Target (grammars declaring ``language = Python;``).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With Python Target'
+    aliases = ['antlr-python']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrPythonLexer, self).__init__(PythonLexer, AntlrLexer,
+                                               **options)
+
+    def analyse_text(text):  # re.match only tries offset 0; search every line instead
+        return re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M) is not None
+
+
+class AntlrJavaLexer(DelegatingLexer):
+    """
+    ANTLR with Java Target
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With Java Target'
+    aliases = ['antlr-java']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrJavaLexer, self).__init__(JavaLexer, AntlrLexer,
+                                             **options)
+
+    def analyse_text(text):
+        return 0.5 # Antlr is Java if not specified
+
+
+class AntlrRubyLexer(DelegatingLexer):
+    """
+    ANTLR with Ruby Target (grammars declaring ``language = Ruby;``).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With Ruby Target'
+    aliases = ['antlr-ruby', 'antlr-rb']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrRubyLexer, self).__init__(RubyLexer, AntlrLexer,
+                                             **options)
+
+    def analyse_text(text):  # re.match only tries offset 0; search every line instead
+        return re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M) is not None
+
+class AntlrPerlLexer(DelegatingLexer):
+    """
+    ANTLR with Perl Target (grammars declaring ``language = Perl5;``).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With Perl Target'
+    aliases = ['antlr-perl']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrPerlLexer, self).__init__(PerlLexer, AntlrLexer,
+                                             **options)
+
+    def analyse_text(text):  # re.match only tries offset 0; search every line instead
+        return re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M) is not None
+
+class AntlrActionScriptLexer(DelegatingLexer):
+    """
+    ANTLR with ActionScript Target (``language = ActionScript;`` grammars).
+
+    *New in Pygments 1.1*
+    """
+
+    name = 'ANTLR With ActionScript Target'
+    aliases = ['antlr-as', 'antlr-actionscript']
+    filenames = ['*.G', '*.g']
+
+    def __init__(self, **options):
+        super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer,
+                                                     AntlrLexer, **options)
+
+    def analyse_text(text):  # re.match only tries offset 0; search every line instead
+        return re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M) is not None
diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py
index fe062a18..f34a69a8 100644
--- a/pygments/lexers/templates.py
+++ b/pygments/lexers/templates.py
@@ -426,11 +426,19 @@ class MakoLexer(RegexLexer):
             (r'''(?sx)
                 (.+?)                # anything, followed by:
                 (?:
+<<<<<<< local
+                 (?<=\n)(?=%|\#\#) |# an eval or comment line
+                 (?=\#\*) |         # multiline comment
+                 (?=</?%) |         # a python block
+                                    # call start or end
+                 (?=\$\{) |         # a substitution
+=======
                  (?<=\n)(?=%|\#\#) | # an eval or comment line
                  (?=\#\*) |          # multiline comment
                  (?=</?%) |          # a python block
                                      # call start or end
                  (?=\$\{) |          # a substitution
+>>>>>>> other
                  (?<=\n)(?=\s*%) |
                                      # - don't consume
                  (\\\n) |            # an escaped newline
diff --git a/tests/examplefiles/ANTLRv3.g b/tests/examplefiles/ANTLRv3.g
new file mode 100644
index 00000000..fbe6d654
--- /dev/null
+++ b/tests/examplefiles/ANTLRv3.g
@@ -0,0 +1,608 @@
+/*
+ [The "BSD licence"]
+ Copyright (c) 2005-2007 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** ANTLR v3 grammar written in ANTLR v3 with AST construction */
+grammar ANTLRv3;
+
+options {
+	output=AST;
+	ASTLabelType=CommonTree;
+}
+
+tokens {
+	DOC_COMMENT;
+	PARSER;	
+    LEXER;
+    RULE;
+    BLOCK;
+    OPTIONAL;
+    CLOSURE;
+    POSITIVE_CLOSURE;
+    SYNPRED;
+    RANGE;
+    CHAR_RANGE;
+    EPSILON;
+    ALT;
+    EOR;
+    EOB;
+    EOA; // end of alt
+    ID;
+    ARG;
+    ARGLIST;
+    RET;
+    LEXER_GRAMMAR;
+    PARSER_GRAMMAR;
+    TREE_GRAMMAR;
+    COMBINED_GRAMMAR;
+    INITACTION;
+    LABEL; // $x used in rewrite rules
+    TEMPLATE;
+    SCOPE='scope';
+    SEMPRED;
+    GATED_SEMPRED; // {p}? =>
+    SYN_SEMPRED; // (...) =>   it's a manually-specified synpred converted to sempred
+    BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
+    FRAGMENT='fragment';
+    TREE_BEGIN='^(';
+    ROOT='^';
+    BANG='!';
+    RANGE='..';
+    REWRITE='->';
+}
+
+@members {
+	int gtype;
+}
+
+grammarDef
+    :   DOC_COMMENT?
+    	(	'lexer'  {gtype=LEXER_GRAMMAR;}    // pure lexer
+    	|   'parser' {gtype=PARSER_GRAMMAR;}   // pure parser
+    	|   'tree'   {gtype=TREE_GRAMMAR;}     // a tree parser
+    	|		     {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
+    	)
+    	g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
+    	rule+
+    	EOF
+    	-> ^( {adaptor.create(gtype,$g)}
+    		  id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
+    		)
+    ;
+
+tokensSpec
+	:	TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
+	;
+
+tokenSpec
+	:	TOKEN_REF
+		(	'=' (lit=STRING_LITERAL|lit=CHAR_LITERAL)	-> ^('=' TOKEN_REF $lit)
+		|												-> TOKEN_REF
+		)
+		';'
+	;
+
+attrScope
+	:	'scope' id ACTION -> ^('scope' id ACTION)
+	;
+
+/** Match stuff like @parser::members {int i;} */
+action
+	:	'@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
+	;
+
+/** Sometimes the scope names will collide with keywords; allow them as
+ *  ids for action scopes.
+ */
+actionScopeName
+	:	id
+	|	l='lexer'	-> ID[$l]
+    |   p='parser'	-> ID[$p]
+	;
+
+optionsSpec
+	:	OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
+	;
+
+option
+    :   id '=' optionValue -> ^('=' id optionValue)
+ 	;
+ 	
+optionValue
+    :   id
+    |   STRING_LITERAL
+    |   CHAR_LITERAL
+    |   INT
+    |	s='*' -> STRING_LITERAL[$s]  // used for k=*
+    ;
+
+rule
+scope {
+	String name;
+}
+	:	DOC_COMMENT?
+		( modifier=('protected'|'public'|'private'|'fragment') )?
+		id {$rule::name = $id.text;}
+		'!'?
+		( arg=ARG_ACTION )?
+		( 'returns' rt=ARG_ACTION  )?
+		throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
+		':'	altList	';'
+		exceptionGroup?
+	    -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG $arg)? ^(RET $rt)?
+	    	  optionsSpec? ruleScopeSpec? ruleAction*
+	    	  altList
+	    	  exceptionGroup?
+	    	  EOR["EOR"]
+	    	)
+	;
+
+/** Match stuff like @init {int i;} */
+ruleAction
+	:	'@' id ACTION -> ^('@' id ACTION)
+	;
+
+throwsSpec
+	:	'throws' id ( ',' id )* -> ^('throws' id+)
+	;
+
+ruleScopeSpec
+	:	'scope' ACTION -> ^('scope' ACTION)
+	|	'scope' id (',' id)* ';' -> ^('scope' id+)
+	|	'scope' ACTION
+		'scope' id (',' id)* ';'
+		-> ^('scope' ACTION id+ )
+	;
+
+block
+    :   lp='('
+		( (opts=optionsSpec)? ':' )?
+		a1=alternative rewrite ( '|' a2=alternative rewrite )*
+        rp=')'
+        -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? alternative+ EOB[$rp,"EOB"] )
+    ;
+
+altList
+@init {
+	// must create root manually as it's used by invoked rules in real antlr tool.
+	// leave here to demonstrate use of {...} in rewrite rule
+	// it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
+    CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
+}
+    :   a1=alternative rewrite ( '|' a2=alternative rewrite )*
+		-> ^( {blkRoot} (alternative rewrite?)+ EOB["EOB"] )
+    ;
+
+alternative
+@init {
+	Token firstToken = input.LT(1);
+	Token prevToken = input.LT(-1); // either : or | I think
+}
+    :   element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
+    |   -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"])
+    ;
+
+exceptionGroup
+	:	( exceptionHandler )+ ( finallyClause )?
+	|	finallyClause
+    ;
+
+exceptionHandler
+    :    'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
+    ;
+
+finallyClause
+    :    'finally' ACTION -> ^('finally' ACTION)
+    ;
+
+element
+	:	elementNoOptionSpec
+	;
+
+elementNoOptionSpec
+	:	id (labelOp='='|labelOp='+=') atom
+		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
+		|				-> ^($labelOp id atom)
+		)
+	|	id (labelOp='='|labelOp='+=') block
+		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
+		|				-> ^($labelOp id block)
+		)
+	|	atom
+		(	ebnfSuffix	-> ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"])
+		|				-> atom
+		)
+	|	ebnf
+	|   ACTION
+	|   SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
+	|   treeSpec
+	;
+
+atom:   range ( (op='^'|op='!') -> ^($op range) | -> range )
+    |   terminal
+    |	notSet ( (op='^'|op='!') -> ^($op notSet) | -> notSet )
+    |   RULE_REF ( arg=ARG_ACTION )? ( (op='^'|op='!') )?
+    	-> {$arg!=null&&op!=null}?	^($op RULE_REF $arg)
+    	-> {$arg!=null}?			^(RULE_REF $arg)
+    	-> {$op!=null}?				^($op RULE_REF)
+    	-> RULE_REF
+    ;
+
+notSet
+	:	'~'
+		(	notTerminal	-> ^('~' notTerminal)
+		|	block		-> ^('~' block)
+		)
+	;
+
+treeSpec
+	:	'^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
+	;
+
+/** Matches EBNF blocks (and token sets via block rule) */
+ebnf
+@init {
+    Token firstToken = input.LT(1);
+}
+@after {
+	$ebnf.tree.getToken().setLine(firstToken.getLine());
+	$ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
+}
+	:	block {Token op=input.LT(1);}
+		(	'?'		-> ^(OPTIONAL[op] block)
+		|	'*'		-> ^(CLOSURE[op] block)
+		|	'+'		-> ^(POSITIVE_CLOSURE[op] block)
+		|   '^'		-> ^('^' block)
+		|   '!'		-> ^('!' block)
+		|   '=>'	// syntactic predicate
+					-> {gtype==COMBINED_GRAMMAR &&
+					    Character.isUpperCase($rule::name.charAt(0))}?
+					   // if lexer rule in combined, leave as pred for lexer
+					   ^(SYNPRED["=>"] block)
+					// in real antlr tool, text for SYN_SEMPRED is predname
+					-> SYN_SEMPRED
+        |			-> block
+		)
+	;
+
+range!
+	:	c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL -> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
+	;
+
+terminal
+    :   (	CHAR_LITERAL				-> CHAR_LITERAL
+    		// Args are only valid for lexer rules
+		|   TOKEN_REF
+			( ARG_ACTION				-> ^(TOKEN_REF ARG_ACTION)
+			|							-> TOKEN_REF
+			)
+		|   STRING_LITERAL				-> STRING_LITERAL
+		|   '.'							-> '.'
+		)	
+		(	'^'							-> ^('^' $terminal)
+		|	'!' 						-> ^('!' $terminal)
+		)?
+	;
+
+notTerminal
+	:   CHAR_LITERAL
+	|	TOKEN_REF
+	|	STRING_LITERAL
+	;
+	
+ebnfSuffix
+@init {
+	Token op = input.LT(1);
+}
+	:	'?'	-> OPTIONAL[op]
+  	|	'*' -> CLOSURE[op]
+   	|	'+' -> POSITIVE_CLOSURE[op]
+	;
+	
+
+
+// R E W R I T E  S Y N T A X
+
+rewrite
+@init {
+	Token firstToken = input.LT(1);
+}
+	:	(rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
+		rew2='->' last=rewrite_alternative
+        -> ^($rew $preds $predicated)* ^($rew2 $last)
+	|
+	;
+
+rewrite_alternative
+	:	rewrite_template
+	|	rewrite_tree_alternative
+   	|   /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
+	;
+	
+rewrite_template_block
+    :   lp='(' rewrite_template ')' -> ^(BLOCK[$lp,"BLOCK"] rewrite_template EOB[$lp,"EOB"])
+    ;
+
+rewrite_tree_block
+    :   lp='(' rewrite_tree_alternative ')'
+    	-> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$lp,"EOB"])
+    ;
+
+rewrite_tree_alternative
+    :	rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
+    ;
+
+rewrite_tree_element
+	:	rewrite_tree_atom
+	|	rewrite_tree_atom ebnfSuffix
+		-> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
+	|   rewrite_tree
+		(	ebnfSuffix
+			-> ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"])
+		|	-> rewrite_tree
+		)
+	|   rewrite_tree_ebnf
+	;
+
+rewrite_tree_atom
+    :   CHAR_LITERAL
+	|   TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
+    |   RULE_REF
+	|   STRING_LITERAL
+	|   d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
+	|	ACTION
+	;
+
+rewrite_tree_ebnf
+@init {
+    Token firstToken = input.LT(1);
+}
+@after {
+	$rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
+	$rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
+}
+	:	rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
+	;
+	
+rewrite_tree
+	:	'^(' rewrite_tree_atom rewrite_tree_element* ')'
+		-> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
+	;
+
+/** Build a tree for a template rewrite:
+      ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
+    where ARGLIST is always there even if no args exist.
+    ID can be "template" keyword.  If first child is ACTION then it's
+    an indirect template ref
+
+    -> foo(a={...}, b={...})
+    -> ({string-e})(a={...}, b={...})  // e evaluates to template name
+    -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
+	-> {st-expr} // st-expr evaluates to ST
+ */
+rewrite_template
+	:   // -> template(a={...},...) "..."    inline template
+		{input.LT(1).getText().equals("template")}?
+		id lp='(' rewrite_template_args	')'
+		st=( DOUBLE_QUOTE_STRING_LITERAL | DOUBLE_ANGLE_STRING_LITERAL )
+		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $st)
+
+	|	// -> foo(a={...}, ...)
+		rewrite_template_ref
+
+	|	// -> ({expr})(a={...}, ...)
+		rewrite_indirect_template_head
+
+	|	// -> {...}
+		ACTION
+	;
+
+/** -> foo(a={...}, ...) */
+rewrite_template_ref
+	:	id lp='(' rewrite_template_args	')'
+		-> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
+	;
+
+/** -> ({expr})(a={...}, ...) */
+rewrite_indirect_template_head
+	:	lp='(' ACTION ')' '(' rewrite_template_args ')'
+		-> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
+	;
+
+rewrite_template_args
+	:	rewrite_template_arg (',' rewrite_template_arg)*
+		-> ^(ARGLIST rewrite_template_arg+)
+	|	-> ARGLIST
+	;
+
+rewrite_template_arg
+	:   id '=' ACTION -> ^(ARG[$id.start] id ACTION)
+	;
+
+id	:	TOKEN_REF -> ID[$TOKEN_REF]
+	|	RULE_REF  -> ID[$RULE_REF]
+	;
+
+// L E X I C A L   R U L E S
+
+SL_COMMENT
+ 	:	'//'
+ 	 	(	' $ANTLR ' SRC // src directive
+ 		|	~('\r'|'\n')*
+		)
+		'\r'? '\n'
+		{$channel=HIDDEN;}
+	;
+
+ML_COMMENT
+	:	'/*' {if (input.LA(1)=='*') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
+	;
+
+CHAR_LITERAL
+	:	'\'' LITERAL_CHAR '\''
+	;
+
+STRING_LITERAL
+	:	'\'' LITERAL_CHAR LITERAL_CHAR* '\''
+	;
+
+fragment
+LITERAL_CHAR
+	:	ESC
+	|	~('\''|'\\')
+	;
+
+DOUBLE_QUOTE_STRING_LITERAL
+	:	'"' LITERAL_CHAR* '"'
+	;
+
+DOUBLE_ANGLE_STRING_LITERAL
+	:	'<<' .* '>>'
+	;
+
+fragment
+ESC	:	'\\'
+		(	'n'
+		|	'r'
+		|	't'
+		|	'b'
+		|	'f'
+		|	'"'
+		|	'\''
+		|	'\\'
+		|	'>'
+		|	'u' XDIGIT XDIGIT XDIGIT XDIGIT
+		|	. // unknown, leave as it is
+		)
+	;
+
+fragment
+XDIGIT :
+		'0' .. '9'
+	|	'a' .. 'f'
+	|	'A' .. 'F'
+	;
+
+INT	:	'0'..'9'+
+	;
+
+ARG_ACTION
+	:	NESTED_ARG_ACTION
+	;
+
+fragment
+NESTED_ARG_ACTION :
+	'['
+	(	options {greedy=false; k=1;}
+	:	NESTED_ARG_ACTION
+	|	ACTION_STRING_LITERAL
+	|	ACTION_CHAR_LITERAL
+	|	.
+	)*
+	']'
+	{setText(getText().substring(1, getText().length()-1));}
+	;
+
+ACTION
+	:	NESTED_ACTION ( '?' {$type = SEMPRED;} )?
+	;
+
+fragment
+NESTED_ACTION :
+	'{'
+	(	options {greedy=false; k=3;}
+	:	NESTED_ACTION
+	|	SL_COMMENT
+	|	ML_COMMENT
+	|	ACTION_STRING_LITERAL
+	|	ACTION_CHAR_LITERAL
+	|	.
+	)*
+	'}'
+	{$channel = DEFAULT_TOKEN_CHANNEL;}
+   ;
+
+fragment
+ACTION_CHAR_LITERAL
+	:	'\'' (ACTION_ESC|~('\\'|'\'')) '\''
+	;
+
+fragment
+ACTION_STRING_LITERAL
+	:	'"' (ACTION_ESC|~('\\'|'"'))+ '"'
+	;
+
+fragment
+ACTION_ESC
+	:	'\\\''
+	|	'\\"'
+	|	'\\' ~('\''|'"')
+	;
+
+TOKEN_REF
+	:	'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
+	;
+
+RULE_REF
+	:	'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
+	;
+
+/** Match the start of an options section.  Don't allow normal
+ *  action processing on the {...} as it's not an action.
+ */
+OPTIONS
+	:	'options' WS_LOOP '{' {$channel=DEFAULT_TOKEN_CHANNEL;} // WS_LOOP sets channel
+	;
+	
+TOKENS
+	:	'tokens' WS_LOOP '{' {$channel=DEFAULT_TOKEN_CHANNEL;}
+	;
+
+/** Reset the file and line information; useful when the grammar
+ *  has been generated so that errors are shown relative to the
+ *  original file like the old C preprocessor used to do.
+ */
+fragment
+SRC	:	'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT {$channel=HIDDEN;}
+	;
+
+WS	:	(	' '
+		|	'\t'
+		|	'\r'? '\n'
+		)+
+		{$channel=HIDDEN;}
+	;
+
+fragment
+WS_LOOP
+	:	(	WS
+		|	SL_COMMENT
+		|	ML_COMMENT
+		)*
+		{$channel=HIDDEN;}
+	;
+
diff --git a/tests/examplefiles/ragel-cpp_rlscan b/tests/examplefiles/ragel-cpp_rlscan
new file mode 100644
index 00000000..4b146329
--- /dev/null
+++ b/tests/examplefiles/ragel-cpp_rlscan
@@ -0,0 +1,280 @@
+/*
+ * Lexes Ragel input files.
+ *
+ * @LANG: c++
+ *
+ * Test works with split code gen.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+void escapeXML( const char *data )
+{
+	while ( *data != 0 ) {
+		switch ( *data ) {
+			case '<': cout << "&lt;"; break;
+			case '>': cout << "&gt;"; break;
+			case '&': cout << "&amp;"; break;
+			default: cout << *data; break;
+		}
+		data += 1;
+	}
+}
+
+void escapeXML( char c )
+{
+	switch ( c ) {
+		case '<': cout << "&lt;"; break;
+		case '>': cout << "&gt;"; break;
+		case '&': cout << "&amp;"; break;
+		default: cout << c; break;
+	}
+}
+
+void escapeXML( const char *data, int len )
+{
+	for ( const char *end = data + len; data != end; data++  ) {
+		switch ( *data ) {
+			case '<': cout << "&lt;"; break;
+			case '>': cout << "&gt;"; break;
+			case '&': cout << "&amp;"; break;
+			default: cout << *data; break;
+		}
+	}
+}
+
+inline void write( const char *data )
+{
+	cout << data;
+}
+
+inline void write( char c )
+{
+	cout << c;
+}
+
+inline void write( const char *data, int len )
+{
+	cout.write( data, len );
+}
+
+
+%%{
+	machine RagelScan;
+
+	word = [a-zA-Z_][a-zA-Z_0-9]*;
+	integer = [0-9]+;
+	hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
+
+	default = ^0;
+	EOF = 0;
+
+	# Handles comments in outside code and inline blocks.
+	c_comment := 
+		( default* :>> '*/' )
+		${ escapeXML( fc ); }
+		@{ fret; };
+
+	action emit {
+		escapeXML( ts, te-ts );
+	}
+
+	#
+	# Inline action code
+	#
+
+	ilscan := |*
+
+		"'" ( [^'\\] | /\\./ )* "'" => emit;
+		'"' ( [^"\\] | /\\./ )* '"' => emit;
+		'/*' {
+			write( "/*" );
+			fcall c_comment;
+		};
+		'//' [^\n]* '\n' => emit;
+
+		'{' {
+			write( '{' );
+			inline_depth += 1; 
+		};
+
+		'}' {
+			write( '}' );
+			/* If dropping down to the last } then return 
+			 * to ragel code. */
+			if ( --inline_depth == 0 ) {
+				write( "</inline>\n" );
+				fgoto rlscan;
+			}
+		};
+
+		default => { escapeXML( *ts ); };
+	*|;
+
+	#
+	# Ragel Tokens
+	#
+
+	rlscan := |*
+		'}%%' {
+			if ( !single_line ) {
+				write( "</section>\n" );
+				fgoto main;
+			}
+		};
+
+		'\n' {
+			if ( single_line ) {
+				write( "</section>\n" );
+				fgoto main;
+			}
+		};
+
+		# Word
+		word {
+			write( "<word>" );
+			write( ts, te-ts );
+			write( "</word>\n" );
+		};
+
+		# Decimal integer.
+		integer {
+			write( "<int>" );
+			write( ts, te-ts );
+			write( "</int>\n" );
+		};
+
+		# Hexadecimal integer.
+		hex {
+			write( "<hex>" );
+			write( ts, te-ts );
+			write( "</hex>\n" );
+		};
+
+		# Consume comments.
+		'#' [^\n]* '\n';
+
+		# Single literal string.
+		"'" ( [^'\\] | /\\./ )* "'" {
+			write( "<single_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</single_lit>\n" );
+		};
+
+		# Double literal string.
+		'"' ( [^"\\] | /\\./ )* '"' {
+			write( "<double_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</double_lit>\n" );
+		};
+
+		# Or literal.
+		'[' ( [^\]\\] | /\\./ )* ']' {
+			write( "<or_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</or_lit>\n" );
+		};
+
+		# Regex Literal.
+		'/' ( [^/\\] | /\\./ ) * '/' {
+			write( "<re_lit>" );
+			escapeXML( ts, te-ts );
+			write( "</re_lit>\n" );
+		};
+
+		# Open an inline block
+		'{' {
+			inline_depth = 1;
+			write( "<inline>{" );
+			fgoto ilscan;
+		};
+
+		punct {
+			write( "<symbol>" );
+			escapeXML( fc );
+			write( "</symbol>\n" );
+		};
+		
+		default;
+	*|;
+
+	#
+	# Outside code.
+	#
+
+	main := |*
+
+		"'" ( [^'\\] | /\\./ )* "'" => emit;
+		'"' ( [^"\\] | /\\./ )* '"' => emit;
+
+		'/*' {
+			escapeXML( ts, te-ts );
+			fcall c_comment;
+		};
+
+		'//' [^\n]* '\n' => emit;
+
+		'%%{' { 
+			write( "<section>\n" );
+			single_line = false;
+			fgoto rlscan;
+		};
+
+		'%%' {
+			write( "<section>\n" ); 
+			single_line = true; 
+			fgoto rlscan;
+		};
+
+		default { 
+			escapeXML( *ts );
+		};
+
+		# EOF.
+		EOF;
+	*|;
+}%%
+
+%% write data nofinal;
+
+void test( const char *data )
+{
+	std::ios::sync_with_stdio(false);
+
+	int cs, act;
+	const char *ts, *te;
+	int stack[1], top;
+
+	bool single_line = false;
+	int inline_depth = 0;
+
+	%% write init;
+
+	/* Read in a block. */
+	const char *p = data;
+	const char *pe = data + strlen( data );
+	const char *eof = pe;
+	%% write exec;
+
+	if ( cs == RagelScan_error ) {
+		/* Machine failed before finding a token. */
+		cerr << "PARSE ERROR" << endl;
+		exit(1);
+	}
+}
+
+#define BUFSIZE 2048
+
+int main()
+{
+	std::ios::sync_with_stdio(false);
+
+	test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n");
+
+	return 0;
+}
diff --git a/tests/examplefiles/ragel-cpp_snippet b/tests/examplefiles/ragel-cpp_snippet
new file mode 100644
index 00000000..203ae28b
--- /dev/null
+++ b/tests/examplefiles/ragel-cpp_snippet
@@ -0,0 +1,2 @@
+	%% write init;
+	/* Read in a block. */
author	thatch <devnull@localhost>	2009-01-04 09:01:06 -0600
committer	thatch <devnull@localhost>	2009-01-04 09:01:06 -0600
commit	6b17d2b85ec9cb15976f7738cdc03544f0f86e27 (patch)
tree	b6763872b9fa331b405dedc7d276f2504bfc985c
parent	64ef7976772355f0ab4af68c4a572483dcbc15ad (diff)
parent	5ab91c0bb87e55c356a0342020afe6e0f599e5b0 (diff)
download	pygments-6b17d2b85ec9cb15976f7738cdc03544f0f86e27.tar.gz