summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorthatch <devnull@localhost>2009-01-04 09:01:06 -0600
committerthatch <devnull@localhost>2009-01-04 09:01:06 -0600
commit6b17d2b85ec9cb15976f7738cdc03544f0f86e27 (patch)
treeb6763872b9fa331b405dedc7d276f2504bfc985c
parent64ef7976772355f0ab4af68c4a572483dcbc15ad (diff)
parent5ab91c0bb87e55c356a0342020afe6e0f599e5b0 (diff)
downloadpygments-6b17d2b85ec9cb15976f7738cdc03544f0f86e27.tar.gz
Merge with pygments-main
-rw-r--r--AUTHORS1
-rw-r--r--CHANGES7
-rw-r--r--Makefile2
-rw-r--r--pygments/lexers/_mapping.py17
-rw-r--r--pygments/lexers/compiled.py3
-rw-r--r--pygments/lexers/other.py2
-rw-r--r--pygments/lexers/parsers.py670
-rw-r--r--pygments/lexers/templates.py8
-rw-r--r--tests/examplefiles/ANTLRv3.g608
-rw-r--r--tests/examplefiles/ragel-cpp_rlscan280
-rw-r--r--tests/examplefiles/ragel-cpp_snippet2
11 files changed, 1595 insertions, 5 deletions
diff --git a/AUTHORS b/AUTHORS
index 5ab88bdb..6005e6eb 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -32,6 +32,7 @@ Other contributors, listed alphabetically, are:
* Kirk McDonald -- D lexer
* Lukas Meuser -- BBCode formatter, Lua lexer
* Paulo Moura -- Logtalk lexer
+* Ana Nelson -- Ragel, ANTLR lexers
* Ronny Pfannschmidt -- BBCode lexer
* Benjamin Peterson -- Test suite refactoring
* Andre Roberge -- Tango style
diff --git a/CHANGES b/CHANGES
index 830c9013..30cf0fee 100644
--- a/CHANGES
+++ b/CHANGES
@@ -7,10 +7,11 @@ Version 1.1
- Lexers added:
- * GLSL (#369)
+ * Antlr/Ragel, thanks to Ana Nelson
+ * (Ba)sh shell
* Erlang shell
- * (Ba)sh shell (#349)
- * Prolog (#373)
+ * GLSL
+ * Prolog
- Fix a bug lexing extended Ruby strings.
diff --git a/Makefile b/Makefile
index 4f3b9c3e..a81376f0 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ export PYTHONPATH = $(shell echo "$$PYTHONPATH"):$(shell python -c 'import os; p
all: clean-pyc check test
check:
- @$(PYTHON) scripts/detect_missing_analyse_text.py
+ @$(PYTHON) scripts/detect_missing_analyse_text.py || true
@$(PYTHON) scripts/check_sources.py -i apidocs -i pygments/lexers/_mapping.py \
-i docs/build -i pygments/formatters/_mapping.py -i pygments/unistring.py \
-i pygments/lexers/_vimbuiltins.py
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 2a678764..1b4b8c37 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -16,6 +16,15 @@
LEXERS = {
'ActionScript3Lexer': ('pygments.lexers.web', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')),
'ActionScriptLexer': ('pygments.lexers.web', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')),
+ 'AntlrActionScriptLexer': ('pygments.lexers.parsers', 'ANTLR With ActionScript Target', ('antlr-as', 'antlr-actionscript'), ('*.G', '*.g'), ()),
+ 'AntlrCSharpLexer': ('pygments.lexers.parsers', 'ANTLR With C# Target', ('antlr-csharp', 'antlr-c#'), ('*.G', '*.g'), ()),
+ 'AntlrCppLexer': ('pygments.lexers.parsers', 'ANTLR With CPP Target', ('antlr-cpp',), ('*.G', '*.g'), ()),
+ 'AntlrJavaLexer': ('pygments.lexers.parsers', 'ANTLR With Java Target', ('antlr-java',), ('*.G', '*.g'), ()),
+ 'AntlrLexer': ('pygments.lexers.parsers', 'ANTLR', ('antlr',), (), ()),
+ 'AntlrObjectiveCLexer': ('pygments.lexers.parsers', 'ANTLR With ObjectiveC Target', ('antlr-objc',), ('*.G', '*.g'), ()),
+ 'AntlrPerlLexer': ('pygments.lexers.parsers', 'ANTLR With Perl Target', ('antlr-perl',), ('*.G', '*.g'), ()),
+ 'AntlrPythonLexer': ('pygments.lexers.parsers', 'ANTLR With Python Target', ('antlr-python',), ('*.G', '*.g'), ()),
+ 'AntlrRubyLexer': ('pygments.lexers.parsers', 'ANTLR With Ruby Target', ('antlr-ruby', 'antlr-rb'), ('*.G', '*.g'), ()),
'ApacheConfLexer': ('pygments.lexers.text', 'ApacheConf', ('apacheconf', 'aconf', 'apache'), ('.htaccess', 'apache.conf', 'apache2.conf'), ('text/x-apacheconf',)),
'AppleScriptLexer': ('pygments.lexers.other', 'AppleScript', ('applescript',), ('*.applescript',), ()),
'BBCodeLexer': ('pygments.lexers.text', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)),
@@ -118,6 +127,14 @@ LEXERS = {
'PythonConsoleLexer': ('pygments.lexers.agile', 'Python console session', ('pycon',), (), ('text/x-python-doctest',)),
'PythonLexer': ('pygments.lexers.agile', 'Python', ('python', 'py'), ('*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript'), ('text/x-python', 'application/x-python')),
'PythonTracebackLexer': ('pygments.lexers.agile', 'Python Traceback', ('pytb',), ('*.pytb',), ('text/x-python-traceback',)),
+ 'RagelCLexer': ('pygments.lexers.parsers', 'Ragel in C Host', ('ragel-c',), ('*.rl',), ()),
+ 'RagelCppLexer': ('pygments.lexers.parsers', 'Ragel in CPP Host', ('ragel-cpp',), ('*.rl',), ()),
+ 'RagelDLexer': ('pygments.lexers.parsers', 'Ragel in D Host', ('ragel-d',), ('*.rl',), ()),
+ 'RagelEmbeddedLexer': ('pygments.lexers.parsers', 'Embedded Ragel', ('ragel-em',), ('*.rl',), ()),
+ 'RagelJavaLexer': ('pygments.lexers.parsers', 'Ragel in Java Host', ('ragel-java',), ('*.rl',), ()),
+ 'RagelLexer': ('pygments.lexers.parsers', 'Ragel', ('ragel',), (), ()),
+ 'RagelObjectiveCLexer': ('pygments.lexers.parsers', 'Ragel in Objective C Host', ('ragel-objc',), ('*.rl',), ()),
+ 'RagelRubyLexer': ('pygments.lexers.parsers', 'Ragel in Ruby Host', ('ragel-ruby', 'ragel-rb'), ('*.rl',), ()),
'RawTokenLexer': ('pygments.lexers.special', 'Raw token data', ('raw',), (), ('application/x-pygments-tokens',)),
'RedcodeLexer': ('pygments.lexers.other', 'Redcode', ('redcode',), ('*.cw',), ()),
'RhtmlLexer': ('pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)),
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 3624b270..643ceaad 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -1340,6 +1340,9 @@ class GLShaderLexer(RegexLexer):
}
class PrologLexer(RegexLexer):
+ """
+ Lexer for Prolog files.
+ """
name = 'Prolog'
aliases = ['prolog']
filenames = ['*.prolog', '*.pro', '*.pl']
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index 83913570..dd187130 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -347,7 +347,7 @@ class BashLexer(RegexLexer):
r'export|false|fc|fg|getopts|hash|help|history|jobs|kill|let|'
r'local|logout|popd|printf|pushd|pwd|read|readonly|set|shift|'
r'shopt|source|suspend|test|time|times|trap|true|type|typeset|'
- r'ulimit|umask|unalias|unset|wait)\s*\b',
+ r'ulimit|umask|unalias|unset|wait)\s*\b(?!\.)',
Name.Builtin),
(r'#.*\n', Comment),
(r'\\[\w\W]', String.Escape),
diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py
new file mode 100644
index 00000000..00f1ee3e
--- /dev/null
+++ b/pygments/lexers/parsers.py
@@ -0,0 +1,670 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.parsers
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ Lexers for parser generators.
+
+ :copyright: 2008-2009 by Ana Nelson <ana@ananelson.com>, Tim Hatch.
+
+ :license: BSD, see LICENSE for more details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, DelegatingLexer, \
+ include, bygroups, using, this
+from pygments.token import Error, Punctuation, Generic, Other, \
+ Text, Comment, Operator, Keyword, Name, String, Number, Whitespace
+from pygments.lexers.compiled import JavaLexer, CLexer, CppLexer, \
+ ObjectiveCLexer, DLexer
+from pygments.lexers.dotnet import CSharpLexer
+from pygments.lexers.agile import RubyLexer, PythonLexer, PerlLexer
+from pygments.lexers.web import ActionScriptLexer
+# Use TextLexer during development to just focus on one part of a delegating
+# lexer.
+from pygments.lexers.special import TextLexer
+
+__all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
+ 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
+ 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
+ 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
+ #'AntlrCLexer',
+ 'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
+ 'AntlrJavaLexer', "AntlrActionScriptLexer"]
+
+class RagelLexer(RegexLexer):
+ """
+ A pure `Ragel <http://www.complang.org/ragel/>`_ lexer. Use this for
+ fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead
+ (or one of the language-specific subclasses).
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Ragel'
+ aliases = ['ragel']
+ filenames = []
+
+ tokens = {
+ 'whitespace': [
+ (r'\s+', Whitespace)
+ ],
+ 'comments': [
+ (r'\#.*$', Comment),
+ ],
+ 'keywords': [
+ (r'(access|action|alphtype)\b', Keyword),
+ (r'(getkey|write|machine|include)\b', Keyword),
+ (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
+ (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
+ ],
+ 'numbers': [
+ (r'0x[0-9A-Fa-f]+', Number.Hex),
+ (r'[+-]?[0-9]+', Number.Integer),
+ ],
+ 'literals': [
+ (r'"(\\\\|\\"|[^"])*"', String), # double quote string
+ (r"'(\\\\|\\'|[^'])*'", String), # single quote string
+ (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
+ (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
+ ],
+ 'identifiers': [
+ (r'[a-zA-Z_][a-zA-Z_0-9]*', Name.Variable),
+ ],
+ 'operators': [
+ (r',', Operator), # Join
+ (r'\||&|-|--', Operator), # Union, Intersection and Subtraction
+ (r'\.|<:|:>|:>>', Operator), # Concatention
+ (r':', Operator), # Label
+ (r'->', Operator), # Epsilon Transition
+ (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
+ (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
+ (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
+ (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
+ (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
+ (r'>|@|\$|%', Operator), # Transition Actions and Priorities
+ (r'\*|\?|\+|{[0-9]*,[0-9]*}', Operator), # Repetition
+ (r'!|\^', Operator), # Negation
+ (r'\(|\)', Operator), # Grouping
+ ],
+ 'root': [
+ include('literals'),
+ include('whitespace'),
+ include('comments'),
+ include('keywords'),
+ include('numbers'),
+ include('identifiers'),
+ include('operators'),
+ (r'{', Punctuation, 'host'),
+ (r'=', Operator),
+ (r';', Punctuation),
+ ],
+ 'host': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks
+ r'[^{}\'"/#]+', # exclude unsafe characters
+ r'[^\\][\\][{}]', # allow escaped { or }
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'//.*$\n?', # single line comment
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+ r'\#.*$\n?', # ruby comment
+
+ # regular expression: There's no reason for it to start
+ # with a * and this stops confusion with comments.
+ r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+ # / is safe now that we've handled regex and javadoc comments
+ r'/',
+ )) + r')+', Other),
+
+ (r'{', Punctuation, '#push'),
+ (r'}', Punctuation, '#pop'),
+ ],
+ }
+
+class RagelEmbeddedLexer(RegexLexer):
+ """
+ A lexer for `Ragel`_ embedded in a host language file.
+
+ This will only highlight Ragel statements. If you want host language
+ highlighting then call the language-specific Ragel lexer.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Embedded Ragel'
+ aliases = ['ragel-em']
+ filenames = ['*.rl']
+
+ tokens = {
+ 'root': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks
+ r'[^%\'"/#]+', # exclude unsafe characters
+ r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+ r'//.*$\n?', # single line comment
+ r'\#.*$\n?', # ruby/ragel comment
+ r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
+
+ # / is safe now that we've handled regex and javadoc comments
+ r'/',
+ )) + r')+', Other),
+
+ # Single Line FSM.
+ # Please don't put a quoted newline in a single line FSM.
+ # That's just mean. It will break this.
+ (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
+ using(RagelLexer),
+ Punctuation, Text)),
+
+ # Multi Line FSM.
+ (r'(%%%%|%%){', Punctuation, 'multi-line-fsm'),
+ ],
+ 'multi-line-fsm': [
+ (r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
+ r'(' + r'|'.join((
+ r'[^}\'"\[/#]', # exclude unsafe characters
+ r'}(?=[^%]|$)', # } is okay as long as it's not followed by %
+ r'}%(?=[^%]|$)', # ...well, one %'s okay, just not two...
+ r'[^\\][\\][{}]', # ...and } is okay if it's escaped
+
+ # allow / if it's preceded with one of these symbols
+ # (ragel EOF actions)
+ r'(>|\$|%|<|@|<>)/',
+
+ # specifically allow regex followed immediately by *
+ # so it doesn't get mistaken for a comment
+ r'/(?!\*)(\\\\|\\/|[^/])*/\*',
+
+ # allow / as long as it's not followed by another / or by a *
+ r'/(?=[^/\*]|$)',
+
+ # We want to match as many of these as we can in one block.
+ # Not sure if we need the + sign here,
+ # does it help performance?
+ )) + r')+',
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+ r'//.*$\n?', # single line comment
+ r'\#.*$\n?', # ruby/ragel comment
+ )) + r')+', using(RagelLexer)),
+
+ (r'}%%', Punctuation, '#pop'),
+ ]
+ }
+
+ def analyse_text(text):
+ return '@LANG: indep' in text or 0.1
+
+class RagelRubyLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a Ruby host file.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Ragel in Ruby Host'
+ aliases = ['ragel-ruby', 'ragel-rb']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: ruby' in text
+
+class RagelCLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a C host file.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Ragel in C Host'
+ aliases = ['ragel-c']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelCLexer, self).__init__(CLexer, RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: c' in text
+
+class RagelDLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a D host file.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Ragel in D Host'
+ aliases = ['ragel-d']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelDLexer, self).__init__(DLexer, RagelEmbeddedLexer, **options)
+
+ def analyse_text(text):
+ return '@LANG: d' in text
+
+class RagelCppLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a CPP host file.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Ragel in CPP Host'
+ aliases = ['ragel-cpp']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelCppLexer, self).__init__(CppLexer, RagelEmbeddedLexer, **options)
+
+ def analyse_text(text):
+ return '@LANG: c++' in text
+
+class RagelObjectiveCLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in an Objective C host file.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Ragel in Objective C Host'
+ aliases = ['ragel-objc']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelObjectiveCLexer, self).__init__(ObjectiveCLexer,
+ RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: objc' in text
+
+class RagelJavaLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a Java host file.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'Ragel in Java Host'
+ aliases = ['ragel-java']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelJavaLexer, self).__init__(JavaLexer, RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: java' in text
+
+class AntlrLexer(RegexLexer):
+ """
+ Generic ANTLR Lexer.
+ Should not be called directly, instead
+ use DelegatingLexer for your target language.
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR'
+ aliases = ['antlr']
+ filenames = []
+
+ _id = r'[A-Za-z][A-Za-z_0-9]*'
+ _TOKEN_REF = r'[A-Z][A-Za-z_0-9]*'
+ _RULE_REF = r'[a-z][A-Za-z_0-9]*'
+ _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\'])*\''
+ _INT = r'[0-9]+'
+
+ tokens = {
+ 'whitespace': [
+ (r'\s+', Whitespace),
+ ],
+ 'comments': [
+ (r'//.*$', Comment),
+ (r'/\*(.|\n)*?\*/', Comment),
+ ],
+ 'root': [
+ include('whitespace'),
+ include('comments'),
+
+ (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
+ Punctuation)),
+ # optionsSpec
+ (r'options\b', Keyword, 'options'),
+ # tokensSpec
+ (r'tokens\b', Keyword, 'tokens'),
+ # attrScope
+ (r'(scope)(\s*)(' + _id + ')(\s*)({)',
+ bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
+ Punctuation), 'action'),
+ # exception
+ (r'(catch|finally)\b', Keyword, 'exception'),
+ # action
+ (r'(@' + _id + ')(\s*)(::)?(\s*)(' + _id + ')(\s*)({)',
+ bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
+ Name.Label, Whitespace, Punctuation), 'action'),
+ # rule
+ (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?', \
+ bygroups(Keyword, Whitespace, Name.Label, Punctuation),
+ ('rule-alts', 'rule-prelims')),
+ ],
+ 'exception': [
+ (r'\n', Whitespace, '#pop'),
+ (r'\s', Whitespace),
+ include('comments'),
+
+ (r'\[', Punctuation, 'nested-arg-action'),
+ (r'\{', Punctuation, 'action'),
+ ],
+ 'rule-prelims': [
+ include('whitespace'),
+ include('comments'),
+
+ (r'returns\b', Keyword),
+ (r'\[', Punctuation, 'nested-arg-action'),
+ (r'\{', Punctuation, 'action'),
+ # throwsSpec
+ (r'(throws)(\s+)(' + _id + ')',
+ bygroups(Keyword, Whitespace, Name.Label)),
+ (r'(?:(,)(\s*)(' + _id + '))+',
+ bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
+ # optionsSpec
+ (r'options\b', Keyword, 'options'),
+ # ruleScopeSpec - scope followed by target language code or name of action
+ # TODO finish implementing other possibilities for scope
+ # L173 ANTLRv3.g from ANTLR book
+ (r'(scope)(\s+)({)', bygroups(Keyword, Whitespace, Punctuation),
+ 'action'),
+ (r'(scope)(\s+)(' + _id + ')(\s*)(;)',
+ bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
+ # ruleAction
+ (r'(@' + _id + ')(\s*)({)',
+ bygroups(Name.Label, Whitespace, Punctuation), 'action'),
+ # finished prelims, go to rule alts!
+ (r':', Punctuation, '#pop')
+ ],
+ 'rule-alts': [
+ include('whitespace'),
+ include('comments'),
+
+ # These might need to go in a separate 'block' state triggered by (
+ (r'options\b', Keyword, 'options'),
+ (r':', Punctuation),
+
+ # literals
+ (r"'(\\\\|\\'|[^'])*'", String),
+ (r'"(\\\\|\\"|[^"])*"', String),
+ (r'<<([^>]|>[^>])*>>', String),
+ # identifiers
+ # Tokens start with capital letter.
+ (r'\$?[A-Z_][A-Za-z_0-9]*', Name.Constant),
+ # Rules start with small letter.
+ (r'\$?[a-z_][A-Za-z_0-9]*', Name.Variable),
+ # operators
+ (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
+ (r',', Punctuation),
+ (r'\[', Punctuation, 'nested-arg-action'),
+ (r'\{', Punctuation, 'action'),
+ (r';', Punctuation, '#pop')
+ ],
+ 'tokens': [
+ include('whitespace'),
+ include('comments'),
+ (r'{', Punctuation),
+ (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL + ')?(\s*)(;)',
+ bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
+ String, Whitespace, Punctuation)),
+ (r'}', Punctuation, '#pop'),
+ ],
+ 'options': [
+ include('whitespace'),
+ include('comments'),
+ (r'{', Punctuation),
+ (r'(' + _id + r')(\s*)(=)(\s*)(' +
+ '|'.join((_id, _STRING_LITERAL, _INT, '\*'))+ ')(\s*)(;)',
+ bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
+ Text, Whitespace, Punctuation)),
+ (r'}', Punctuation, '#pop'),
+ ],
+ 'action': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks
+ r'[^\${}\'"/\\]+', # exclude unsafe characters
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'//.*$\n?', # single line comment
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+
+ # regular expression: There's no reason for it to start
+ # with a * and this stops confusion with comments.
+ r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+ # backslashes are okay, as long as we are not backslashing a %
+ r'\\(?!%)',
+
+ # Now that we've handled regex and javadoc comments
+ # it's safe to let / through.
+ r'/',
+ )) + r')+', Other),
+ (r'(\\)(%)', bygroups(Punctuation, Other)),
+ (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
+ bygroups(Name.Variable, Punctuation, Name.Property)),
+ (r'{', Punctuation, '#push'),
+ (r'}', Punctuation, '#pop'),
+ ],
+ 'nested-arg-action': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks.
+ r'[^\$\[\]\'"/]+', # exclude unsafe characters
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'//.*$\n?', # single line comment
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+
+ # regular expression: There's no reason for it to start
+ # with a * and this stops confusion with comments.
+ r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+ # Now that we've handled regex and javadoc comments
+ # it's safe to let / through.
+ r'/',
+ )) + r')+', Other),
+
+
+ (r'\[', Punctuation, '#push'),
+ (r'\]', Punctuation, '#pop'),
+ (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
+ bygroups(Name.Variable, Punctuation, Name.Property)),
+ (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
+ ]
+ }
+
+# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
+
+# TH: I'm not aware of any language features of C++ that will cause
+# incorrect lexing of C files. Antlr doesn't appear to make a distinction,
+# so just assume they're C++. No idea how to make Objective C work in the
+# future.
+
+#class AntlrCLexer(DelegatingLexer):
+# """
+# ANTLR with C Target
+#
+# *New in Pygments 1.1*
+# """
+#
+# name = 'ANTLR With C Target'
+# aliases = ['antlr-c']
+# filenames = ['*.G', '*.g']
+#
+# def __init__(self, **options):
+# super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options)
+#
+# def analyse_text(text):
+# return re.match(r'^\s*language\s*=\s*C\s*;', text)
+
+class AntlrCppLexer(DelegatingLexer):
+ """
+ ANTLR with CPP Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With CPP Target'
+ aliases = ['antlr-cpp']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrCppLexer, self).__init__(CppLexer, AntlrLexer, **options)
+
+ def analyse_text(text):
+ return re.match(r'^\s*language\s*=\s*C\s*;', text, re.M)
+
+class AntlrObjectiveCLexer(DelegatingLexer):
+ """
+ ANTLR with ObjectiveC Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With ObjectiveC Target'
+ aliases = ['antlr-objc']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrObjectiveCLexer, self).__init__(ObjectiveCLexer,
+ AntlrLexer, **options)
+
+ def analyse_text(text):
+ return re.match(r'^\s*language\s*=\s*ObjC\s*;', text, re.M)
+
+class AntlrCSharpLexer(DelegatingLexer):
+ """
+ ANTLR with C# Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With C# Target'
+ aliases = ['antlr-csharp', 'antlr-c#']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrCSharpLexer, self).__init__(CSharpLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return re.match(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
+
+class AntlrPythonLexer(DelegatingLexer):
+ """
+ ANTLR with Python Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With Python Target'
+ aliases = ['antlr-python']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrPythonLexer, self).__init__(PythonLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return re.match(r'^\s*language\s*=\s*Python\s*;', text, re.M)
+
+
+class AntlrJavaLexer(DelegatingLexer):
+ """
+ ANTLR with Java Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With Java Target'
+ aliases = ['antlr-java']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrJavaLexer, self).__init__(JavaLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return 0.5 # Antlr is Java if not specified
+
+
+class AntlrRubyLexer(DelegatingLexer):
+ """
+ ANTLR with Ruby Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With Ruby Target'
+ aliases = ['antlr-ruby', 'antlr-rb']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrRubyLexer, self).__init__(RubyLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return re.match(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
+
+class AntlrPerlLexer(DelegatingLexer):
+ """
+ ANTLR with Perl Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With Perl Target'
+ aliases = ['antlr-perl']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrPerlLexer, self).__init__(PerlLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return re.match(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
+
+class AntlrActionScriptLexer(DelegatingLexer):
+ """
+ ANTLR with ActionScript Target
+
+ *New in Pygments 1.1*
+ """
+
+ name = 'ANTLR With ActionScript Target'
+ aliases = ['antlr-as', 'antlr-actionscript']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer,
+ AntlrLexer, **options)
+
+ def analyse_text(text):
+ return re.match(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py
index fe062a18..f34a69a8 100644
--- a/pygments/lexers/templates.py
+++ b/pygments/lexers/templates.py
@@ -426,11 +426,19 @@ class MakoLexer(RegexLexer):
(r'''(?sx)
(.+?) # anything, followed by:
(?:
(?<=\n)(?=%|\#\#) | # an eval or comment line
(?=\#\*) | # multiline comment
(?=</?%) | # a python block
# call start or end
(?=\$\{) | # a substitution
(?<=\n)(?=\s*%) |
# - don't consume
(\\\n) | # an escaped newline
diff --git a/tests/examplefiles/ANTLRv3.g b/tests/examplefiles/ANTLRv3.g
new file mode 100644
index 00000000..fbe6d654
--- /dev/null
+++ b/tests/examplefiles/ANTLRv3.g
@@ -0,0 +1,608 @@
+/*
+ [The "BSD licence"]
+ Copyright (c) 2005-2007 Terence Parr
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/** ANTLR v3 grammar written in ANTLR v3 with AST construction */
+grammar ANTLRv3;
+
+options {
+ output=AST;
+ ASTLabelType=CommonTree;
+}
+
+tokens {
+ DOC_COMMENT;
+ PARSER;
+ LEXER;
+ RULE;
+ BLOCK;
+ OPTIONAL;
+ CLOSURE;
+ POSITIVE_CLOSURE;
+ SYNPRED;
+ RANGE;
+ CHAR_RANGE;
+ EPSILON;
+ ALT;
+ EOR;
+ EOB;
+ EOA; // end of alt
+ ID;
+ ARG;
+ ARGLIST;
+ RET;
+ LEXER_GRAMMAR;
+ PARSER_GRAMMAR;
+ TREE_GRAMMAR;
+ COMBINED_GRAMMAR;
+ INITACTION;
+ LABEL; // $x used in rewrite rules
+ TEMPLATE;
+ SCOPE='scope';
+ SEMPRED;
+ GATED_SEMPRED; // {p}? =>
+ SYN_SEMPRED; // (...) => it's a manually-specified synpred converted to sempred
+ BACKTRACK_SEMPRED; // auto backtracking mode syn pred converted to sempred
+ FRAGMENT='fragment';
+ TREE_BEGIN='^(';
+ ROOT='^';
+ BANG='!';
+ RANGE='..';
+ REWRITE='->';
+}
+
+@members {
+ int gtype;
+}
+
+grammarDef
+ : DOC_COMMENT?
+ ( 'lexer' {gtype=LEXER_GRAMMAR;} // pure lexer
+ | 'parser' {gtype=PARSER_GRAMMAR;} // pure parser
+ | 'tree' {gtype=TREE_GRAMMAR;} // a tree parser
+ | {gtype=COMBINED_GRAMMAR;} // merged parser/lexer
+ )
+ g='grammar' id ';' optionsSpec? tokensSpec? attrScope* action*
+ rule+
+ EOF
+ -> ^( {adaptor.create(gtype,$g)}
+ id DOC_COMMENT? optionsSpec? tokensSpec? attrScope* action* rule+
+ )
+ ;
+
+tokensSpec
+ : TOKENS tokenSpec+ '}' -> ^(TOKENS tokenSpec+)
+ ;
+
+tokenSpec
+ : TOKEN_REF
+ ( '=' (lit=STRING_LITERAL|lit=CHAR_LITERAL) -> ^('=' TOKEN_REF $lit)
+ | -> TOKEN_REF
+ )
+ ';'
+ ;
+
+attrScope
+ : 'scope' id ACTION -> ^('scope' id ACTION)
+ ;
+
+/** Match stuff like @parser::members {int i;} */
+action
+ : '@' (actionScopeName '::')? id ACTION -> ^('@' actionScopeName? id ACTION)
+ ;
+
+/** Sometimes the scope names will collide with keywords; allow them as
+ * ids for action scopes.
+ */
+actionScopeName
+ : id
+ | l='lexer' -> ID[$l]
+ | p='parser' -> ID[$p]
+ ;
+
+optionsSpec
+ : OPTIONS (option ';')+ '}' -> ^(OPTIONS option+)
+ ;
+
+option
+ : id '=' optionValue -> ^('=' id optionValue)
+ ;
+
+optionValue
+ : id
+ | STRING_LITERAL
+ | CHAR_LITERAL
+ | INT
+ | s='*' -> STRING_LITERAL[$s] // used for k=*
+ ;
+
+rule
+scope {
+ String name;
+}
+ : DOC_COMMENT?
+ ( modifier=('protected'|'public'|'private'|'fragment') )?
+ id {$rule::name = $id.text;}
+ '!'?
+ ( arg=ARG_ACTION )?
+ ( 'returns' rt=ARG_ACTION )?
+ throwsSpec? optionsSpec? ruleScopeSpec? ruleAction*
+ ':' altList ';'
+ exceptionGroup?
+ -> ^( RULE id {modifier!=null?adaptor.create(modifier):null} ^(ARG $arg)? ^(RET $rt)?
+ optionsSpec? ruleScopeSpec? ruleAction*
+ altList
+ exceptionGroup?
+ EOR["EOR"]
+ )
+ ;
+
+/** Match stuff like @init {int i;} */
+ruleAction
+ : '@' id ACTION -> ^('@' id ACTION)
+ ;
+
+throwsSpec
+ : 'throws' id ( ',' id )* -> ^('throws' id+)
+ ;
+
+ruleScopeSpec
+ : 'scope' ACTION -> ^('scope' ACTION)
+ | 'scope' id (',' id)* ';' -> ^('scope' id+)
+ | 'scope' ACTION
+ 'scope' id (',' id)* ';'
+ -> ^('scope' ACTION id+ )
+ ;
+
+block
+ : lp='('
+ ( (opts=optionsSpec)? ':' )?
+ a1=alternative rewrite ( '|' a2=alternative rewrite )*
+ rp=')'
+ -> ^( BLOCK[$lp,"BLOCK"] optionsSpec? alternative+ EOB[$rp,"EOB"] )
+ ;
+
+altList
+@init {
+ // must create root manually as it's used by invoked rules in real antlr tool.
+ // leave here to demonstrate use of {...} in rewrite rule
+ // it's really BLOCK[firstToken,"BLOCK"]; set line/col to previous ( or : token.
+ CommonTree blkRoot = (CommonTree)adaptor.create(BLOCK,input.LT(-1),"BLOCK");
+}
+ : a1=alternative rewrite ( '|' a2=alternative rewrite )*
+ -> ^( {blkRoot} (alternative rewrite?)+ EOB["EOB"] )
+ ;
+
+alternative
+@init {
+ Token firstToken = input.LT(1);
+ Token prevToken = input.LT(-1); // either : or | I think
+}
+ : element+ -> ^(ALT[firstToken,"ALT"] element+ EOA["EOA"])
+ | -> ^(ALT[prevToken,"ALT"] EPSILON[prevToken,"EPSILON"] EOA["EOA"])
+ ;
+
+exceptionGroup
+ : ( exceptionHandler )+ ( finallyClause )?
+ | finallyClause
+ ;
+
+exceptionHandler
+ : 'catch' ARG_ACTION ACTION -> ^('catch' ARG_ACTION ACTION)
+ ;
+
+finallyClause
+ : 'finally' ACTION -> ^('finally' ACTION)
+ ;
+
+element
+ : elementNoOptionSpec
+ ;
+
+elementNoOptionSpec
+ : id (labelOp='='|labelOp='+=') atom
+ ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id atom) EOA["EOA"]) EOB["EOB"]))
+ | -> ^($labelOp id atom)
+ )
+ | id (labelOp='='|labelOp='+=') block
+ ( ebnfSuffix -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] ^($labelOp id block) EOA["EOA"]) EOB["EOB"]))
+ | -> ^($labelOp id block)
+ )
+ | atom
+ ( ebnfSuffix -> ^(BLOCK["BLOCK"] ^(ALT["ALT"] atom EOA["EOA"]) EOB["EOB"])
+ | -> atom
+ )
+ | ebnf
+ | ACTION
+ | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
+ | treeSpec
+ ;
+
+atom: range ( (op='^'|op='!') -> ^($op range) | -> range )
+ | terminal
+ | notSet ( (op='^'|op='!') -> ^($op notSet) | -> notSet )
+ | RULE_REF ( arg=ARG_ACTION )? ( (op='^'|op='!') )?
+ -> {$arg!=null&&op!=null}? ^($op RULE_REF $arg)
+ -> {$arg!=null}? ^(RULE_REF $arg)
+ -> {$op!=null}? ^($op RULE_REF)
+ -> RULE_REF
+ ;
+
+notSet
+ : '~'
+ ( notTerminal -> ^('~' notTerminal)
+ | block -> ^('~' block)
+ )
+ ;
+
+treeSpec
+ : '^(' element ( element )+ ')' -> ^(TREE_BEGIN element+)
+ ;
+
+/** Matches ENBF blocks (and token sets via block rule) */
+ebnf
+@init {
+ Token firstToken = input.LT(1);
+}
+@after {
+ $ebnf.tree.getToken().setLine(firstToken.getLine());
+ $ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
+}
+ : block {Token op=input.LT(1);}
+ ( '?' -> ^(OPTIONAL[op] block)
+ | '*' -> ^(CLOSURE[op] block)
+ | '+' -> ^(POSITIVE_CLOSURE[op] block)
+ | '^' -> ^('^' block)
+ | '!' -> ^('!' block)
+ | '=>' // syntactic predicate
+ -> {gtype==COMBINED_GRAMMAR &&
+ Character.isUpperCase($rule::name.charAt(0))}?
+ // if lexer rule in combined, leave as pred for lexer
+ ^(SYNPRED["=>"] block)
+ // in real antlr tool, text for SYN_SEMPRED is predname
+ -> SYN_SEMPRED
+ | -> block
+ )
+ ;
+
+range!
+ : c1=CHAR_LITERAL RANGE c2=CHAR_LITERAL -> ^(CHAR_RANGE[$c1,".."] $c1 $c2)
+ ;
+
+terminal
+ : ( CHAR_LITERAL -> CHAR_LITERAL
+ // Args are only valid for lexer rules
+ | TOKEN_REF
+ ( ARG_ACTION -> ^(TOKEN_REF ARG_ACTION)
+ | -> TOKEN_REF
+ )
+ | STRING_LITERAL -> STRING_LITERAL
+ | '.' -> '.'
+ )
+ ( '^' -> ^('^' $terminal)
+ | '!' -> ^('!' $terminal)
+ )?
+ ;
+
+notTerminal
+ : CHAR_LITERAL
+ | TOKEN_REF
+ | STRING_LITERAL
+ ;
+
+ebnfSuffix
+@init {
+ Token op = input.LT(1);
+}
+ : '?' -> OPTIONAL[op]
+ | '*' -> CLOSURE[op]
+ | '+' -> POSITIVE_CLOSURE[op]
+ ;
+
+
+
+// R E W R I T E S Y N T A X
+
+rewrite
+@init {
+ Token firstToken = input.LT(1);
+}
+ : (rew+='->' preds+=SEMPRED predicated+=rewrite_alternative)*
+ rew2='->' last=rewrite_alternative
+ -> ^($rew $preds $predicated)* ^($rew2 $last)
+ |
+ ;
+
+rewrite_alternative
+ : rewrite_template
+ | rewrite_tree_alternative
+ | /* empty rewrite */ -> ^(ALT["ALT"] EPSILON["EPSILON"] EOA["EOA"])
+ ;
+
+rewrite_template_block
+ : lp='(' rewrite_template ')' -> ^(BLOCK[$lp,"BLOCK"] rewrite_template EOB[$lp,"EOB"])
+ ;
+
+rewrite_tree_block
+ : lp='(' rewrite_tree_alternative ')'
+ -> ^(BLOCK[$lp,"BLOCK"] rewrite_tree_alternative EOB[$lp,"EOB"])
+ ;
+
+rewrite_tree_alternative
+ : rewrite_tree_element+ -> ^(ALT["ALT"] rewrite_tree_element+ EOA["EOA"])
+ ;
+
+rewrite_tree_element
+ : rewrite_tree_atom
+ | rewrite_tree_atom ebnfSuffix
+ -> ^( ebnfSuffix ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree_atom EOA["EOA"]) EOB["EOB"]))
+ | rewrite_tree
+ ( ebnfSuffix
+ -> ^(BLOCK["BLOCK"] ^(ALT["ALT"] rewrite_tree EOA["EOA"]) EOB["EOB"])
+ | -> rewrite_tree
+ )
+ | rewrite_tree_ebnf
+ ;
+
+rewrite_tree_atom
+ : CHAR_LITERAL
+ | TOKEN_REF ARG_ACTION? -> ^(TOKEN_REF ARG_ACTION?) // for imaginary nodes
+ | RULE_REF
+ | STRING_LITERAL
+ | d='$' id -> LABEL[$d,$id.text] // reference to a label in a rewrite rule
+ | ACTION
+ ;
+
+rewrite_tree_ebnf
+@init {
+ Token firstToken = input.LT(1);
+}
+@after {
+ $rewrite_tree_ebnf.tree.getToken().setLine(firstToken.getLine());
+ $rewrite_tree_ebnf.tree.getToken().setCharPositionInLine(firstToken.getCharPositionInLine());
+}
+ : rewrite_tree_block ebnfSuffix -> ^(ebnfSuffix rewrite_tree_block)
+ ;
+
+rewrite_tree
+ : '^(' rewrite_tree_atom rewrite_tree_element* ')'
+ -> ^(TREE_BEGIN rewrite_tree_atom rewrite_tree_element* )
+ ;
+
+/** Build a tree for a template rewrite:
+ ^(TEMPLATE (ID|ACTION) ^(ARGLIST ^(ARG ID ACTION) ...) )
+ where ARGLIST is always there even if no args exist.
+ ID can be "template" keyword. If first child is ACTION then it's
+ an indirect template ref
+
+ -> foo(a={...}, b={...})
+ -> ({string-e})(a={...}, b={...}) // e evaluates to template name
+ -> {%{$ID.text}} // create literal template from string (done in ActionTranslator)
+ -> {st-expr} // st-expr evaluates to ST
+ */
+rewrite_template
+ : // -> template(a={...},...) "..." inline template
+ {input.LT(1).getText().equals("template")}?
+ id lp='(' rewrite_template_args ')'
+ st=( DOUBLE_QUOTE_STRING_LITERAL | DOUBLE_ANGLE_STRING_LITERAL )
+ -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args $st)
+
+ | // -> foo(a={...}, ...)
+ rewrite_template_ref
+
+ | // -> ({expr})(a={...}, ...)
+ rewrite_indirect_template_head
+
+ | // -> {...}
+ ACTION
+ ;
+
+/** -> foo(a={...}, ...) */
+rewrite_template_ref
+ : id lp='(' rewrite_template_args ')'
+ -> ^(TEMPLATE[$lp,"TEMPLATE"] id rewrite_template_args)
+ ;
+
+/** -> ({expr})(a={...}, ...) */
+rewrite_indirect_template_head
+ : lp='(' ACTION ')' '(' rewrite_template_args ')'
+ -> ^(TEMPLATE[$lp,"TEMPLATE"] ACTION rewrite_template_args)
+ ;
+
+rewrite_template_args
+ : rewrite_template_arg (',' rewrite_template_arg)*
+ -> ^(ARGLIST rewrite_template_arg+)
+ | -> ARGLIST
+ ;
+
+rewrite_template_arg
+ : id '=' ACTION -> ^(ARG[$id.start] id ACTION)
+ ;
+
+id : TOKEN_REF -> ID[$TOKEN_REF]
+ | RULE_REF -> ID[$RULE_REF]
+ ;
+
+// L E X I C A L R U L E S
+
+SL_COMMENT
+ : '//'
+ ( ' $ANTLR ' SRC // src directive
+ | ~('\r'|'\n')*
+ )
+ '\r'? '\n'
+ {$channel=HIDDEN;}
+ ;
+
+ML_COMMENT
+ : '/*' {if (input.LA(1)=='*') $type=DOC_COMMENT; else $channel=HIDDEN;} .* '*/'
+ ;
+
+CHAR_LITERAL
+ : '\'' LITERAL_CHAR '\''
+ ;
+
+STRING_LITERAL
+ : '\'' LITERAL_CHAR LITERAL_CHAR* '\''
+ ;
+
+fragment
+LITERAL_CHAR
+ : ESC
+ | ~('\''|'\\')
+ ;
+
+DOUBLE_QUOTE_STRING_LITERAL
+ : '"' LITERAL_CHAR* '"'
+ ;
+
+DOUBLE_ANGLE_STRING_LITERAL
+ : '<<' .* '>>'
+ ;
+
+fragment
+ESC : '\\'
+ ( 'n'
+ | 'r'
+ | 't'
+ | 'b'
+ | 'f'
+ | '"'
+ | '\''
+ | '\\'
+ | '>'
+ | 'u' XDIGIT XDIGIT XDIGIT XDIGIT
+ | . // unknown, leave as it is
+ )
+ ;
+
+fragment
+XDIGIT :
+ '0' .. '9'
+ | 'a' .. 'f'
+ | 'A' .. 'F'
+ ;
+
+INT : '0'..'9'+
+ ;
+
+ARG_ACTION
+ : NESTED_ARG_ACTION
+ ;
+
+fragment
+NESTED_ARG_ACTION :
+ '['
+ ( options {greedy=false; k=1;}
+ : NESTED_ARG_ACTION
+ | ACTION_STRING_LITERAL
+ | ACTION_CHAR_LITERAL
+ | .
+ )*
+ ']'
+ {setText(getText().substring(1, getText().length()-1));}
+ ;
+
+ACTION
+ : NESTED_ACTION ( '?' {$type = SEMPRED;} )?
+ ;
+
+fragment
+NESTED_ACTION :
+ '{'
+ ( options {greedy=false; k=3;}
+ : NESTED_ACTION
+ | SL_COMMENT
+ | ML_COMMENT
+ | ACTION_STRING_LITERAL
+ | ACTION_CHAR_LITERAL
+ | .
+ )*
+ '}'
+ {$channel = DEFAULT_TOKEN_CHANNEL;}
+ ;
+
+fragment
+ACTION_CHAR_LITERAL
+ : '\'' (ACTION_ESC|~('\\'|'\'')) '\''
+ ;
+
+fragment
+ACTION_STRING_LITERAL
+ : '"' (ACTION_ESC|~('\\'|'"'))+ '"'
+ ;
+
+fragment
+ACTION_ESC
+ : '\\\''
+ | '\\"'
+ | '\\' ~('\''|'"')
+ ;
+
+TOKEN_REF
+ : 'A'..'Z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
+ ;
+
+RULE_REF
+ : 'a'..'z' ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
+ ;
+
+/** Match the start of an options section. Don't allow normal
+ * action processing on the {...} as it's not a action.
+ */
+OPTIONS
+ : 'options' WS_LOOP '{' {$channel=DEFAULT_TOKEN_CHANNEL;} // WS_LOOP sets channel
+ ;
+
+TOKENS
+ : 'tokens' WS_LOOP '{' {$channel=DEFAULT_TOKEN_CHANNEL;}
+ ;
+
+/** Reset the file and line information; useful when the grammar
+ * has been generated so that errors are shown relative to the
+ * original file like the old C preprocessor used to do.
+ */
+fragment
+SRC : 'src' ' ' file=ACTION_STRING_LITERAL ' ' line=INT {$channel=HIDDEN;}
+ ;
+
+WS : ( ' '
+ | '\t'
+ | '\r'? '\n'
+ )+
+ {$channel=HIDDEN;}
+ ;
+
+fragment
+WS_LOOP
+ : ( WS
+ | SL_COMMENT
+ | ML_COMMENT
+ )*
+ {$channel=HIDDEN;}
+ ;
+
diff --git a/tests/examplefiles/ragel-cpp_rlscan b/tests/examplefiles/ragel-cpp_rlscan
new file mode 100644
index 00000000..4b146329
--- /dev/null
+++ b/tests/examplefiles/ragel-cpp_rlscan
@@ -0,0 +1,280 @@
+/*
+ * Lexes Ragel input files.
+ *
+ * @LANG: c++
+ *
+ * Test works with split code gen.
+ */
+
+#include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+using namespace std;
+
+void escapeXML( const char *data )
+{
+ while ( *data != 0 ) {
+ switch ( *data ) {
+ case '<': cout << "&lt;"; break;
+ case '>': cout << "&gt;"; break;
+ case '&': cout << "&amp;"; break;
+ default: cout << *data; break;
+ }
+ data += 1;
+ }
+}
+
+void escapeXML( char c )
+{
+ switch ( c ) {
+ case '<': cout << "&lt;"; break;
+ case '>': cout << "&gt;"; break;
+ case '&': cout << "&amp;"; break;
+ default: cout << c; break;
+ }
+}
+
+void escapeXML( const char *data, int len )
+{
+ for ( const char *end = data + len; data != end; data++ ) {
+ switch ( *data ) {
+ case '<': cout << "&lt;"; break;
+ case '>': cout << "&gt;"; break;
+ case '&': cout << "&amp;"; break;
+ default: cout << *data; break;
+ }
+ }
+}
+
+inline void write( const char *data )
+{
+ cout << data;
+}
+
+inline void write( char c )
+{
+ cout << c;
+}
+
+inline void write( const char *data, int len )
+{
+ cout.write( data, len );
+}
+
+
+%%{
+ machine RagelScan;
+
+ word = [a-zA-Z_][a-zA-Z_0-9]*;
+ integer = [0-9]+;
+ hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;
+
+ default = ^0;
+ EOF = 0;
+
+ # Handles comments in outside code and inline blocks.
+ c_comment :=
+ ( default* :>> '*/' )
+ ${ escapeXML( fc ); }
+ @{ fret; };
+
+ action emit {
+ escapeXML( ts, te-ts );
+ }
+
+ #
+ # Inline action code
+ #
+
+ ilscan := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+ '/*' {
+ write( "/*" );
+ fcall c_comment;
+ };
+ '//' [^\n]* '\n' => emit;
+
+ '{' {
+ write( '{' );
+ inline_depth += 1;
+ };
+
+ '}' {
+ write( '}' );
+ /* If dropping down to the last } then return
+ * to ragel code. */
+ if ( --inline_depth == 0 ) {
+ write( "</inline>\n" );
+ fgoto rlscan;
+ }
+ };
+
+ default => { escapeXML( *ts ); };
+ *|;
+
+ #
+ # Ragel Tokens
+ #
+
+ rlscan := |*
+ '}%%' {
+ if ( !single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ '\n' {
+ if ( single_line ) {
+ write( "</section>\n" );
+ fgoto main;
+ }
+ };
+
+ # Word
+ word {
+ write( "<word>" );
+ write( ts, te-ts );
+ write( "</word>\n" );
+ };
+
+ # Decimal integer.
+ integer {
+ write( "<int>" );
+ write( ts, te-ts );
+ write( "</int>\n" );
+ };
+
+ # Hexidecimal integer.
+ hex {
+ write( "<hex>" );
+ write( ts, te-ts );
+ write( "</hex>\n" );
+ };
+
+ # Consume comments.
+ '#' [^\n]* '\n';
+
+ # Single literal string.
+ "'" ( [^'\\] | /\\./ )* "'" {
+ write( "<single_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</single_lit>\n" );
+ };
+
+ # Double literal string.
+ '"' ( [^"\\] | /\\./ )* '"' {
+ write( "<double_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</double_lit>\n" );
+ };
+
+ # Or literal.
+ '[' ( [^\]\\] | /\\./ )* ']' {
+ write( "<or_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</or_lit>\n" );
+ };
+
+ # Regex Literal.
+ '/' ( [^/\\] | /\\./ ) * '/' {
+ write( "<re_lit>" );
+ escapeXML( ts, te-ts );
+ write( "</re_lit>\n" );
+ };
+
+ # Open an inline block
+ '{' {
+ inline_depth = 1;
+ write( "<inline>{" );
+ fgoto ilscan;
+ };
+
+ punct {
+ write( "<symbol>" );
+ escapeXML( fc );
+ write( "</symbol>\n" );
+ };
+
+ default;
+ *|;
+
+ #
+ # Outside code.
+ #
+
+ main := |*
+
+ "'" ( [^'\\] | /\\./ )* "'" => emit;
+ '"' ( [^"\\] | /\\./ )* '"' => emit;
+
+ '/*' {
+ escapeXML( ts, te-ts );
+ fcall c_comment;
+ };
+
+ '//' [^\n]* '\n' => emit;
+
+ '%%{' {
+ write( "<section>\n" );
+ single_line = false;
+ fgoto rlscan;
+ };
+
+ '%%' {
+ write( "<section>\n" );
+ single_line = true;
+ fgoto rlscan;
+ };
+
+ default {
+ escapeXML( *ts );
+ };
+
+ # EOF.
+ EOF;
+ *|;
+}%%
+
+%% write data nofinal;
+
+void test( const char *data )
+{
+ std::ios::sync_with_stdio(false);
+
+ int cs, act;
+ const char *ts, *te;
+ int stack[1], top;
+
+ bool single_line = false;
+ int inline_depth = 0;
+
+ %% write init;
+
+ /* Read in a block. */
+ const char *p = data;
+ const char *pe = data + strlen( data );
+ const char *eof = pe;
+ %% write exec;
+
+ if ( cs == RagelScan_error ) {
+ /* Machine failed before finding a token. */
+ cerr << "PARSE ERROR" << endl;
+ exit(1);
+ }
+}
+
+#define BUFSIZE 2048
+
+int main()
+{
+ std::ios::sync_with_stdio(false);
+
+ test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n");
+
+ return 0;
+}
diff --git a/tests/examplefiles/ragel-cpp_snippet b/tests/examplefiles/ragel-cpp_snippet
new file mode 100644
index 00000000..203ae28b
--- /dev/null
+++ b/tests/examplefiles/ragel-cpp_snippet
@@ -0,0 +1,2 @@
+ %% write init;
+ /* Read in a block. */