summary | refs | log | tree | commit | diff
path: root/pygments/lexers/dsls.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/lexers/dsls.py')
-rw-r--r-- pygments/lexers/dsls.py | 125
1 files changed, 117 insertions, 8 deletions
diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py
index 6032017f..a1426bd6 100644
--- a/pygments/lexers/dsls.py
+++ b/pygments/lexers/dsls.py
@@ -5,20 +5,20 @@
Lexers for various domain-specific languages.
- :copyright: Copyright 2006-2016 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
-from pygments.lexer import RegexLexer, bygroups, words, include, default, \
- this, using, combined
+from pygments.lexer import ExtendedRegexLexer, RegexLexer, bygroups, words, \
+ include, default, this, using, combined
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Literal, Whitespace
__all__ = ['ProtoBufLexer', 'BroLexer', 'PuppetLexer', 'RslLexer',
'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer',
- 'CrmshLexer', 'ThriftLexer', 'FlatlineLexer']
+ 'CrmshLexer', 'ThriftLexer', 'FlatlineLexer', 'SnowballLexer']
class ProtoBufLexer(RegexLexer):
@@ -36,7 +36,7 @@ class ProtoBufLexer(RegexLexer):
tokens = {
'root': [
(r'[ \t]+', Text),
- (r'[,;{}\[\]()]', Punctuation),
+ (r'[,;{}\[\]()<>]', Punctuation),
(r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
(r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
(words((
@@ -156,7 +156,7 @@ class ThriftLexer(RegexLexer):
Keyword.Namespace),
(words((
'void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double',
- 'string', 'binary', 'void', 'map', 'list', 'set', 'slist',
+ 'string', 'binary', 'map', 'list', 'set', 'slist',
'senum'), suffix=r'\b'),
Keyword.Type),
(words((
@@ -581,7 +581,7 @@ class PanLexer(RegexLexer):
'if', 'for', 'with', 'else', 'type', 'bind', 'while', 'valid', 'final',
'prefix', 'unique', 'object', 'foreach', 'include', 'template',
'function', 'variable', 'structure', 'extensible', 'declaration'),
- prefix=r'\b', suffix=r'\s*\b'),
+ prefix=r'\b', suffix=r'\s*\b'),
Keyword),
(words((
'file_contents', 'format', 'index', 'length', 'match', 'matches',
@@ -593,7 +593,7 @@ class PanLexer(RegexLexer):
'is_number', 'is_property', 'is_resource', 'is_string', 'to_boolean',
'to_double', 'to_long', 'to_string', 'clone', 'delete', 'exists',
'path_exists', 'if_exists', 'return', 'value'),
- prefix=r'\b', suffix=r'\s*\b'),
+ prefix=r'\b', suffix=r'\s*\b'),
Name.Builtin),
(r'#.*', Comment),
(r'\\[\w\W]', String.Escape),
@@ -767,3 +767,112 @@ class FlatlineLexer(RegexLexer):
(r'(\(|\))', Punctuation),
],
}
+
+
+class SnowballLexer(ExtendedRegexLexer):  # callbacks below take (lexer, match, ctx) and drive ctx.stack / ctx.pos themselves
+ """
+ Lexer for `Snowball <http://snowballstem.org/>`_ source code.
+
+ .. versionadded:: 2.2
+ """
+
+ name = 'Snowball'
+ aliases = ['snowball']
+ filenames = ['*.sbl']
+
+ _ws = r'\n\r\t '  # whitespace characters, interpolated into the character classes of the rules below
+
+ def __init__(self, **options):  # options are forwarded unchanged to ExtendedRegexLexer
+ self._reset_stringescapes()  # make sure _start and _end exist before any lexing
+ ExtendedRegexLexer.__init__(self, **options)
+
+ def _reset_stringescapes(self):  # set both escape delimiters back to the single quote character
+ self._start = "'"
+ self._end = "'"
+
+ def _string(do_string_first):  # factory run at class-definition time; returns the token callback used by the string and escape states
+ def callback(lexer, match, ctx):  # generator yielding (position, token type, text) triples
+ s = match.start()  # offset of the matched text; added to every yielded position
+ text = match.group()
+ string = re.compile(r'([^%s]*)(.)' % re.escape(lexer._start)).match  # group 1: run without the current _start char, group 2: the char after it
+ escape = re.compile(r'([^%s]*)(.)' % re.escape(lexer._end)).match  # same shape, keyed on the current _end char
+ pos = 0
+ do_string = do_string_first  # True: begin with literal string text, False: begin inside an escape
+ while pos < len(text):
+ if do_string:
+ match = string(text, pos)
+ yield s + match.start(1), String.Single, match.group(1)
+ if match.group(2) == "'":  # the single quote ends the literal
+ yield s + match.start(2), String.Single, match.group(2)
+ ctx.stack.pop()  # leave the current (string or escape) state
+ break
+ yield s + match.start(2), String.Escape, match.group(2)  # any other terminator is emitted as an escape token
+ pos = match.end()
+ match = escape(text, pos)
+ yield s + match.start(), String.Escape, match.group()  # whole escape match, including its final character
+ if match.group(2) != lexer._end:  # escape not closed inside this chunk of text
+ ctx.stack[-1] = 'escape'  # replace the top state so the next match resumes in the escape state
+ break
+ pos = match.end()
+ do_string = True  # after a closed escape, continue with literal string text
+ ctx.pos = s + match.end()  # tell the lexer how far this callback consumed
+ return callback
+
+ def _stringescapes(lexer, match, ctx):  # callback for the stringescapes rule: stores the two delimiter characters on the lexer
+ lexer._start = match.group(3)  # first delimiter character captured by the rule
+ lexer._end = match.group(5)  # second delimiter character captured by the rule
+ return bygroups(Keyword.Reserved, Text, String.Escape, Text,
+ String.Escape)(lexer, match, ctx)  # token emission itself is delegated to bygroups
+
+ tokens = {
+ 'root': [
+ (words(('len', 'lenof'), suffix=r'\b'), Operator.Word),  # only the root state treats these two words as operators
+ include('root1'),
+ ],
+ 'root1': [  # rules shared by the root and declaration states
+ (r'[%s]+' % _ws, Text),
+ (r'\d+', Number.Integer),
+ (r"'", String.Single, 'string'),  # opening quote pushes the string state
+ (r'[()]', Punctuation),
+ (r'/\*[\w\W]*?\*/', Comment.Multiline),
+ (r'//.*', Comment.Single),
+ (r'[!*+\-/<=>]=|[-=]>|<[+-]|[$*+\-/<=>?\[\]]', Operator),
+ (words(('as', 'get', 'hex', 'among', 'define', 'decimal',
+ 'backwardmode'), suffix=r'\b'),
+ Keyword.Reserved),
+ (words(('strings', 'booleans', 'integers', 'routines', 'externals',
+ 'groupings'), suffix=r'\b'),
+ Keyword.Reserved, 'declaration'),  # these keywords push the declaration state
+ (words(('do', 'or', 'and', 'for', 'hop', 'non', 'not', 'set', 'try',
+ 'fail', 'goto', 'loop', 'next', 'test', 'true',
+ 'false', 'unset', 'atmark', 'attach', 'delete', 'gopast',
+ 'insert', 'repeat', 'sizeof', 'tomark', 'atleast',
+ 'atlimit', 'reverse', 'setmark', 'tolimit', 'setlimit',
+ 'backwards', 'substring'), suffix=r'\b'),
+ Operator.Word),
+ (words(('size', 'limit', 'cursor', 'maxint', 'minint'),
+ suffix=r'\b'),
+ Name.Builtin),
+ (r'(stringdef\b)([%s]*)([^%s]+)' % (_ws, _ws),
+ bygroups(Keyword.Reserved, Text, String.Escape)),
+ (r'(stringescapes\b)([%s]*)(.)([%s]*)(.)' % (_ws, _ws),
+ _stringescapes),  # groups 3 and 5 are the characters stored by _stringescapes
+ (r'[A-Za-z]\w*', Name),
+ ],
+ 'declaration': [
+ (r'\)', Punctuation, '#pop'),
+ (words(('len', 'lenof'), suffix=r'\b'), Name,
+ ('root1', 'declaration')),  # in this state the two words are tokenized as Name, not Operator.Word
+ include('root1'),
+ ],
+ 'string': [
+ (r"[^']*'", _string(True)),  # callback starts in literal string text
+ ],
+ 'escape': [
+ (r"[^']*'", _string(False)),  # callback starts inside an unfinished escape
+ ],
+ }
+
+ def get_tokens_unprocessed(self, text=None, context=None):  # same signature as the base-class method it wraps
+ self._reset_stringescapes()  # _stringescapes mutates the lexer, so delimiters are reset before each run
+ return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context)