1 files changed, 117 insertions, 8 deletions
diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py
index 6032017f..a1426bd6 100644
--- a/pygments/lexers/dsls.py
+++ b/pygments/lexers/dsls.py
@@ -5,20 +5,20 @@
 
     Lexers for various domain-specific languages.
 
-    :copyright: Copyright 2006-2016 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
 import re
 
-from pygments.lexer import RegexLexer, bygroups, words, include, default, \
-    this, using, combined
+from pygments.lexer import ExtendedRegexLexer, RegexLexer, bygroups, words, \
+    include, default, this, using, combined
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
     Number, Punctuation, Literal, Whitespace
 
 __all__ = ['ProtoBufLexer', 'BroLexer', 'PuppetLexer', 'RslLexer',
            'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer',
-           'CrmshLexer', 'ThriftLexer', 'FlatlineLexer']
+           'CrmshLexer', 'ThriftLexer', 'FlatlineLexer', 'SnowballLexer']
 
 
 class ProtoBufLexer(RegexLexer):
@@ -36,7 +36,7 @@ class ProtoBufLexer(RegexLexer):
     tokens = {
         'root': [
             (r'[ \t]+', Text),
-            (r'[,;{}\[\]()]', Punctuation),
+            (r'[,;{}\[\]()<>]', Punctuation),
             (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
             (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
             (words((
@@ -156,7 +156,7 @@ class ThriftLexer(RegexLexer):
              Keyword.Namespace),
             (words((
                 'void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double',
-                'string', 'binary', 'void', 'map', 'list', 'set', 'slist',
+                'string', 'binary', 'map', 'list', 'set', 'slist',
                 'senum'), suffix=r'\b'),
              Keyword.Type),
             (words((
@@ -581,7 +581,7 @@ class PanLexer(RegexLexer):
                 'if', 'for', 'with', 'else', 'type', 'bind', 'while', 'valid', 'final',
                 'prefix', 'unique', 'object', 'foreach', 'include', 'template',
                 'function', 'variable', 'structure', 'extensible', 'declaration'),
-                   prefix=r'\b', suffix=r'\s*\b'),
+                prefix=r'\b', suffix=r'\s*\b'),
              Keyword),
             (words((
                 'file_contents', 'format', 'index', 'length', 'match', 'matches',
@@ -593,7 +593,7 @@ class PanLexer(RegexLexer):
                 'is_number', 'is_property', 'is_resource', 'is_string', 'to_boolean',
                 'to_double', 'to_long', 'to_string', 'clone', 'delete', 'exists',
                 'path_exists', 'if_exists', 'return', 'value'),
-                   prefix=r'\b', suffix=r'\s*\b'),
+                prefix=r'\b', suffix=r'\s*\b'),
              Name.Builtin),
             (r'#.*', Comment),
             (r'\\[\w\W]', String.Escape),
@@ -767,3 +767,112 @@ class FlatlineLexer(RegexLexer):
             (r'(\(|\))', Punctuation),
         ],
     }
+
+
+class SnowballLexer(ExtendedRegexLexer):
+    """
+    Lexer for `Snowball <http://snowballstem.org/>`_ source code.
+
+    .. versionadded:: 2.2
+    """
+
+    name = 'Snowball'
+    aliases = ['snowball']
+    filenames = ['*.sbl']
+
+    _ws = r'\n\r\t '  # whitespace characters, interpolated into character classes in the rules below
+
+    def __init__(self, **options):
+        self._reset_stringescapes()  # set the default delimiters before the base class is initialised
+        ExtendedRegexLexer.__init__(self, **options)
+
+    def _reset_stringescapes(self):  # restore the default (quote/quote) escape delimiters
+        self._start = "'"  # character that opens an escape inside a string literal
+        self._end = "'"  # character that closes an escape inside a string literal
+
+    def _string(do_string_first):  # callback factory called in the class body (no self); the flag picks the starting mode
+        def callback(lexer, match, ctx):  # yields (position, token type, text) triples and updates ctx
+            s = match.start()  # offset of the matched chunk; added to every yielded position
+            text = match.group()
+            string = re.compile(r'([^%s]*)(.)' % re.escape(lexer._start)).match  # run of non-`_start` chars, then one char
+            escape = re.compile(r'([^%s]*)(.)' % re.escape(lexer._end)).match  # run of non-`_end` chars, then one char
+            pos = 0  # scan position relative to `text`
+            do_string = do_string_first
+            while pos < len(text):
+                if do_string:  # string-text mode; skipped on the first pass when starting inside an escape
+                    match = string(text, pos)  # NOTE: rebinds the `match` parameter; ctx.pos below uses the last match
+                    yield s + match.start(1), String.Single, match.group(1)
+                    if match.group(2) == "'":  # a quote character ends the string literal
+                        yield s + match.start(2), String.Single, match.group(2)
+                        ctx.stack.pop()  # leave the current 'string'/'escape' state
+                        break
+                    yield s + match.start(2), String.Escape, match.group(2)  # otherwise the char opens an escape
+                    pos = match.end()
+                match = escape(text, pos)
+                yield s + match.start(), String.Escape, match.group()  # escape body and its last char as one token
+                if match.group(2) != lexer._end:  # the escape was not closed within this chunk
+                    ctx.stack[-1] = 'escape'  # replace the top state so the next chunk starts in escape mode
+                    break
+                pos = match.end()
+                do_string = True  # after a closed escape, continue in string-text mode
+            ctx.pos = s + match.end()  # resume lexing right after the last piece consumed
+        return callback
+
+    def _stringescapes(lexer, match, ctx):  # callback for the `stringescapes` rule in 'root1'
+        lexer._start = match.group(3)  # first delimiter character captured by the rule
+        lexer._end = match.group(5)  # second delimiter character captured by the rule
+        return bygroups(Keyword.Reserved, Text, String.Escape, Text,
+                        String.Escape)(lexer, match, ctx)
+
+    tokens = {
+        'root': [
+            (words(('len', 'lenof'), suffix=r'\b'), Operator.Word),  # the 'declaration' state emits these as Name instead
+            include('root1'),
+        ],
+        'root1': [  # rules shared by 'root' and 'declaration'
+            (r'[%s]+' % _ws, Text),
+            (r'\d+', Number.Integer),
+            (r"'", String.Single, 'string'),  # opening quote; the rest is handled in the 'string' state
+            (r'[()]', Punctuation),
+            (r'/\*[\w\W]*?\*/', Comment.Multiline),
+            (r'//.*', Comment.Single),
+            (r'[!*+\-/<=>]=|[-=]>|<[+-]|[$*+\-/<=>?\[\]]', Operator),
+            (words(('as', 'get', 'hex', 'among', 'define', 'decimal',
+                    'backwardmode'), suffix=r'\b'),
+             Keyword.Reserved),
+            (words(('strings', 'booleans', 'integers', 'routines', 'externals',
+                    'groupings'), suffix=r'\b'),
+             Keyword.Reserved, 'declaration'),  # these keywords push the 'declaration' state
+            (words(('do', 'or', 'and', 'for', 'hop', 'non', 'not', 'set', 'try',
+                    'fail', 'goto', 'loop', 'next', 'test', 'true',
+                    'false', 'unset', 'atmark', 'attach', 'delete', 'gopast',
+                    'insert', 'repeat', 'sizeof', 'tomark', 'atleast',
+                    'atlimit', 'reverse', 'setmark', 'tolimit', 'setlimit',
+                    'backwards', 'substring'), suffix=r'\b'),
+             Operator.Word),
+            (words(('size', 'limit', 'cursor', 'maxint', 'minint'),
+                   suffix=r'\b'),
+             Name.Builtin),
+            (r'(stringdef\b)([%s]*)([^%s]+)' % (_ws, _ws),
+             bygroups(Keyword.Reserved, Text, String.Escape)),
+            (r'(stringescapes\b)([%s]*)(.)([%s]*)(.)' % (_ws, _ws),  # groups 3 and 5 are the two delimiter characters
+             _stringescapes),
+            (r'[A-Za-z]\w*', Name),
+        ],
+        'declaration': [
+            (r'\)', Punctuation, '#pop'),  # ')' pops the 'declaration' state
+            (words(('len', 'lenof'), suffix=r'\b'), Name,
+             ('root1', 'declaration')),
+            include('root1'),
+        ],
+        'string': [
+            (r"[^']*'", _string(True)),  # chunk up to and including the next quote, scanned starting as string text
+        ],
+        'escape': [
+            (r"[^']*'", _string(False)),  # same chunk shape, scanned starting inside an escape
+        ],
+    }
+
+    def get_tokens_unprocessed(self, text=None, context=None):  # resets the delimiters, then defers to the base class
+        self._reset_stringescapes()  # begin every run with the default delimiters
+        return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context)