Merged in yloiseau/pygments-main (pull request #309)

author: Tim Hatch <tim@timhatch.com> 2014-04-14 13:47:40 -0400
committer: Tim Hatch <tim@timhatch.com> 2014-04-14 13:47:40 -0400
commit: 06a720cca67ff19f873f8066c17cf4ea90ab0f0f (patch)
tree: 2901fe8e218cce5a8e788645d41aec654f297e23
parent: 02683b5def213065f6b893f91fc54f313141fbdf (diff)
parent: 5d57fe78405ac06a306f5ed2dd1b630a909cbdfb (diff)
download: pygments-06a720cca67ff19f873f8066c17cf4ea90ab0f0f.tar.gz
18 files changed, 659 insertions, 21 deletions
diff --git a/AUTHORS b/AUTHORS
index c4da54f6..83c0eaca 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -65,6 +65,7 @@ Other contributors, listed alphabetically, are:
 * Rob Hoelz -- Perl 6 lexer
 * Doug Hogan -- Mscgen lexer
 * Ben Hollis -- Mason lexer
+* Max Horn -- GAP lexer
 * Dustin Howett -- Logos lexer
 * Alastair Houghton -- Lexer inheritance facility
 * Tim Howard -- BlitzMax lexer
@@ -98,6 +99,7 @@ Other contributors, listed alphabetically, are:
 * Brian McKenna -- F# lexer
 * Charles McLaughlin -- Puppet lexer
 * Lukas Meuser -- BBCode formatter, Lua lexer
+* Cat Miller -- Pig lexer
 * Paul Miller -- LiveScript lexer
 * Hong Minhee -- HTTP lexer
 * Michael Mior -- Awk lexer
diff --git a/doc/languages.rst b/doc/languages.rst
index 0fc9d9d3..426a576b 100644
--- a/doc/languages.rst
+++ b/doc/languages.rst
@@ -35,6 +35,7 @@ Programming languages
 * Fancy
 * Fortran
 * F#
+* GAP
 * Gherkin (Cucumber)
 * GL shaders
 * Groovy
@@ -81,6 +82,7 @@ Programming languages
 * Visual Basic.NET
 * Visual FoxPro
 * XQuery
+* Zephir
   </ul>
 
 Template languages
diff --git a/pygments/cmdline.py b/pygments/cmdline.py
index af8d48ea..7c23ebee 100644
--- a/pygments/cmdline.py
+++ b/pygments/cmdline.py
@@ -19,6 +19,7 @@ from pygments import __version__, highlight
 from pygments.util import ClassNotFound, OptionError, docstring_headline
 from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \
      find_lexer_class, guess_lexer, TextLexer
+from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
 from pygments.formatters import get_all_formatters, get_formatter_by_name, \
      get_formatter_for_filename, find_formatter_class, \
      TerminalFormatter  # pylint:disable-msg=E0611
@@ -405,6 +406,15 @@ def main(args=sys.argv):
         else:
             code = sys.stdin.read()
 
+    # When using the LaTeX formatter and the option `escapeinside` is
+    # specified, we need a special lexer which collects escaped text
+    # before running the chosen language lexer.
+    escapeinside = parsed_opts.get('escapeinside', '')
+    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
+        left = escapeinside[0]
+        right = escapeinside[1]
+        lexer = LatexEmbeddedLexer(left, right, lexer)
+
     # No encoding given? Use latin1 if output file given,
     # stdin/stdout encoding otherwise.
     # (This is a compromise, I'm not too happy with it...)
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index 0f2397eb..fee177c5 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -12,6 +12,7 @@
 from __future__ import division
 
 from pygments.formatter import Formatter
+from pygments.lexer import Lexer
 from pygments.token import Token, STANDARD_TYPES
 from pygments.util import get_bool_opt, get_int_opt, StringIO, xrange, \
     iteritems
@@ -226,6 +227,15 @@ class LatexFormatter(Formatter):
         ``False``).
 
         .. versionadded:: 1.2
+
+    `escapeinside`
+        If set to a string of length 2, enables escaping to LaTeX. Text
+        delimited by these 2 characters is read as LaTeX code and
+        typeset accordingly. It has no effect in string literals. It has
+        no effect in comments if `texcomments` or `mathescape` is
+        set. (default: ``''``).
+
+        .. versionadded:: 2.0
     """
     name = 'LaTeX'
     aliases = ['latex', 'tex']
@@ -243,6 +253,13 @@ class LatexFormatter(Formatter):
         self.commandprefix = options.get('commandprefix', 'PY')
         self.texcomments = get_bool_opt(options, 'texcomments', False)
         self.mathescape = get_bool_opt(options, 'mathescape', False)
+        self.escapeinside = options.get('escapeinside', '')
+
+        if len(self.escapeinside) == 2:
+            self.left = self.escapeinside[0]
+            self.right = self.escapeinside[1]
+        else:
+            self.escapeinside = ''
 
         self._create_stylesheet()
 
@@ -314,14 +331,14 @@ class LatexFormatter(Formatter):
             realoutfile = outfile
             outfile = StringIO()
 
-        outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}')
+        outfile.write(u'\\begin{Verbatim}[commandchars=\\\\\\{\\}')
         if self.linenos:
             start, step = self.linenostart, self.linenostep
             outfile.write(u',numbers=left' +
                           (start and u',firstnumber=%d' % start or u'') +
                           (step and u',stepnumber=%d' % step or u''))
-        if self.mathescape or self.texcomments:
-            outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
+        if self.mathescape or self.texcomments or self.escapeinside:
+            outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
         if self.verboptions:
             outfile.write(u',' + self.verboptions)
         outfile.write(u']\n')
@@ -350,9 +367,22 @@ class LatexFormatter(Formatter):
                             parts[i] = escape_tex(part, self.commandprefix)
                         in_math = not in_math
                     value = '$'.join(parts)
+                elif self.escapeinside:
+                    text = value
+                    value = ''
+                    while len(text) > 0:
+                        a,sep1,text = text.partition(self.left)
+                        if len(sep1) > 0:
+                            b,sep2,text = text.partition(self.right)
+                            if len(sep2) > 0:
+                                value = value + escape_tex(a, self.commandprefix) + b
+                            else:
+                                value = value + escape_tex(a + sep1 + b, self.commandprefix)
+                        else:
+                            value = value + escape_tex(a, self.commandprefix)
                 else:
                     value = escape_tex(value, self.commandprefix)
-            else:
+            elif not (ttype in Token.Escape):
                 value = escape_tex(value, self.commandprefix)
             styles = []
             while ttype is not Token:
@@ -384,3 +414,57 @@ class LatexFormatter(Formatter):
                      encoding  = self.encoding or 'latin1',
                      styledefs = self.get_style_defs(),
                      code      = outfile.getvalue()))
+
+
+class LatexEmbeddedLexer(Lexer):
+    r"""
+    Wrap a language lexer so that escaped LaTeX is emitted as Token.Escape.
+
+    `left` and `right` are the delimiters of escaped text and `lang` is the
+    lexer for the language being formatted.
+
+    Comments and strings found by `lang` are passed through unchanged. All
+    other consecutive tokens are merged; the merged text is scanned for
+    escaped segments (Token.Escape) and the rest is lexed again by `lang`.
+    An unmatched left delimiter is yielded as Token.Error.
+    """
+    def __init__(self, left, right, lang, **options):
+        self.left = left  # delimiter that opens an escaped segment
+        self.right = right  # delimiter that closes an escaped segment
+        self.lang = lang  # wrapped lexer for the language being formatted
+        Lexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text):  # yields (index, tokentype, value)
+        buf = ''  # merged text of consecutive non-comment, non-string tokens
+        for i, t, v in self.lang.get_tokens_unprocessed(text):
+            if t in Token.Comment or t in Token.String:
+                if buf:  # flush pending text before the comment/string
+                    for x in self.get_tokens_aux(idx, buf):
+                        yield x
+                    buf = ''
+                yield i, t, v  # comments and strings are never scanned for escapes
+            else:
+                if not buf:
+                    idx = i  # offset at which the merged run starts
+                buf += v
+        if buf:  # flush whatever is left after the last token
+            for x in self.get_tokens_aux(idx, buf):
+                yield x
+
+    def get_tokens_aux(self, index, text):  # index: offset of `text` in the input
+        while text:
+            a, sep1, text = text.partition(self.left)
+            if a:  # plain text before the next left delimiter: lex it with `lang`
+                for i, t, v in self.lang.get_tokens_unprocessed(a):
+                    yield index + i, t, v
+                index += len(a)  # advance once per chunk, after all of its tokens
+            if sep1:
+                b, sep2, text = text.partition(self.right)
+                if sep2:  # matched pair: only the inner text is yielded
+                    yield index + len(sep1), Token.Escape, b
+                    index += len(sep1) + len(b) + len(sep2)
+                else:  # no closing delimiter: flag the opener, rescan what follows
+                    yield index, Token.Error, sep1
+                    index += len(sep1)
+                    text = b
+
diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py
index 59d97742..9d87e8f1 100644
--- a/pygments/formatters/rtf.py
+++ b/pygments/formatters/rtf.py
@@ -10,6 +10,7 @@
 """
 
 from pygments.formatter import Formatter
+from pygments.util import get_int_opt
 
 
 __all__ = ['RtfFormatter']
@@ -32,6 +33,12 @@ class RtfFormatter(Formatter):
     `fontface`
         The used font famliy, for example ``Bitstream Vera Sans``. Defaults to
         some generic font which is supposed to have fixed width.
+
+    `fontsize`
+        Size of the font used. Size is specified in half points. The
+        default is 24 half-points, giving a size 12 font.
+
+        .. versionadded:: 2.0
     """
     name = 'RTF'
     aliases = ['rtf']
@@ -49,9 +56,11 @@ class RtfFormatter(Formatter):
             specification claims that ``\fmodern`` are "Fixed-pitch serif
             and sans serif fonts". Hope every RTF implementation thinks
             the same about modern...
+
         """
         Formatter.__init__(self, **options)
         self.fontface = options.get('fontface') or ''
+        self.fontsize = get_int_opt(options, 'fontsize', 0)
 
     def _escape(self, text):
         return text.replace('\\', '\\\\') \
@@ -106,6 +115,8 @@ class RtfFormatter(Formatter):
                     ))
                     offset += 1
         outfile.write(r'}\f0')
+        if self.fontsize:
+            outfile.write(r'\fs%d' % (self.fontsize))
 
         # highlight stream
         for ttype, value in tokensource:
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index b90e0d8f..c937d908 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -67,6 +67,7 @@ LEXERS = {
     'CirruLexer': ('pygments.lexers.web', 'Cirru', ('cirru',), ('*.cirru', '*.cr'), ('text/x-cirru',)),
     'ClayLexer': ('pygments.lexers.compiled', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)),
     'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')),
+    'ClojureScriptLexer': ('pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')),
     'CobolFreeformatLexer': ('pygments.lexers.compiled', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()),
     'CobolLexer': ('pygments.lexers.compiled', 'COBOL', ('cobol',), ('*.cob', '*.COB', '*.cpy', '*.CPY'), ('text/x-cobol',)),
     'CoffeeScriptLexer': ('pygments.lexers.web', 'CoffeeScript', ('coffee-script', 'coffeescript', 'coffee'), ('*.coffee',), ('text/coffeescript',)),
@@ -119,6 +120,7 @@ LEXERS = {
     'FelixLexer': ('pygments.lexers.compiled', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)),
     'FortranLexer': ('pygments.lexers.compiled', 'Fortran', ('fortran',), ('*.f', '*.f90', '*.F', '*.F90'), ('text/x-fortran',)),
     'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()),
+    'GAPLexer': ('pygments.lexers.math', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()),
     'GLShaderLexer': ('pygments.lexers.compiled', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)),
     'GasLexer': ('pygments.lexers.asm', 'GAS', ('gas', 'asm'), ('*.s', '*.S'), ('text/x-gas',)),
     'GenshiLexer': ('pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',), ('application/x-genshi', 'application/x-kid')),
@@ -239,6 +241,7 @@ LEXERS = {
     'Perl6Lexer': ('pygments.lexers.agile', 'Perl6', ('perl6', 'pl6'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t'), ('text/x-perl6', 'application/x-perl6')),
     'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm', '*.t'), ('text/x-perl', 'application/x-perl')),
     'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]', '*.inc'), ('text/x-php',)),
+    'PigLexer': ('pygments.lexers.jvm', 'Pig', ('pig',), ('*.pig',), ('text/x-pig',)),
     'PikeLexer': ('pygments.lexers.compiled', 'Pike', ('pike',), ('*.pike', '*.pmod'), ('text/x-pike',)),
     'PlPgsqlLexer': ('pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)),
     'PostScriptLexer': ('pygments.lexers.other', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
@@ -329,6 +332,7 @@ LEXERS = {
     'XsltLexer': ('pygments.lexers.web', 'XSLT', ('xslt',), ('*.xsl', '*.xslt', '*.xpl'), ('application/xsl+xml', 'application/xslt+xml')),
     'XtendLexer': ('pygments.lexers.jvm', 'Xtend', ('xtend',), ('*.xtend',), ('text/x-xtend',)),
     'YamlLexer': ('pygments.lexers.text', 'YAML', ('yaml',), ('*.yaml', '*.yml'), ('text/x-yaml',)),
+    'ZephirLexer': ('pygments.lexers.web', 'Zephir', ('zephir',), ('*.zep',), ()),
 }
 
 if __name__ == '__main__':
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index db69fdc6..e6e10098 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -469,20 +469,23 @@ class DLexer(RegexLexer):
             (r'(abstract|alias|align|asm|assert|auto|body|break|case|cast'
              r'|catch|class|const|continue|debug|default|delegate|delete'
              r'|deprecated|do|else|enum|export|extern|finally|final'
-             r'|foreach_reverse|foreach|for|function|goto|if|import|inout'
-             r'|interface|invariant|in|is|lazy|mixin|module|new|nothrow|out'
+             r'|foreach_reverse|foreach|for|function|goto|if|immutable|import'
+             r'|interface|invariant|inout|in|is|lazy|mixin|module|new|nothrow|out'
              r'|override|package|pragma|private|protected|public|pure|ref|return'
-             r'|scope|static|struct|super|switch|synchronized|template|this'
+             r'|scope|shared|static|struct|super|switch|synchronized|template|this'
              r'|throw|try|typedef|typeid|typeof|union|unittest|version|volatile'
-             r'|while|with|__traits)\b', Keyword
+             r'|while|with|__gshared|__traits|__vector|__parameters)\b', Keyword
             ),
             (r'(bool|byte|cdouble|cent|cfloat|char|creal|dchar|double|float'
              r'|idouble|ifloat|int|ireal|long|real|short|ubyte|ucent|uint|ulong'
              r'|ushort|void|wchar)\b', Keyword.Type
             ),
             (r'(false|true|null)\b', Keyword.Constant),
+            (r'(__FILE__|__MODULE__|__LINE__|__FUNCTION__|__PRETTY_FUNCTION__'
+             r'|__DATE__|__EOF__|__TIME__|__TIMESTAMP__|__VENDOR__|__VERSION__)\b',
+             Keyword.Pseudo),
             (r'macro\b', Keyword.Reserved),
-            (r'(string|wstring|dstring)\b', Name.Builtin),
+            (r'(string|wstring|dstring|size_t|ptrdiff_t)\b', Name.Builtin),
             # FloatLiteral
             # -- HexFloat
             (r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)'
@@ -528,6 +531,8 @@ class DLexer(RegexLexer):
             (r'q"(.).*?\1"', String),
             # -- TokenString
             (r'q{', String, 'token_string'),
+            # Attributes
+            (r'@([a-zA-Z_]\w*)?', Name.Decorator),
             # Tokens
             (r'(~=|\^=|%=|\*=|==|!>=|!<=|!<>=|!<>|!<|!>|!=|>>>=|>>>|>>=|>>|>='
              r'|<>=|<>|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.\.|\.\.|/=)'
@@ -535,6 +540,8 @@ class DLexer(RegexLexer):
             ),
             # Identifier
             (r'[a-zA-Z_]\w*', Name),
+            # Line
+            (r'#line\s.*\n', Comment.Special),
         ],
         'nested_comment': [
             (r'[^+/]+', Comment.Multiline),
@@ -1434,8 +1441,8 @@ def objective(baselexer):
                 # discussion in Issue 789
                 (r',', Punctuation),
                 (r'\.\.\.', Punctuation),
-                (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this),
-                                                                  Name.Variable)),
+                (r'(\(.*?\))(\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)',
+                 bygroups(using(this), Text, Name.Variable)),
                 (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function),
                 (';', Punctuation, '#pop'),
                 ('{', Punctuation, 'function'),
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 30a7ddd7..64c47b6e 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -19,8 +19,9 @@ from pygments import unistring as uni
 
 
 __all__ = ['JavaLexer', 'ScalaLexer', 'GosuLexer', 'GosuTemplateLexer',
-           'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'KotlinLexer',
-           'XtendLexer', 'AspectJLexer', 'CeylonLexer']
+           'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'ClojureScriptLexer',
+           'KotlinLexer', 'XtendLexer', 'AspectJLexer', 'CeylonLexer',
+           'PigLexer']
 
 
 class JavaLexer(RegexLexer):
@@ -813,6 +814,19 @@ class ClojureLexer(RegexLexer):
     }
 
 
+class ClojureScriptLexer(ClojureLexer):
+    """
+    Lexer for `ClojureScript <http://clojure.org/clojurescript>`_
+    source code; subclasses ClojureLexer and sets only the metadata below.
+
+    .. versionadded:: 2.0
+    """
+    name = 'ClojureScript'
+    aliases = ['clojurescript', 'cljs']  # short names for this lexer
+    filenames = ['*.cljs']  # filename patterns for this lexer
+    mimetypes = ['text/x-clojurescript', 'application/x-clojurescript']
+
+
 class TeaLangLexer(RegexLexer):
     """
     For `Tea <http://teatrove.org/>`_ source code. Only used within a
@@ -1066,3 +1080,69 @@ class XtendLexer(RegexLexer):
             (r'.', String)
         ],
     }
+
+class PigLexer(RegexLexer):
+    """
+    For `Pig Latin <https://pig.apache.org/>`_ source code (``*.pig``).
+
+    .. versionadded:: 2.0
+    """
+
+    name = 'Pig'
+    aliases = ['pig']
+    filenames = ['*.pig']
+    mimetypes = ['text/x-pig']
+
+    flags = re.MULTILINE | re.IGNORECASE  # all patterns below match case-insensitively
+
+    tokens = {
+        'root': [
+            (r'\s+', Text),
+            (r'--.*', Comment),  # '--' to end of line
+            (r'/\*[\w\W]*?\*/', Comment.Multiline),  # [\w\W] spans newlines
+            (r'\\\n', Text),  # backslash followed by a newline
+            (r'\\', Text),
+            (r'\'(?:\\[ntbrf\\\']|\\u[0-9a-f]{4}|[^\'\\\n\r])*\'', String),
+            include('keywords'),
+            include('types'),
+            include('builtins'),
+            include('punct'),
+            include('operators'),
+            (r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
+            (r'0x[0-9a-f]+', Number.Hex),
+            (r'[0-9]+L?', Number.Integer),
+            (r'\n', Text),
+            (r'([a-z_][a-z0-9_]*)(\s*)(\()',  # identifier directly followed by '('
+             bygroups(Name.Function, Text, Punctuation)),
+            (r'[()#:]', Text),
+            (r'[^(:#\'\")\s]+', Text),
+            (r'\S+\s+', Text) # TODO: make tests pass without \s+
+        ],
+        'keywords': [  # statement keywords and shell/utility commands (cat, cd, exec, ...)
+            (r'(assert|and|any|all|arrange|as|asc|bag|by|cache|CASE|cat|cd|cp|'
+             r'%declare|%default|define|dense|desc|describe|distinct|du|dump|'
+             r'eval|exec|explain|filter|flatten|foreach|full|generate|group|'
+             r'help|if|illustrate|import|inner|input|into|is|join|kill|left|'
+             r'limit|load|ls|map|matches|mkdir|mv|not|null|onschema|or|order|'
+             r'outer|output|parallel|pig|pwd|quit|register|returns|right|rm|'
+             r'rmf|rollup|run|sample|set|ship|split|stderr|stdin|stdout|store|'
+             r'stream|through|union|using|void)\b', Keyword)
+        ],
+        'builtins': [  # names highlighted as Name.Builtin
+            (r'(AVG|BinStorage|cogroup|CONCAT|copyFromLocal|copyToLocal|COUNT|'
+             r'cross|DIFF|MAX|MIN|PigDump|PigStorage|SIZE|SUM|TextLoader|'
+             r'TOKENIZE)\b', Name.Builtin)
+        ],
+        'types': [  # names highlighted as Keyword.Type
+            (r'(bytearray|BIGINTEGER|BIGDECIMAL|chararray|datetime|double|float|'
+             r'int|long|tuple)\b', Keyword.Type)
+        ],
+        'punct': [
+            (r'[;(){}\[\]]', Punctuation),
+        ],
+        'operators': [
+            (r'[#=,./%+\-?]', Operator),
+            (r'(eq|gt|lt|gte|lte|neq|matches)\b', Operator),  # word-form operators
+            (r'(==|<=|<|>=|>|!=)', Operator),
+        ],
+    }
diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py
index 1bce106c..e7a8948b 100644
--- a/pygments/lexers/math.py
+++ b/pygments/lexers/math.py
@@ -26,7 +26,7 @@ from pygments.lexers import _stan_builtins
 __all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer',
            'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer',
            'RConsoleLexer', 'SLexer', 'JagsLexer', 'BugsLexer', 'StanLexer',
-           'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer']
+           'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer', 'GAPLexer']
 
 
 class JuliaLexer(RegexLexer):
@@ -1972,3 +1972,53 @@ class MathematicaLexer(RegexLexer):
             (r'\s+', Text.Whitespace),
         ],
     }
+
+class GAPLexer(RegexLexer):
+    """
+    For `GAP <http://www.gap-system.org>`_ source code (``*.g``, ``*.gd``, ``*.gi``, ``*.gap``).
+
+    .. versionadded:: 2.0
+    """
+    name = 'GAP'
+    aliases = ['gap']
+    filenames = ['*.g', '*.gd', '*.gi', '*.gap']
+
+    tokens = {
+      'root' : [
+        (r'#.*$', Comment.Single),  # '#' up to the end of the line
+        (r'"(?:[^"\\]|\\.)*"', String),  # double-quoted, backslash escapes allowed
+        (r'\(|\)|\[|\]|\{|\}', Punctuation),  # parentheses, brackets and braces
+        (r'''(?x)\b(?:
+            if|then|elif|else|fi|
+            for|while|do|od|
+            repeat|until|
+            break|continue|
+            function|local|return|end|
+            rec|
+            quit|QUIT|
+            IsBound|Unbind|
+            TryNextMethod|
+            Info|Assert
+          )\b''', Keyword),  # whole words highlighted as keywords
+        (r'''(?x)\b(?:
+            true|false|fail|infinity
+          )\b''',
+          Name.Constant),  # literal constants
+        (r'''(?x)\b(?:
+            (Declare|Install)([A-Z][A-Za-z]+)|
+               BindGlobal|BIND_GLOBAL
+          )\b''',
+          Name.Builtin),  # Declare*/Install* names plus BindGlobal/BIND_GLOBAL
+        (r'\.|,|:=|;|=|\+|-|\*|/|\^|>|<', Operator),  # single symbols and ':='
+        (r'''(?x)\b(?:
+            and|or|not|mod|in
+          )\b''',
+          Operator.Word),  # operators spelled as words
+        (r'''(?x)
+          (?:[a-zA-Z_0-9]+|`[^`]*`)
+          (?:::[a-zA-Z_0-9]+|`[^`]*`)*''', Name.Variable),  # NOTE(review): also matches digit-only runs and is listed before the Number rules — confirm they are reachable
+        (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number),  # digits, optional fraction/exponent
+        (r'\.[0-9]+(?:e[0-9]+)?', Number),  # NOTE(review): the Operator rule above already matches a lone '.' — confirm this is reachable
+        (r'.', Text)  # fallback: any other single character
+      ]
+    }
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index 1b8ce892..ba777e28 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -1235,9 +1235,9 @@ class ModelicaLexer(RegexLexer):
         'root': [
             include('whitespace'),
             include('keywords'),
+            include('classes'),
             include('functions'),
             include('operators'),
-            include('classes'),
             (r'("<html>|<html>)', Name.Tag, 'html-content'),
             include('statements'),
         ],
@@ -1264,9 +1264,9 @@ class ModelicaLexer(RegexLexer):
              r'terminate)\b', Name.Builtin),
         ],
         'classes': [
-            (r'(block|class|connector|end|function|model|package|'
+            (r'(operator)?(\s+)?(block|class|connector|end|function|model|operator|package|'
              r'record|type)(\s+)((?!if|when|while)[A-Za-z_]\w*|[\'][^\']+[\'])([;]?)',
-             bygroups(Keyword, Text, Name.Class, Text))
+             bygroups(Keyword, Text, Keyword, Text, Name.Class, Text))
         ],
         'quoted_ident': [
             (r'\'', Name, '#pop'),
@@ -2528,11 +2528,11 @@ class AwkLexer(RegexLexer):
         'root': [
             (r'^(?=\s|/)', Text, 'slashstartsregex'),
             include('commentsandwhitespace'),
-            (r'\+\+|--|\|\||&&|in|\$|!?~|'
+            (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
              r'(\*\*|[-<>+*%\^/!=])=?', Operator, 'slashstartsregex'),
             (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
             (r'[})\].]', Punctuation),
-            (r'(break|continue|do|while|exit|for|if|'
+            (r'(break|continue|do|while|exit|for|if|else|'
              r'return)\b', Keyword, 'slashstartsregex'),
             (r'function\b', Keyword.Declaration, 'slashstartsregex'),
             (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index c975ad80..7d3073f1 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -28,7 +28,7 @@ __all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'JsonLexer', 'CssLexer',
            'ObjectiveJLexer', 'CoffeeScriptLexer', 'LiveScriptLexer',
            'DuelLexer', 'ScamlLexer', 'JadeLexer', 'XQueryLexer',
            'DtdLexer', 'DartLexer', 'LassoLexer', 'QmlLexer', 'TypeScriptLexer',
-           'KalLexer', 'CirruLexer', 'MaskLexer']
+           'KalLexer', 'CirruLexer', 'MaskLexer', 'ZephirLexer']
 
 
 class JavascriptLexer(RegexLexer):
@@ -3691,7 +3691,7 @@ class DartLexer(RegexLexer):
              r'native|operator|set|static|typedef|var)\b', Keyword.Declaration),
             (r'\b(bool|double|Dynamic|int|num|Object|String|void)\b', Keyword.Type),
             (r'\b(false|null|true)\b', Keyword.Constant),
-            (r'[~!%^&*+=|?:<>/-]|as', Operator),
+            (r'[~!%^&*+=|?:<>/-]|as\b', Operator),
             (r'[a-zA-Z_$][a-zA-Z0-9_]*:', Name.Label),
             (r'[a-zA-Z_$][a-zA-Z0-9_]*', Name),
             (r'[(){}\[\],.;]', Punctuation),
@@ -4336,5 +4336,68 @@ class MaskLexer(RegexLexer):
         'string-double-pop2':[
             (r'"', String.Single, '#pop:2'),
             include('string-base')
+        ],
+    }
+
+
+class ZephirLexer(RegexLexer):
+    """
+    For `Zephir language <http://zephir-lang.com/>`_ source code.
+
+    Zephir is a compiled high level language aimed
+    at the creation of C-extensions for PHP.
+
+    .. versionadded:: 2.0
+    """
+
+    name = 'Zephir'
+    aliases = ['zephir']
+    filenames = ['*.zep']
+
+    zephir_keywords = [ 'fetch', 'echo', 'isset', 'empty']  # NOTE(review): not referenced by the rules in this class — confirm it is needed
+    zephir_type = [ 'bit', 'bits' , 'string' ]  # NOTE(review): not referenced by the rules in this class — confirm it is needed
+
+    flags = re.DOTALL | re.MULTILINE  # DOTALL lets '.' in the comment rules span newlines
+
+    tokens = {
+        'commentsandwhitespace': [
+            (r'\s+', Text),
+            (r'//.*?\n', Comment.Single),  # '//' through the end of the line
+            (r'/\*.*?\*/', Comment.Multiline)  # non-greedy, so it stops at the first '*/'
+        ],
+        'slashstartsregex': [  # entered where a following '/' may begin a regex literal
+            include('commentsandwhitespace'),
+            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
+             r'([gim]+\b|\B)', String.Regex, '#pop'),
+            (r'', Text, '#pop')  # empty match: nothing regex-like here, leave the state
+        ],
+        'badregex': [  # NOTE(review): no rule in this class enters 'badregex' — possibly unused
+            (r'\n', Text, '#pop')
+        ],
+        'root': [
+            (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),  # zero-width match at line start
+            include('commentsandwhitespace'),
+            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
+             r'(<<|>>>?|==?|!=?|->|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'),
+            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
+            (r'[})\].]', Punctuation),
+            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|require|inline|'
+             r'throw|try|catch|finally|new|delete|typeof|instanceof|void|namespace|use|extends|'
+             r'this|fetch|isset|unset|echo|fetch|likely|unlikely|empty)\b', Keyword, 'slashstartsregex'),  # NOTE(review): 'fetch' is listed twice
+            (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
+            (r'(abstract|boolean|bool|char|class|const|double|enum|export|'
+             r'extends|final|float|goto|implements|import|int|string|interface|long|ulong|char|uchar|native|unsigned|'
+             r'private|protected|public|short|static|self|throws|reverse|'
+             r'transient|volatile)\b', Keyword.Reserved),  # NOTE(review): 'char' is listed twice; 'extends' also appears in the Keyword rule above
+            (r'(true|false|null|undefined)\b', Keyword.Constant),
+            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
+             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
+             r'window)\b', Name.Builtin),  # NOTE(review): 'this' also appears in the Keyword rule above
+            (r'[$a-zA-Z_][a-zA-Z0-9_\\]*', Name.Other),  # identifiers; backslash is allowed after the first character
+            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
+            (r'0x[0-9a-fA-F]+', Number.Hex),
+            (r'[0-9]+', Number.Integer),
+            (r'"(\\\\|\\"|[^"])*"', String.Double),
+            (r"'(\\\\|\\'|[^'])*'", String.Single),
         ]
     }
diff --git a/pygments/token.py b/pygments/token.py
index f6c3066d..c40ffd33 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -49,6 +49,7 @@ Token       = _TokenType()
 # Special token types
 Text        = Token.Text
 Whitespace  = Text.Whitespace
+Escape      = Token.Escape
 Error       = Token.Error
 # Text that doesn't belong to this lexer (e.g. HTML in PHP)
 Other       = Token.Other
@@ -116,6 +117,7 @@ STANDARD_TYPES = {
 
     Text:                          '',
     Whitespace:                    'w',
+    Escape:                        'esc',
     Error:                         'err',
     Other:                         'x',
 
diff --git a/tests/examplefiles/core.cljs b/tests/examplefiles/core.cljs
new file mode 100644
index 00000000..f135b832
--- /dev/null
+++ b/tests/examplefiles/core.cljs
@@ -0,0 +1,52 @@
+
+(ns bounder.core
+  (:require [bounder.html :as html]
+            [domina :refer [value set-value! single-node]]
+            [domina.css :refer [sel]]
+            [lowline.functions :refer [debounce]]
+            [enfocus.core :refer [at]]
+            [cljs.reader :as reader]
+            [clojure.string :as s])
+  (:require-macros [enfocus.macros :as em]))
+
+(def filter-input 
+  (single-node 
+    (sel ".search input")))
+
+(defn project-matches [query project]
+  (let [words (cons (:name project)
+                    (map name (:categories project)))
+        to-match (->> words
+                   (s/join "")
+                   (s/lower-case))]
+    (<= 0 (.indexOf to-match (s/lower-case query)))))
+
+(defn apply-filter-for [projects]
+ (let [query (value filter-input)]
+   (html/render-projects 
+     (filter (partial project-matches query)
+             projects))))
+
+(defn filter-category [projects evt]
+  (let [target (.-currentTarget evt)]
+    (set-value! filter-input 
+                (.-innerHTML target))
+    (apply-filter-for projects)))
+
+(defn init-listeners [projects]
+  (at js/document
+    ["input"] (em/listen
+                :keyup
+                (debounce
+                  (partial apply-filter-for projects)
+                  500))
+    [".category-links li"] (em/listen
+                             :click
+                             (partial filter-category projects))))
+
+(defn init [projects-edn]
+  (let [projects (reader/read-string projects-edn)]
+    (init-listeners projects)
+    (html/render-projects projects)
+    (html/loaded)))
+
diff --git a/tests/examplefiles/example.gd b/tests/examplefiles/example.gd
new file mode 100644
index 00000000..c285ea32
--- /dev/null
+++ b/tests/examplefiles/example.gd
@@ -0,0 +1,23 @@
+#############################################################################
+##
+#W  example.gd
+##
+##  This file contains a sample of a GAP declaration file.
+##
+DeclareProperty( "SomeProperty", IsLeftModule );
+DeclareGlobalFunction( "SomeGlobalFunction" );
+
+
+#############################################################################
+##
+#C  IsQuuxFrobnicator(<R>)
+##
+##  <ManSection>
+##  <Filt Name="IsQuuxFrobnicator" Arg='R' Type='Category'/>
+##
+##  <Description>
+##  Tests whether R is a quux frobnicator.
+##  </Description>
+##  </ManSection>
+##
+DeclareSynonym( "IsQuuxFrobnicator", IsField and IsGroup );
diff --git a/tests/examplefiles/example.gi b/tests/examplefiles/example.gi
new file mode 100644
index 00000000..c9c5e55d
--- /dev/null
+++ b/tests/examplefiles/example.gi
@@ -0,0 +1,64 @@
+#############################################################################
+##
+#W  example.gi
+##
+##  This file contains a sample of a GAP implementation file.
+##
+
+
+#############################################################################
+##
+#M  SomeProperty( <M> )
+##
+##  returns true for a non-trivial free left module <M>
+##
+InstallMethod( SomeProperty,
+    "for left modules",
+    [ IsLeftModule ], 0,
+    function( M )
+    if not IsFreeLeftModule( M ) or IsTrivial( M ) then
+      TryNextMethod();  # not decided here; hand over to the next method
+    fi;
+    return true;
+    end );
+
+
+
+#############################################################################
+##
+#F  SomeGlobalFunction( )
+##
+##  A global variadic function.
+##
+InstallGlobalFunction( SomeGlobalFunction, function( arg )
+    if Length( arg ) = 3 then        # called as ( <x>, <y>, <z> )
+      return arg[1] + arg[2] * arg[3];
+    elif Length( arg ) = 2 then      # called as ( <x>, <y> )
+      return arg[1] - arg[2];
+    else                             # any other argument count is a usage error
+      Error( "usage: SomeGlobalFunction( <x>, <y>[, <z>] )" );
+    fi;
+    end );
+
+
+#
+# A plain function.
+#
+SomeFunc := function(x, y)
+    local z, func, tmp, j;
+    z := x * 1.0;                   # x times the float 1.0; z is the return value
+    y := 17^17 - y;
+    func := a -> a mod 5;           # residue modulo 5
+    tmp := List( [1..50], func );   # func applied to each of 1..50
+    while y > 0 do
+        for j in tmp do
+            Print(j, "\n");         # one entry of tmp per line
+        od;
+        repeat
+            y := y - 1;
+        until 0 < 1;                # always true, so the repeat body runs once
+        y := y -1;                  # together with the repeat: y drops by 2 per pass
+    od;
+    return z;
+end;
+        
+\ No newline at end of file
diff --git a/tests/examplefiles/objc_example.m b/tests/examplefiles/objc_example.m
index 67b33022..f4f27170 100644
--- a/tests/examplefiles/objc_example.m
+++ b/tests/examplefiles/objc_example.m
@@ -30,3 +30,6 @@ NSDictionary *d = @{ @"key": @"value" };
 
 NSNumber *n1 = @( 1 );
 NSNumber *n2 = @( [a length] );
+
++ (void)f1:(NSString *)s1;
++ (void)f2:(NSString *) s2;
diff --git a/tests/examplefiles/test.pig b/tests/examplefiles/test.pig
new file mode 100644
index 00000000..f67b0268
--- /dev/null
+++ b/tests/examplefiles/test.pig
@@ -0,0 +1,148 @@
+/**
+ *  This script is an example recommender (using made up data) showing how you might modify item-item links
+ *  by defining similar relations between items in a dataset and customizing the change in weighting.
+ *  This example creates metadata by using the genre field as the metadata_field.  The items with
+ *  the same genre have their weight cut in half in order to boost the signals of movies that do not have the same genre.
+ *  This technique requires a customization of the standard GetItemItemRecommendations macro
+ */
+import 'recommenders.pig';
+
+
+
+%default INPUT_PATH_PURCHASES '../data/retail/purchases.json'
+%default INPUT_PATH_WISHLIST '../data/retail/wishlists.json'
+%default INPUT_PATH_INVENTORY '../data/retail/inventory.json'
+%default OUTPUT_PATH '../data/retail/out/modify_item_item'
+
+
+/******** Custom GetItemItemRecommendations *********/
+define recsys__GetItemItemRecommendations_ModifyCustom(user_item_signals, metadata) returns item_item_recs {
+
+    -- Convert user_item_signals to an item_item_graph
+    ii_links_raw, item_weights   =   recsys__BuildItemItemGraph(
+                                       $user_item_signals,
+                                       $LOGISTIC_PARAM,
+                                       $MIN_LINK_WEIGHT,
+                                       $MAX_LINKS_PER_USER
+                                     );
+    -- NOTE this function is added in order to combine metadata with item-item links
+        -- See macro for more detailed explanation
+    ii_links_metadata           =   recsys__AddMetadataToItemItemLinks(
+                                        ii_links_raw,
+                                        $metadata
+                                    );
+
+    /********* Custom Code starts here ********/
+
+    -- The code here should adjust the weights based on an item-item link and the equality of metadata.
+    -- In this case, if the metadata is the same, the weight is reduced.  Otherwise the weight is left alone.
+    ii_links_adjusted           =  foreach ii_links_metadata generate item_A, item_B,
+                                        -- the amount of weight adjusted is dependent on the domain of data and what is expected
+                                        -- It is always best to adjust the weight by multiplying it by a factor rather than by adding a constant
+                                        (metadata_B == metadata_A ? (weight * 0.5): weight) as weight;
+
+
+    /******** Custom Code stops here *********/
+
+    -- clamp weights that are zero or negative to 0, just in case
+    ii_links_adjusted_filt = foreach ii_links_adjusted generate item_A, item_B,
+                                      (weight <= 0 ? 0: weight) as weight;
+    -- Adjust the weights of the graph to improve recommendations.
+    ii_links                    =   recsys__AdjustItemItemGraphWeight(
+                                        ii_links_adjusted_filt,
+                                        item_weights,
+                                        $BAYESIAN_PRIOR
+                                    );
+
+    -- Use the item-item graph to create item-item recommendations.
+    $item_item_recs =  recsys__BuildItemItemRecommendationsFromGraph(
+                           ii_links,
+                           $NUM_RECS_PER_ITEM,
+                           $NUM_RECS_PER_ITEM
+                       );
+};
+
+
+/******* Load Data **********/
+
+--Get purchase signals
+purchase_input = load '$INPUT_PATH_PURCHASES' using org.apache.pig.piggybank.storage.JsonLoader(
+                    'row_id: int,
+                     movie_id: chararray,
+                     movie_name: chararray,
+                     user_id: chararray,
+                     purchase_price: int');
+
+--Get wishlist signals
+wishlist_input =  load '$INPUT_PATH_WISHLIST' using org.apache.pig.piggybank.storage.JsonLoader(
+                     'row_id: int,
+                      movie_id: chararray,
+                      movie_name: chararray,
+                      user_id: chararray');
+
+
+/******* Convert Data to Signals **********/
+
+-- Start with choosing 1 as max weight for a signal.
+purchase_signals = foreach purchase_input generate
+                        user_id    as user,
+                        movie_name as item,
+                        1.0        as weight;
+
+
+-- Start with choosing 0.5 as weight for wishlist items because that is a weaker signal than
+-- purchasing an item.
+wishlist_signals = foreach wishlist_input generate
+                        user_id    as user,
+                        movie_name as item,
+                        0.5        as weight;
+
+user_signals = union purchase_signals, wishlist_signals;
+
+
+/******** Changes for Modifying item-item links ******/
+inventory_input = load '$INPUT_PATH_INVENTORY' using org.apache.pig.piggybank.storage.JsonLoader(
+                     'movie_title: chararray,
+                      genres: bag{tuple(content:chararray)}');
+
+
+metadata = foreach inventory_input generate
+              FLATTEN(genres) as metadata_field,
+              movie_title as item;
+-- requires the macro to be written separately
+  --NOTE this macro is defined within this file for clarity
+item_item_recs = recsys__GetItemItemRecommendations_ModifyCustom(user_signals, metadata);
+/******* No more changes ********/
+
+
+user_item_recs = recsys__GetUserItemRecommendations(user_signals, item_item_recs);
+
+--Completely unrelated code stuck in the middle
+data        =    LOAD 's3n://my-s3-bucket/path/to/responses'
+                 USING org.apache.pig.piggybank.storage.JsonLoader();
+responses   =    FOREACH data GENERATE object#'response' AS response: map[];
+out         =    FOREACH responses
+                 GENERATE response#'id' AS id: int, response#'thread' AS thread: chararray,
+                          response#'comments' AS comments: {t: (comment: chararray)};
+STORE out INTO 's3n://path/to/output' USING PigStorage('|');
+
+
+/******* Store recommendations **********/
+
+--  If your output folder exists already, hadoop will refuse to write data to it.
+
+rmf $OUTPUT_PATH/item_item_recs;
+rmf $OUTPUT_PATH/user_item_recs;
+
+store item_item_recs into '$OUTPUT_PATH/item_item_recs' using PigStorage();
+store user_item_recs into '$OUTPUT_PATH/user_item_recs' using PigStorage();
+
+-- STORE the item_item_recs into dynamo
+STORE item_item_recs
+ INTO '$OUTPUT_PATH/unused-ii-table-data'
+USING com.mortardata.pig.storage.DynamoDBStorage('$II_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');
+
+-- STORE the user_item_recs into dynamo
+STORE user_item_recs
+ INTO '$OUTPUT_PATH/unused-ui-table-data'
+USING com.mortardata.pig.storage.DynamoDBStorage('$UI_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');
diff --git a/tests/examplefiles/test.zep b/tests/examplefiles/test.zep
new file mode 100644
index 00000000..4724d4c4
--- /dev/null
+++ b/tests/examplefiles/test.zep
@@ -0,0 +1,33 @@
+namespace Test;
+
+use Test\Foo;
+
+class Bar
+{
+    protected a;
+    private b;
+    public c {set, get};    // property declared with the {set, get} shortcut
+
+    public function __construct(string str, boolean bool)
+    {
+        let this->c = str;
+        this->setC(bool);   // setC is not written out in this class; presumably generated by {set, get} above (confirm)
+        let this->b = [];
+    }
+
+    public function sayHello(string name)
+    {
+        echo "Hello " . name;   // greeting followed by the given name
+    }
+
+    protected function loops()
+    {
+        for a in b {        // uses the bare names a and b, not this->a / this->b
+            echo a;
+        }
+        loop {
+            return "boo!";  // returns on the first pass through the loop
+        }
+    }
+
+}
+\ No newline at end of file
author	Tim Hatch <tim@timhatch.com>	2014-04-14 13:47:40 -0400
committer	Tim Hatch <tim@timhatch.com>	2014-04-14 13:47:40 -0400
commit	06a720cca67ff19f873f8066c17cf4ea90ab0f0f (patch)
tree	2901fe8e218cce5a8e788645d41aec654f297e23
parent	02683b5def213065f6b893f91fc54f313141fbdf (diff)
parent	5d57fe78405ac06a306f5ed2dd1b630a909cbdfb (diff)
download	pygments-06a720cca67ff19f873f8066c17cf4ea90ab0f0f.tar.gz