summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Hatch <tim@timhatch.com>2014-04-14 13:47:40 -0400
committerTim Hatch <tim@timhatch.com>2014-04-14 13:47:40 -0400
commit06a720cca67ff19f873f8066c17cf4ea90ab0f0f (patch)
tree2901fe8e218cce5a8e788645d41aec654f297e23
parent02683b5def213065f6b893f91fc54f313141fbdf (diff)
parent5d57fe78405ac06a306f5ed2dd1b630a909cbdfb (diff)
downloadpygments-06a720cca67ff19f873f8066c17cf4ea90ab0f0f.tar.gz
Merged in yloiseau/pygments-main (pull request #309)
-rw-r--r--AUTHORS2
-rw-r--r--doc/languages.rst2
-rw-r--r--pygments/cmdline.py10
-rw-r--r--pygments/formatters/latex.py92
-rw-r--r--pygments/formatters/rtf.py11
-rw-r--r--pygments/lexers/_mapping.py4
-rw-r--r--pygments/lexers/compiled.py21
-rw-r--r--pygments/lexers/jvm.py84
-rw-r--r--pygments/lexers/math.py52
-rw-r--r--pygments/lexers/other.py10
-rw-r--r--pygments/lexers/web.py67
-rw-r--r--pygments/token.py2
-rw-r--r--tests/examplefiles/core.cljs52
-rw-r--r--tests/examplefiles/example.gd23
-rw-r--r--tests/examplefiles/example.gi64
-rw-r--r--tests/examplefiles/objc_example.m3
-rw-r--r--tests/examplefiles/test.pig148
-rw-r--r--tests/examplefiles/test.zep33
18 files changed, 659 insertions, 21 deletions
diff --git a/AUTHORS b/AUTHORS
index c4da54f6..83c0eaca 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -65,6 +65,7 @@ Other contributors, listed alphabetically, are:
* Rob Hoelz -- Perl 6 lexer
* Doug Hogan -- Mscgen lexer
* Ben Hollis -- Mason lexer
+* Max Horn -- GAP lexer
* Dustin Howett -- Logos lexer
* Alastair Houghton -- Lexer inheritance facility
* Tim Howard -- BlitzMax lexer
@@ -98,6 +99,7 @@ Other contributors, listed alphabetically, are:
* Brian McKenna -- F# lexer
* Charles McLaughlin -- Puppet lexer
* Lukas Meuser -- BBCode formatter, Lua lexer
+* Cat Miller -- Pig lexer
* Paul Miller -- LiveScript lexer
* Hong Minhee -- HTTP lexer
* Michael Mior -- Awk lexer
diff --git a/doc/languages.rst b/doc/languages.rst
index 0fc9d9d3..426a576b 100644
--- a/doc/languages.rst
+++ b/doc/languages.rst
@@ -35,6 +35,7 @@ Programming languages
* Fancy
* Fortran
* F#
+* GAP
* Gherkin (Cucumber)
* GL shaders
* Groovy
@@ -81,6 +82,7 @@ Programming languages
* Visual Basic.NET
* Visual FoxPro
* XQuery
+* Zephir
</ul>
Template languages
diff --git a/pygments/cmdline.py b/pygments/cmdline.py
index af8d48ea..7c23ebee 100644
--- a/pygments/cmdline.py
+++ b/pygments/cmdline.py
@@ -19,6 +19,7 @@ from pygments import __version__, highlight
from pygments.util import ClassNotFound, OptionError, docstring_headline
from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \
find_lexer_class, guess_lexer, TextLexer
+from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
from pygments.formatters import get_all_formatters, get_formatter_by_name, \
get_formatter_for_filename, find_formatter_class, \
TerminalFormatter # pylint:disable-msg=E0611
@@ -405,6 +406,15 @@ def main(args=sys.argv):
else:
code = sys.stdin.read()
+ # When using the LaTeX formatter and the option `escapeinside` is
+ # specified, we need a special lexer which collects escaped text
+ # before running the chosen language lexer.
+ escapeinside = parsed_opts.get('escapeinside', '')
+ if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
+ left = escapeinside[0]
+ right = escapeinside[1]
+ lexer = LatexEmbeddedLexer(left, right, lexer)
+
# No encoding given? Use latin1 if output file given,
# stdin/stdout encoding otherwise.
# (This is a compromise, I'm not too happy with it...)
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index 0f2397eb..fee177c5 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -12,6 +12,7 @@
from __future__ import division
from pygments.formatter import Formatter
+from pygments.lexer import Lexer
from pygments.token import Token, STANDARD_TYPES
from pygments.util import get_bool_opt, get_int_opt, StringIO, xrange, \
iteritems
@@ -226,6 +227,15 @@ class LatexFormatter(Formatter):
``False``).
.. versionadded:: 1.2
+
+ `escapeinside`
+ If set to a string of length 2, enables escaping to LaTeX. Text
+ delimited by these 2 characters is read as LaTeX code and
+ typeset accordingly. It has no effect in string literals. It has
+ no effect in comments if `texcomments` or `mathescape` is
+ set. (default: ``''``).
+
+ .. versionadded:: 2.0
"""
name = 'LaTeX'
aliases = ['latex', 'tex']
@@ -243,6 +253,13 @@ class LatexFormatter(Formatter):
self.commandprefix = options.get('commandprefix', 'PY')
self.texcomments = get_bool_opt(options, 'texcomments', False)
self.mathescape = get_bool_opt(options, 'mathescape', False)
+ self.escapeinside = options.get('escapeinside', '')
+
+ if len(self.escapeinside) == 2:
+ self.left = self.escapeinside[0]
+ self.right = self.escapeinside[1]
+ else:
+ self.escapeinside = ''
self._create_stylesheet()
@@ -314,14 +331,14 @@ class LatexFormatter(Formatter):
realoutfile = outfile
outfile = StringIO()
- outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}')
+ outfile.write(u'\\begin{Verbatim}[commandchars=\\\\\\{\\}')
if self.linenos:
start, step = self.linenostart, self.linenostep
outfile.write(u',numbers=left' +
(start and u',firstnumber=%d' % start or u'') +
(step and u',stepnumber=%d' % step or u''))
- if self.mathescape or self.texcomments:
- outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
+ if self.mathescape or self.texcomments or self.escapeinside:
+ outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
if self.verboptions:
outfile.write(u',' + self.verboptions)
outfile.write(u']\n')
@@ -350,9 +367,22 @@ class LatexFormatter(Formatter):
parts[i] = escape_tex(part, self.commandprefix)
in_math = not in_math
value = '$'.join(parts)
+ elif self.escapeinside:
+ text = value
+ value = ''
+ while len(text) > 0:
+ a,sep1,text = text.partition(self.left)
+ if len(sep1) > 0:
+ b,sep2,text = text.partition(self.right)
+ if len(sep2) > 0:
+ value = value + escape_tex(a, self.commandprefix) + b
+ else:
+ value = value + escape_tex(a + sep1 + b, self.commandprefix)
+ else:
+ value = value + escape_tex(a, self.commandprefix)
else:
value = escape_tex(value, self.commandprefix)
- else:
+ elif not (ttype in Token.Escape):
value = escape_tex(value, self.commandprefix)
styles = []
while ttype is not Token:
@@ -384,3 +414,57 @@ class LatexFormatter(Formatter):
encoding = self.encoding or 'latin1',
styledefs = self.get_style_defs(),
code = outfile.getvalue()))
+
+
+class LatexEmbeddedLexer(Lexer):
+ r"""
+
+ This lexer takes one lexer as argument, the lexer for the language
+ being formatted, and the left and right delimiters for escaped text.
+
+ First everything is scanned using the language lexer to obtain
+ strings and comments. All other consecutive tokens are merged and
+ the resulting text is scanned for escaped segments, which are given
+ the Token.Escape type. Finally text that is not escaped is scanned
+ again with the language lexer.
+ """
+ def __init__(self, left, right, lang, **options):
+ self.left = left
+ self.right = right
+ self.lang = lang
+ Lexer.__init__(self, **options)
+
+ def get_tokens_unprocessed(self, text):
+ buf = ''
+ for i, t, v in self.lang.get_tokens_unprocessed(text):
+ if t in Token.Comment or t in Token.String:
+ if buf:
+ for x in self.get_tokens_aux(idx, buf):
+ yield x
+ buf = ''
+ yield i, t, v
+ else:
+ if not buf:
+ idx = i;
+ buf += v
+ if buf:
+ for x in self.get_tokens_aux(idx, buf):
+ yield x
+
+ def get_tokens_aux(self, index, text):
+ while text:
+ a, sep1, text = text.partition(self.left)
+ if a:
+ for i, t, v in self.lang.get_tokens_unprocessed(a):
+ yield index + i, t, v
+ index += len(a)
+ if sep1:
+ b, sep2, text = text.partition(self.right)
+ if sep2:
+ yield index + len(sep1), Token.Escape, b
+ index += len(sep1) + len(b) + len(sep2)
+ else:
+ yield index, Token.Error, sep1
+ index += len(sep1)
+ text = b
+
diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py
index 59d97742..9d87e8f1 100644
--- a/pygments/formatters/rtf.py
+++ b/pygments/formatters/rtf.py
@@ -10,6 +10,7 @@
"""
from pygments.formatter import Formatter
+from pygments.util import get_int_opt
__all__ = ['RtfFormatter']
@@ -32,6 +33,12 @@ class RtfFormatter(Formatter):
`fontface`
The used font famliy, for example ``Bitstream Vera Sans``. Defaults to
some generic font which is supposed to have fixed width.
+
+ `fontsize`
+ Size of the font used. Size is specified in half points. The
+ default is 24 half-points, giving a size 12 font.
+
+ .. versionadded:: 2.0
"""
name = 'RTF'
aliases = ['rtf']
@@ -49,9 +56,11 @@ class RtfFormatter(Formatter):
specification claims that ``\fmodern`` are "Fixed-pitch serif
and sans serif fonts". Hope every RTF implementation thinks
the same about modern...
+
"""
Formatter.__init__(self, **options)
self.fontface = options.get('fontface') or ''
+ self.fontsize = get_int_opt(options, 'fontsize', 0)
def _escape(self, text):
return text.replace('\\', '\\\\') \
@@ -106,6 +115,8 @@ class RtfFormatter(Formatter):
))
offset += 1
outfile.write(r'}\f0')
+ if self.fontsize:
+ outfile.write(r'\fs%d' % (self.fontsize))
# highlight stream
for ttype, value in tokensource:
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index b90e0d8f..c937d908 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -67,6 +67,7 @@ LEXERS = {
'CirruLexer': ('pygments.lexers.web', 'Cirru', ('cirru',), ('*.cirru', '*.cr'), ('text/x-cirru',)),
'ClayLexer': ('pygments.lexers.compiled', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)),
'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')),
+ 'ClojureScriptLexer': ('pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')),
'CobolFreeformatLexer': ('pygments.lexers.compiled', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()),
'CobolLexer': ('pygments.lexers.compiled', 'COBOL', ('cobol',), ('*.cob', '*.COB', '*.cpy', '*.CPY'), ('text/x-cobol',)),
'CoffeeScriptLexer': ('pygments.lexers.web', 'CoffeeScript', ('coffee-script', 'coffeescript', 'coffee'), ('*.coffee',), ('text/coffeescript',)),
@@ -119,6 +120,7 @@ LEXERS = {
'FelixLexer': ('pygments.lexers.compiled', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)),
'FortranLexer': ('pygments.lexers.compiled', 'Fortran', ('fortran',), ('*.f', '*.f90', '*.F', '*.F90'), ('text/x-fortran',)),
'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()),
+ 'GAPLexer': ('pygments.lexers.math', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()),
'GLShaderLexer': ('pygments.lexers.compiled', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)),
'GasLexer': ('pygments.lexers.asm', 'GAS', ('gas', 'asm'), ('*.s', '*.S'), ('text/x-gas',)),
'GenshiLexer': ('pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',), ('application/x-genshi', 'application/x-kid')),
@@ -239,6 +241,7 @@ LEXERS = {
'Perl6Lexer': ('pygments.lexers.agile', 'Perl6', ('perl6', 'pl6'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t'), ('text/x-perl6', 'application/x-perl6')),
'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm', '*.t'), ('text/x-perl', 'application/x-perl')),
'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]', '*.inc'), ('text/x-php',)),
+ 'PigLexer': ('pygments.lexers.jvm', 'Pig', ('pig',), ('*.pig',), ('text/x-pig',)),
'PikeLexer': ('pygments.lexers.compiled', 'Pike', ('pike',), ('*.pike', '*.pmod'), ('text/x-pike',)),
'PlPgsqlLexer': ('pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)),
'PostScriptLexer': ('pygments.lexers.other', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
@@ -329,6 +332,7 @@ LEXERS = {
'XsltLexer': ('pygments.lexers.web', 'XSLT', ('xslt',), ('*.xsl', '*.xslt', '*.xpl'), ('application/xsl+xml', 'application/xslt+xml')),
'XtendLexer': ('pygments.lexers.jvm', 'Xtend', ('xtend',), ('*.xtend',), ('text/x-xtend',)),
'YamlLexer': ('pygments.lexers.text', 'YAML', ('yaml',), ('*.yaml', '*.yml'), ('text/x-yaml',)),
+ 'ZephirLexer': ('pygments.lexers.web', 'Zephir', ('zephir',), ('*.zep',), ()),
}
if __name__ == '__main__':
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index db69fdc6..e6e10098 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -469,20 +469,23 @@ class DLexer(RegexLexer):
(r'(abstract|alias|align|asm|assert|auto|body|break|case|cast'
r'|catch|class|const|continue|debug|default|delegate|delete'
r'|deprecated|do|else|enum|export|extern|finally|final'
- r'|foreach_reverse|foreach|for|function|goto|if|import|inout'
- r'|interface|invariant|in|is|lazy|mixin|module|new|nothrow|out'
+ r'|foreach_reverse|foreach|for|function|goto|if|immutable|import'
+ r'|interface|invariant|inout|in|is|lazy|mixin|module|new|nothrow|out'
r'|override|package|pragma|private|protected|public|pure|ref|return'
- r'|scope|static|struct|super|switch|synchronized|template|this'
+ r'|scope|shared|static|struct|super|switch|synchronized|template|this'
r'|throw|try|typedef|typeid|typeof|union|unittest|version|volatile'
- r'|while|with|__traits)\b', Keyword
+ r'|while|with|__gshared|__traits|__vector|__parameters)\b', Keyword
),
(r'(bool|byte|cdouble|cent|cfloat|char|creal|dchar|double|float'
r'|idouble|ifloat|int|ireal|long|real|short|ubyte|ucent|uint|ulong'
r'|ushort|void|wchar)\b', Keyword.Type
),
(r'(false|true|null)\b', Keyword.Constant),
+ (r'(__FILE__|__MODULE__|__LINE__|__FUNCTION__|__PRETTY_FUNCTION__'
+ r'|__DATE__|__EOF__|__TIME__|__TIMESTAMP__|__VENDOR__|__VERSION__)\b',
+ Keyword.Pseudo),
(r'macro\b', Keyword.Reserved),
- (r'(string|wstring|dstring)\b', Name.Builtin),
+ (r'(string|wstring|dstring|size_t|ptrdiff_t)\b', Name.Builtin),
# FloatLiteral
# -- HexFloat
(r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)'
@@ -528,6 +531,8 @@ class DLexer(RegexLexer):
(r'q"(.).*?\1"', String),
# -- TokenString
(r'q{', String, 'token_string'),
+ # Attributes
+ (r'@([a-zA-Z_]\w*)?', Name.Decorator),
# Tokens
(r'(~=|\^=|%=|\*=|==|!>=|!<=|!<>=|!<>|!<|!>|!=|>>>=|>>>|>>=|>>|>='
r'|<>=|<>|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.\.|\.\.|/=)'
@@ -535,6 +540,8 @@ class DLexer(RegexLexer):
),
# Identifier
(r'[a-zA-Z_]\w*', Name),
+ # Line
+ (r'#line\s.*\n', Comment.Special),
],
'nested_comment': [
(r'[^+/]+', Comment.Multiline),
@@ -1434,8 +1441,8 @@ def objective(baselexer):
# discussion in Issue 789
(r',', Punctuation),
(r'\.\.\.', Punctuation),
- (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this),
- Name.Variable)),
+ (r'(\(.*?\))(\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)',
+ bygroups(using(this), Text, Name.Variable)),
(r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function),
(';', Punctuation, '#pop'),
('{', Punctuation, 'function'),
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 30a7ddd7..64c47b6e 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -19,8 +19,9 @@ from pygments import unistring as uni
__all__ = ['JavaLexer', 'ScalaLexer', 'GosuLexer', 'GosuTemplateLexer',
- 'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'KotlinLexer',
- 'XtendLexer', 'AspectJLexer', 'CeylonLexer']
+ 'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'ClojureScriptLexer',
+ 'KotlinLexer', 'XtendLexer', 'AspectJLexer', 'CeylonLexer',
+ 'PigLexer']
class JavaLexer(RegexLexer):
@@ -813,6 +814,19 @@ class ClojureLexer(RegexLexer):
}
+class ClojureScriptLexer(ClojureLexer):
+ """
+ Lexer for `ClojureScript <http://clojure.org/clojurescript>`_
+ source code.
+
+ .. versionadded:: 2.0
+ """
+ name = 'ClojureScript'
+ aliases = ['clojurescript', 'cljs']
+ filenames = ['*.cljs']
+ mimetypes = ['text/x-clojurescript', 'application/x-clojurescript']
+
+
class TeaLangLexer(RegexLexer):
"""
For `Tea <http://teatrove.org/>`_ source code. Only used within a
@@ -1066,3 +1080,69 @@ class XtendLexer(RegexLexer):
(r'.', String)
],
}
+
+class PigLexer(RegexLexer):
+ """
+ For `Pig Latin <https://pig.apache.org/>`_ source code.
+
+ .. versionadded:: 2.0
+ """
+
+ name = 'Pig'
+ aliases = ['pig']
+ filenames = ['*.pig']
+ mimetypes = ['text/x-pig']
+
+ flags = re.MULTILINE | re.IGNORECASE
+
+ tokens = {
+ 'root': [
+ (r'\s+', Text),
+ (r'--.*', Comment),
+ (r'/\*[\w\W]*?\*/', Comment.Multiline),
+ (r'\\\n', Text),
+ (r'\\', Text),
+ (r'\'(?:\\[ntbrf\\\']|\\u[0-9a-f]{4}|[^\'\\\n\r])*\'', String),
+ include('keywords'),
+ include('types'),
+ include('builtins'),
+ include('punct'),
+ include('operators'),
+ (r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
+ (r'0x[0-9a-f]+', Number.Hex),
+ (r'[0-9]+L?', Number.Integer),
+ (r'\n', Text),
+ (r'([a-z_][a-z0-9_]*)(\s*)(\()',
+ bygroups(Name.Function, Text, Punctuation)),
+ (r'[()#:]', Text),
+ (r'[^(:#\'\")\s]+', Text),
+ (r'\S+\s+', Text) # TODO: make tests pass without \s+
+ ],
+ 'keywords': [
+ (r'(assert|and|any|all|arrange|as|asc|bag|by|cache|CASE|cat|cd|cp|'
+ r'%declare|%default|define|dense|desc|describe|distinct|du|dump|'
+ r'eval|exex|explain|filter|flatten|foreach|full|generate|group|'
+ r'help|if|illustrate|import|inner|input|into|is|join|kill|left|'
+ r'limit|load|ls|map|matches|mkdir|mv|not|null|onschema|or|order|'
+ r'outer|output|parallel|pig|pwd|quit|register|returns|right|rm|'
+ r'rmf|rollup|run|sample|set|ship|split|stderr|stdin|stdout|store|'
+ r'stream|through|union|using|void)\b', Keyword)
+ ],
+ 'builtins': [
+ (r'(AVG|BinStorage|cogroup|CONCAT|copyFromLocal|copyToLocal|COUNT|'
+ r'cross|DIFF|MAX|MIN|PigDump|PigStorage|SIZE|SUM|TextLoader|'
+ r'TOKENIZE)\b', Name.Builtin)
+ ],
+ 'types': [
+ (r'(bytearray|BIGINTEGER|BIGDECIMAL|chararray|datetime|double|float|'
+ r'int|long|tuple)\b', Keyword.Type)
+ ],
+ 'punct': [
+ (r'[;(){}\[\]]', Punctuation),
+ ],
+ 'operators': [
+ (r'[#=,./%+\-?]', Operator),
+ (r'(eq|gt|lt|gte|lte|neq|matches)\b', Operator),
+ (r'(==|<=|<|>=|>|!=)', Operator),
+ ],
+ }
diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py
index 1bce106c..e7a8948b 100644
--- a/pygments/lexers/math.py
+++ b/pygments/lexers/math.py
@@ -26,7 +26,7 @@ from pygments.lexers import _stan_builtins
__all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer',
'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer',
'RConsoleLexer', 'SLexer', 'JagsLexer', 'BugsLexer', 'StanLexer',
- 'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer']
+ 'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer', 'GAPLexer']
class JuliaLexer(RegexLexer):
@@ -1972,3 +1972,53 @@ class MathematicaLexer(RegexLexer):
(r'\s+', Text.Whitespace),
],
}
+
+class GAPLexer(RegexLexer):
+ """
+ For `GAP <http://www.gap-system.org>`_ source code.
+
+ .. versionadded:: 2.0
+ """
+ name = 'GAP'
+ aliases = ['gap']
+ filenames = ['*.g', '*.gd', '*.gi', '*.gap']
+
+ tokens = {
+ 'root' : [
+ (r'#.*$', Comment.Single),
+ (r'"(?:[^"\\]|\\.)*"', String),
+ (r'\(|\)|\[|\]|\{|\}', Punctuation),
+ (r'''(?x)\b(?:
+ if|then|elif|else|fi|
+ for|while|do|od|
+ repeat|until|
+ break|continue|
+ function|local|return|end|
+ rec|
+ quit|QUIT|
+ IsBound|Unbind|
+ TryNextMethod|
+ Info|Assert
+ )\b''', Keyword),
+ (r'''(?x)\b(?:
+ true|false|fail|infinity
+ )\b''',
+ Name.Constant),
+ (r'''(?x)\b(?:
+ (Declare|Install)([A-Z][A-Za-z]+)|
+ BindGlobal|BIND_GLOBAL
+ )\b''',
+ Name.Builtin),
+ (r'\.|,|:=|;|=|\+|-|\*|/|\^|>|<', Operator),
+ (r'''(?x)\b(?:
+ and|or|not|mod|in
+ )\b''',
+ Operator.Word),
+ (r'''(?x)
+ (?:[a-zA-Z_0-9]+|`[^`]*`)
+ (?:::[a-zA-Z_0-9]+|`[^`]*`)*''', Name.Variable),
+ (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number),
+ (r'\.[0-9]+(?:e[0-9]+)?', Number),
+ (r'.', Text)
+ ]
+ }
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index 1b8ce892..ba777e28 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -1235,9 +1235,9 @@ class ModelicaLexer(RegexLexer):
'root': [
include('whitespace'),
include('keywords'),
+ include('classes'),
include('functions'),
include('operators'),
- include('classes'),
(r'("<html>|<html>)', Name.Tag, 'html-content'),
include('statements'),
],
@@ -1264,9 +1264,9 @@ class ModelicaLexer(RegexLexer):
r'terminate)\b', Name.Builtin),
],
'classes': [
- (r'(block|class|connector|end|function|model|package|'
+ (r'(operator)?(\s+)?(block|class|connector|end|function|model|operator|package|'
r'record|type)(\s+)((?!if|when|while)[A-Za-z_]\w*|[\'][^\']+[\'])([;]?)',
- bygroups(Keyword, Text, Name.Class, Text))
+ bygroups(Keyword, Text, Keyword, Text, Name.Class, Text))
],
'quoted_ident': [
(r'\'', Name, '#pop'),
@@ -2528,11 +2528,11 @@ class AwkLexer(RegexLexer):
'root': [
(r'^(?=\s|/)', Text, 'slashstartsregex'),
include('commentsandwhitespace'),
- (r'\+\+|--|\|\||&&|in|\$|!?~|'
+ (r'\+\+|--|\|\||&&|in\b|\$|!?~|'
r'(\*\*|[-<>+*%\^/!=])=?', Operator, 'slashstartsregex'),
(r'[{(\[;,]', Punctuation, 'slashstartsregex'),
(r'[})\].]', Punctuation),
- (r'(break|continue|do|while|exit|for|if|'
+ (r'(break|continue|do|while|exit|for|if|else|'
r'return)\b', Keyword, 'slashstartsregex'),
(r'function\b', Keyword.Declaration, 'slashstartsregex'),
(r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|'
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index c975ad80..7d3073f1 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -28,7 +28,7 @@ __all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'JsonLexer', 'CssLexer',
'ObjectiveJLexer', 'CoffeeScriptLexer', 'LiveScriptLexer',
'DuelLexer', 'ScamlLexer', 'JadeLexer', 'XQueryLexer',
'DtdLexer', 'DartLexer', 'LassoLexer', 'QmlLexer', 'TypeScriptLexer',
- 'KalLexer', 'CirruLexer', 'MaskLexer']
+ 'KalLexer', 'CirruLexer', 'MaskLexer', 'ZephirLexer']
class JavascriptLexer(RegexLexer):
@@ -3691,7 +3691,7 @@ class DartLexer(RegexLexer):
r'native|operator|set|static|typedef|var)\b', Keyword.Declaration),
(r'\b(bool|double|Dynamic|int|num|Object|String|void)\b', Keyword.Type),
(r'\b(false|null|true)\b', Keyword.Constant),
- (r'[~!%^&*+=|?:<>/-]|as', Operator),
+ (r'[~!%^&*+=|?:<>/-]|as\b', Operator),
(r'[a-zA-Z_$][a-zA-Z0-9_]*:', Name.Label),
(r'[a-zA-Z_$][a-zA-Z0-9_]*', Name),
(r'[(){}\[\],.;]', Punctuation),
@@ -4336,5 +4336,68 @@ class MaskLexer(RegexLexer):
'string-double-pop2':[
(r'"', String.Single, '#pop:2'),
include('string-base')
+ ],
+ }
+
+
+class ZephirLexer(RegexLexer):
+ """
+ For `Zephir language <http://zephir-lang.com/>`_ source code.
+
+ Zephir is a compiled high level language aimed
+ to the creation of C-extensions for PHP.
+
+ .. versionadded:: 2.0
+ """
+
+ name = 'Zephir'
+ aliases = ['zephir']
+ filenames = ['*.zep']
+
+ zephir_keywords = [ 'fetch', 'echo', 'isset', 'empty']
+ zephir_type = [ 'bit', 'bits' , 'string' ]
+
+ flags = re.DOTALL | re.MULTILINE
+
+ tokens = {
+ 'commentsandwhitespace': [
+ (r'\s+', Text),
+ (r'//.*?\n', Comment.Single),
+ (r'/\*.*?\*/', Comment.Multiline)
+ ],
+ 'slashstartsregex': [
+ include('commentsandwhitespace'),
+ (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
+ r'([gim]+\b|\B)', String.Regex, '#pop'),
+ (r'', Text, '#pop')
+ ],
+ 'badregex': [
+ (r'\n', Text, '#pop')
+ ],
+ 'root': [
+ (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
+ include('commentsandwhitespace'),
+ (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
+ r'(<<|>>>?|==?|!=?|->|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'),
+ (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
+ (r'[})\].]', Punctuation),
+ (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|require|inline|'
+ r'throw|try|catch|finally|new|delete|typeof|instanceof|void|namespace|use|extends|'
+ r'this|fetch|isset|unset|echo|fetch|likely|unlikely|empty)\b', Keyword, 'slashstartsregex'),
+ (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
+ (r'(abstract|boolean|bool|char|class|const|double|enum|export|'
+ r'extends|final|float|goto|implements|import|int|string|interface|long|ulong|char|uchar|native|unsigned|'
+ r'private|protected|public|short|static|self|throws|reverse|'
+ r'transient|volatile)\b', Keyword.Reserved),
+ (r'(true|false|null|undefined)\b', Keyword.Constant),
+ (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
+ r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
+ r'window)\b', Name.Builtin),
+ (r'[$a-zA-Z_][a-zA-Z0-9_\\]*', Name.Other),
+ (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
+ (r'0x[0-9a-fA-F]+', Number.Hex),
+ (r'[0-9]+', Number.Integer),
+ (r'"(\\\\|\\"|[^"])*"', String.Double),
+ (r"'(\\\\|\\'|[^'])*'", String.Single),
]
}
diff --git a/pygments/token.py b/pygments/token.py
index f6c3066d..c40ffd33 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -49,6 +49,7 @@ Token = _TokenType()
# Special token types
Text = Token.Text
Whitespace = Text.Whitespace
+Escape = Token.Escape
Error = Token.Error
# Text that doesn't belong to this lexer (e.g. HTML in PHP)
Other = Token.Other
@@ -116,6 +117,7 @@ STANDARD_TYPES = {
Text: '',
Whitespace: 'w',
+ Escape: 'esc',
Error: 'err',
Other: 'x',
diff --git a/tests/examplefiles/core.cljs b/tests/examplefiles/core.cljs
new file mode 100644
index 00000000..f135b832
--- /dev/null
+++ b/tests/examplefiles/core.cljs
@@ -0,0 +1,52 @@
+
+(ns bounder.core
+ (:require [bounder.html :as html]
+ [domina :refer [value set-value! single-node]]
+ [domina.css :refer [sel]]
+ [lowline.functions :refer [debounce]]
+ [enfocus.core :refer [at]]
+ [cljs.reader :as reader]
+ [clojure.string :as s])
+ (:require-macros [enfocus.macros :as em]))
+
+(def filter-input
+ (single-node
+ (sel ".search input")))
+
+(defn project-matches [query project]
+ (let [words (cons (:name project)
+ (map name (:categories project)))
+ to-match (->> words
+ (s/join "")
+ (s/lower-case))]
+ (<= 0 (.indexOf to-match (s/lower-case query)))))
+
+(defn apply-filter-for [projects]
+ (let [query (value filter-input)]
+ (html/render-projects
+ (filter (partial project-matches query)
+ projects))))
+
+(defn filter-category [projects evt]
+ (let [target (.-currentTarget evt)]
+ (set-value! filter-input
+ (.-innerHTML target))
+ (apply-filter-for projects)))
+
+(defn init-listeners [projects]
+ (at js/document
+ ["input"] (em/listen
+ :keyup
+ (debounce
+ (partial apply-filter-for projects)
+ 500))
+ [".category-links li"] (em/listen
+ :click
+ (partial filter-category projects))))
+
+(defn init [projects-edn]
+ (let [projects (reader/read-string projects-edn)]
+ (init-listeners projects)
+ (html/render-projects projects)
+ (html/loaded)))
+
diff --git a/tests/examplefiles/example.gd b/tests/examplefiles/example.gd
new file mode 100644
index 00000000..c285ea32
--- /dev/null
+++ b/tests/examplefiles/example.gd
@@ -0,0 +1,23 @@
+#############################################################################
+##
+#W example.gd
+##
+## This file contains a sample of a GAP declaration file.
+##
+DeclareProperty( "SomeProperty", IsLeftModule );
+DeclareGlobalFunction( "SomeGlobalFunction" );
+
+
+#############################################################################
+##
+#C IsQuuxFrobnicator(<R>)
+##
+## <ManSection>
+## <Filt Name="IsQuuxFrobnicator" Arg='R' Type='Category'/>
+##
+## <Description>
+## Tests whether R is a quux frobnicator.
+## </Description>
+## </ManSection>
+##
+DeclareSynonym( "IsQuuxFrobnicator", IsField and IsGroup );
diff --git a/tests/examplefiles/example.gi b/tests/examplefiles/example.gi
new file mode 100644
index 00000000..c9c5e55d
--- /dev/null
+++ b/tests/examplefiles/example.gi
@@ -0,0 +1,64 @@
+#############################################################################
+##
+#W example.gd
+##
+## This file contains a sample of a GAP implementation file.
+##
+
+
+#############################################################################
+##
+#M SomeOperation( <val> )
+##
+## performs some operation on <val>
+##
+InstallMethod( SomeProperty,
+ "for left modules",
+ [ IsLeftModule ], 0,
+ function( M )
+ if IsFreeLeftModule( M ) and not IsTrivial( M ) then
+ return true;
+ fi;
+ TryNextMethod();
+ end );
+
+
+
+#############################################################################
+##
+#F SomeGlobalFunction( )
+##
+## A global variadic funfion.
+##
+InstallGlobalFunction( SomeGlobalFunction, function( arg )
+ if Length( arg ) = 3 then
+ return arg[1] + arg[2] * arg[3];
+ elif Length( arg ) = 2 then
+ return arg[1] - arg[2]
+ else
+ Error( "usage: SomeGlobalFunction( <x>, <y>[, <z>] )" );
+ fi;
+ end );
+
+
+#
+# A plain function.
+#
+SomeFunc := function(x, y)
+ local z, func, tmp, j;
+ z := x * 1.0;
+ y := 17^17 - y;
+ func := a -> a mod 5;
+ tmp := List( [1..50], func );
+ while y > 0 do
+ for j in tmp do
+ Print(j, "\n");
+ od;
+ repeat
+ y := y - 1;
+ until 0 < 1;
+ y := y -1;
+ od;
+ return z;
+end;
+ \ No newline at end of file
diff --git a/tests/examplefiles/objc_example.m b/tests/examplefiles/objc_example.m
index 67b33022..f4f27170 100644
--- a/tests/examplefiles/objc_example.m
+++ b/tests/examplefiles/objc_example.m
@@ -30,3 +30,6 @@ NSDictionary *d = @{ @"key": @"value" };
NSNumber *n1 = @( 1 );
NSNumber *n2 = @( [a length] );
+
++ (void)f1:(NSString *)s1;
++ (void)f2:(NSString *) s2;
diff --git a/tests/examplefiles/test.pig b/tests/examplefiles/test.pig
new file mode 100644
index 00000000..f67b0268
--- /dev/null
+++ b/tests/examplefiles/test.pig
@@ -0,0 +1,148 @@
+/**
+ * This script is an example recommender (using made up data) showing how you might modify item-item links
+ * by defining similar relations between items in a dataset and customizing the change in weighting.
+ * This example creates metadata by using the genre field as the metadata_field. The items with
+ * the same genre have it's weight cut in half in order to boost the signals of movies that do not have the same genre.
+ * This technique requires a customization of the standard GetItemItemRecommendations macro
+ */
+import 'recommenders.pig';
+
+
+
+%default INPUT_PATH_PURCHASES '../data/retail/purchases.json'
+%default INPUT_PATH_WISHLIST '../data/retail/wishlists.json'
+%default INPUT_PATH_INVENTORY '../data/retail/inventory.json'
+%default OUTPUT_PATH '../data/retail/out/modify_item_item'
+
+
+/******** Custom GetItemItemRecommnedations *********/
+define recsys__GetItemItemRecommendations_ModifyCustom(user_item_signals, metadata) returns item_item_recs {
+
+ -- Convert user_item_signals to an item_item_graph
+ ii_links_raw, item_weights = recsys__BuildItemItemGraph(
+ $user_item_signals,
+ $LOGISTIC_PARAM,
+ $MIN_LINK_WEIGHT,
+ $MAX_LINKS_PER_USER
+ );
+ -- NOTE this function is added in order to combine metadata with item-item links
+ -- See macro for more detailed explination
+ ii_links_metadata = recsys__AddMetadataToItemItemLinks(
+ ii_links_raw,
+ $metadata
+ );
+
+ /********* Custom Code starts here ********/
+
+ --The code here should adjust the weights based on an item-item link and the equality of metadata.
+ -- In this case, if the metadata is the same, the weight is reduced. Otherwise the weight is left alone.
+ ii_links_adjusted = foreach ii_links_metadata generate item_A, item_B,
+ -- the amount of weight adjusted is dependant on the domain of data and what is expected
+ -- It is always best to adjust the weight by multiplying it by a factor rather than addition with a constant
+ (metadata_B == metadata_A ? (weight * 0.5): weight) as weight;
+
+
+ /******** Custom Code stops here *********/
+
+ -- remove negative numbers just incase
+ ii_links_adjusted_filt = foreach ii_links_adjusted generate item_A, item_B,
+ (weight <= 0 ? 0: weight) as weight;
+ -- Adjust the weights of the graph to improve recommendations.
+ ii_links = recsys__AdjustItemItemGraphWeight(
+ ii_links_adjusted_filt,
+ item_weights,
+ $BAYESIAN_PRIOR
+ );
+
+ -- Use the item-item graph to create item-item recommendations.
+ $item_item_recs = recsys__BuildItemItemRecommendationsFromGraph(
+ ii_links,
+ $NUM_RECS_PER_ITEM,
+ $NUM_RECS_PER_ITEM
+ );
+};
+
+
+/******* Load Data **********/
+
+--Get purchase signals
+purchase_input = load '$INPUT_PATH_PURCHASES' using org.apache.pig.piggybank.storage.JsonLoader(
+ 'row_id: int,
+ movie_id: chararray,
+ movie_name: chararray,
+ user_id: chararray,
+ purchase_price: int');
+
+--Get wishlist signals
+wishlist_input = load '$INPUT_PATH_WISHLIST' using org.apache.pig.piggybank.storage.JsonLoader(
+ 'row_id: int,
+ movie_id: chararray,
+ movie_name: chararray,
+ user_id: chararray');
+
+
+/******* Convert Data to Signals **********/
+
+-- Start with choosing 1 as max weight for a signal.
+purchase_signals = foreach purchase_input generate
+ user_id as user,
+ movie_name as item,
+ 1.0 as weight;
+
+
+-- Start with choosing 0.5 as weight for wishlist items because that is a weaker signal than
+-- purchasing an item.
+wishlist_signals = foreach wishlist_input generate
+ user_id as user,
+ movie_name as item,
+ 0.5 as weight;
+
+user_signals = union purchase_signals, wishlist_signals;
+
+
+/******** Changes for Modifying item-item links ******/
+inventory_input = load '$INPUT_PATH_INVENTORY' using org.apache.pig.piggybank.storage.JsonLoader(
+ 'movie_title: chararray,
+ genres: bag{tuple(content:chararray)}');
+
+
+metadata = foreach inventory_input generate
+ FLATTEN(genres) as metadata_field,
+ movie_title as item;
+-- requires the macro to be written seperately
+ --NOTE this macro is defined within this file for clarity
+item_item_recs = recsys__GetItemItemRecommendations_ModifyCustom(user_signals, metadata);
+/******* No more changes ********/
+
+
+user_item_recs = recsys__GetUserItemRecommendations(user_signals, item_item_recs);
+
+--Completely unrelated code stuck in the middle
+data = LOAD 's3n://my-s3-bucket/path/to/responses'
+ USING org.apache.pig.piggybank.storage.JsonLoader();
+responses = FOREACH data GENERATE object#'response' AS response: map[];
+out = FOREACH responses
+ GENERATE response#'id' AS id: int, response#'thread' AS thread: chararray,
+ response#'comments' AS comments: {t: (comment: chararray)};
+STORE out INTO 's3n://path/to/output' USING PigStorage('|');
+
+
+/******* Store recommendations **********/
+
+-- If your output folder exists already, hadoop will refuse to write data to it.
+
+rmf $OUTPUT_PATH/item_item_recs;
+rmf $OUTPUT_PATH/user_item_recs;
+
+store item_item_recs into '$OUTPUT_PATH/item_item_recs' using PigStorage();
+store user_item_recs into '$OUTPUT_PATH/user_item_recs' using PigStorage();
+
+-- STORE the item_item_recs into dynamo
+STORE item_item_recs
+ INTO '$OUTPUT_PATH/unused-ii-table-data'
+USING com.mortardata.pig.storage.DynamoDBStorage('$II_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');
+
+-- STORE the user_item_recs into dynamo
+STORE user_item_recs
+ INTO '$OUTPUT_PATH/unused-ui-table-data'
+USING com.mortardata.pig.storage.DynamoDBStorage('$UI_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY');
diff --git a/tests/examplefiles/test.zep b/tests/examplefiles/test.zep
new file mode 100644
index 00000000..4724d4c4
--- /dev/null
+++ b/tests/examplefiles/test.zep
@@ -0,0 +1,33 @@
+namespace Test;
+
+use Test\Foo;
+
+class Bar
+{
+ protected a;
+ private b;
+ public c {set, get};
+
+ public function __construct(string str, boolean bool)
+ {
+ let this->c = str;
+ this->setC(bool);
+ let this->b = [];
+ }
+
+ public function sayHello(string name)
+ {
+ echo "Hello " . name;
+ }
+
+ protected function loops()
+ {
+ for a in b {
+ echo a;
+ }
+ loop {
+ return "boo!";
+ }
+ }
+
+} \ No newline at end of file