diff options
author | Tim Hatch <tim@timhatch.com> | 2014-04-14 13:47:40 -0400 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2014-04-14 13:47:40 -0400 |
commit | 06a720cca67ff19f873f8066c17cf4ea90ab0f0f (patch) | |
tree | 2901fe8e218cce5a8e788645d41aec654f297e23 | |
parent | 02683b5def213065f6b893f91fc54f313141fbdf (diff) | |
parent | 5d57fe78405ac06a306f5ed2dd1b630a909cbdfb (diff) | |
download | pygments-06a720cca67ff19f873f8066c17cf4ea90ab0f0f.tar.gz |
Merged in yloiseau/pygments-main (pull request #309)
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | doc/languages.rst | 2 | ||||
-rw-r--r-- | pygments/cmdline.py | 10 | ||||
-rw-r--r-- | pygments/formatters/latex.py | 92 | ||||
-rw-r--r-- | pygments/formatters/rtf.py | 11 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 4 | ||||
-rw-r--r-- | pygments/lexers/compiled.py | 21 | ||||
-rw-r--r-- | pygments/lexers/jvm.py | 84 | ||||
-rw-r--r-- | pygments/lexers/math.py | 52 | ||||
-rw-r--r-- | pygments/lexers/other.py | 10 | ||||
-rw-r--r-- | pygments/lexers/web.py | 67 | ||||
-rw-r--r-- | pygments/token.py | 2 | ||||
-rw-r--r-- | tests/examplefiles/core.cljs | 52 | ||||
-rw-r--r-- | tests/examplefiles/example.gd | 23 | ||||
-rw-r--r-- | tests/examplefiles/example.gi | 64 | ||||
-rw-r--r-- | tests/examplefiles/objc_example.m | 3 | ||||
-rw-r--r-- | tests/examplefiles/test.pig | 148 | ||||
-rw-r--r-- | tests/examplefiles/test.zep | 33 |
18 files changed, 659 insertions, 21 deletions
@@ -65,6 +65,7 @@ Other contributors, listed alphabetically, are: * Rob Hoelz -- Perl 6 lexer * Doug Hogan -- Mscgen lexer * Ben Hollis -- Mason lexer +* Max Horn -- GAP lexer * Dustin Howett -- Logos lexer * Alastair Houghton -- Lexer inheritance facility * Tim Howard -- BlitzMax lexer @@ -98,6 +99,7 @@ Other contributors, listed alphabetically, are: * Brian McKenna -- F# lexer * Charles McLaughlin -- Puppet lexer * Lukas Meuser -- BBCode formatter, Lua lexer +* Cat Miller -- Pig lexer * Paul Miller -- LiveScript lexer * Hong Minhee -- HTTP lexer * Michael Mior -- Awk lexer diff --git a/doc/languages.rst b/doc/languages.rst index 0fc9d9d3..426a576b 100644 --- a/doc/languages.rst +++ b/doc/languages.rst @@ -35,6 +35,7 @@ Programming languages * Fancy * Fortran * F# +* GAP * Gherkin (Cucumber) * GL shaders * Groovy @@ -81,6 +82,7 @@ Programming languages * Visual Basic.NET * Visual FoxPro * XQuery +* Zephir </ul> Template languages diff --git a/pygments/cmdline.py b/pygments/cmdline.py index af8d48ea..7c23ebee 100644 --- a/pygments/cmdline.py +++ b/pygments/cmdline.py @@ -19,6 +19,7 @@ from pygments import __version__, highlight from pygments.util import ClassNotFound, OptionError, docstring_headline from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \ find_lexer_class, guess_lexer, TextLexer +from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter from pygments.formatters import get_all_formatters, get_formatter_by_name, \ get_formatter_for_filename, find_formatter_class, \ TerminalFormatter # pylint:disable-msg=E0611 @@ -405,6 +406,15 @@ def main(args=sys.argv): else: code = sys.stdin.read() + # When using the LaTeX formatter and the option `escapeinside` is + # specified, we need a special lexer which collects escaped text + # before running the chosen language lexer. + escapeinside = parsed_opts.get('escapeinside', '') + if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): + left = escapeinside[0] + right = escapeinside[1] + lexer = LatexEmbeddedLexer(left, right, lexer) + # No encoding given? Use latin1 if output file given, # stdin/stdout encoding otherwise. # (This is a compromise, I'm not too happy with it...) diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py index 0f2397eb..fee177c5 100644 --- a/pygments/formatters/latex.py +++ b/pygments/formatters/latex.py @@ -12,6 +12,7 @@ from __future__ import division from pygments.formatter import Formatter +from pygments.lexer import Lexer from pygments.token import Token, STANDARD_TYPES from pygments.util import get_bool_opt, get_int_opt, StringIO, xrange, \ iteritems @@ -226,6 +227,15 @@ class LatexFormatter(Formatter): ``False``). .. versionadded:: 1.2 + + `escapeinside` + If set to a string of length 2, enables escaping to LaTeX. Text + delimited by these 2 characters is read as LaTeX code and + typeset accordingly. It has no effect in string literals. It has + no effect in comments if `texcomments` or `mathescape` is + set. (default: ``''``). + + .. versionadded:: 2.0 """ name = 'LaTeX' aliases = ['latex', 'tex'] @@ -243,6 +253,13 @@ class LatexFormatter(Formatter): self.commandprefix = options.get('commandprefix', 'PY') self.texcomments = get_bool_opt(options, 'texcomments', False) self.mathescape = get_bool_opt(options, 'mathescape', False) + self.escapeinside = options.get('escapeinside', '') + + if len(self.escapeinside) == 2: + self.left = self.escapeinside[0] + self.right = self.escapeinside[1] + else: + self.escapeinside = '' self._create_stylesheet() @@ -314,14 +331,14 @@ class LatexFormatter(Formatter): realoutfile = outfile outfile = StringIO() - outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}') + outfile.write(u'\\begin{Verbatim}[commandchars=\\\\\\{\\}') if self.linenos: start, step = self.linenostart, self.linenostep outfile.write(u',numbers=left' + (start and u',firstnumber=%d' % start or u'') + (step and u',stepnumber=%d' % step or u'')) - if self.mathescape or self.texcomments: - outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}') + if self.mathescape or self.texcomments or self.escapeinside: + outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}') if self.verboptions: outfile.write(u',' + self.verboptions) outfile.write(u']\n') @@ -350,9 +367,22 @@ class LatexFormatter(Formatter): parts[i] = escape_tex(part, self.commandprefix) in_math = not in_math value = '$'.join(parts) + elif self.escapeinside: + text = value + value = '' + while len(text) > 0: + a,sep1,text = text.partition(self.left) + if len(sep1) > 0: + b,sep2,text = text.partition(self.right) + if len(sep2) > 0: + value = value + escape_tex(a, self.commandprefix) + b + else: + value = value + escape_tex(a + sep1 + b, self.commandprefix) + else: + value = value + escape_tex(a, self.commandprefix) else: value = escape_tex(value, self.commandprefix) - else: + elif not (ttype in Token.Escape): value = escape_tex(value, self.commandprefix) styles = [] while ttype is not Token: @@ -384,3 +414,57 @@ class LatexFormatter(Formatter): encoding = self.encoding or 'latin1', styledefs = self.get_style_defs(), code = outfile.getvalue())) + + +class LatexEmbeddedLexer(Lexer): + r""" + + This lexer takes one lexer as argument, the lexer for the language + being formatted, and the left and right delimiters for escaped text. + + First everything is scanned using the language lexer to obtain + strings and comments. All other consecutive tokens are merged and + the resulting text is scanned for escaped segments, which are given + the Token.Escape type. Finally text that is not escaped is scanned + again with the language lexer. + """ + def __init__(self, left, right, lang, **options): + self.left = left + self.right = right + self.lang = lang + Lexer.__init__(self, **options) + + def get_tokens_unprocessed(self, text): + buf = '' + for i, t, v in self.lang.get_tokens_unprocessed(text): + if t in Token.Comment or t in Token.String: + if buf: + for x in self.get_tokens_aux(idx, buf): + yield x + buf = '' + yield i, t, v + else: + if not buf: + idx = i; + buf += v + if buf: + for x in self.get_tokens_aux(idx, buf): + yield x + + def get_tokens_aux(self, index, text): + while text: + a, sep1, text = text.partition(self.left) + if a: + for i, t, v in self.lang.get_tokens_unprocessed(a): + yield index + i, t, v + index += len(a) + if sep1: + b, sep2, text = text.partition(self.right) + if sep2: + yield index + len(sep1), Token.Escape, b + index += len(sep1) + len(b) + len(sep2) + else: + yield index, Token.Error, sep1 + index += len(sep1) + text = b + diff --git a/pygments/formatters/rtf.py b/pygments/formatters/rtf.py index 59d97742..9d87e8f1 100644 --- a/pygments/formatters/rtf.py +++ b/pygments/formatters/rtf.py @@ -10,6 +10,7 @@ """ from pygments.formatter import Formatter +from pygments.util import get_int_opt __all__ = ['RtfFormatter'] @@ -32,6 +33,12 @@ class RtfFormatter(Formatter): `fontface` The used font famliy, for example ``Bitstream Vera Sans``. Defaults to some generic font which is supposed to have fixed width. + + `fontsize` + Size of the font used. Size is specified in half points. The + default is 24 half-points, giving a size 12 font. + + .. versionadded:: 2.0 """ name = 'RTF' aliases = ['rtf'] @@ -49,9 +56,11 @@ class RtfFormatter(Formatter): specification claims that ``\fmodern`` are "Fixed-pitch serif and sans serif fonts". Hope every RTF implementation thinks the same about modern... + """ Formatter.__init__(self, **options) self.fontface = options.get('fontface') or '' + self.fontsize = get_int_opt(options, 'fontsize', 0) def _escape(self, text): return text.replace('\\', '\\\\') \ @@ -106,6 +115,8 @@ class RtfFormatter(Formatter): )) offset += 1 outfile.write(r'}\f0') + if self.fontsize: + outfile.write(r'\fs%d' % (self.fontsize)) # highlight stream for ttype, value in tokensource: diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index b90e0d8f..c937d908 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -67,6 +67,7 @@ LEXERS = { 'CirruLexer': ('pygments.lexers.web', 'Cirru', ('cirru',), ('*.cirru', '*.cr'), ('text/x-cirru',)), 'ClayLexer': ('pygments.lexers.compiled', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)), 'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')), + 'ClojureScriptLexer': ('pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')), 'CobolFreeformatLexer': ('pygments.lexers.compiled', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()), 'CobolLexer': ('pygments.lexers.compiled', 'COBOL', ('cobol',), ('*.cob', '*.COB', '*.cpy', '*.CPY'), ('text/x-cobol',)), 'CoffeeScriptLexer': ('pygments.lexers.web', 'CoffeeScript', ('coffee-script', 'coffeescript', 'coffee'), ('*.coffee',), ('text/coffeescript',)), @@ -119,6 +120,7 @@ LEXERS = { 'FelixLexer': ('pygments.lexers.compiled', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)), 'FortranLexer': ('pygments.lexers.compiled', 'Fortran', ('fortran',), ('*.f', '*.f90', '*.F', '*.F90'), ('text/x-fortran',)), 'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()), + 'GAPLexer': ('pygments.lexers.math', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()), 'GLShaderLexer': ('pygments.lexers.compiled', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)), 'GasLexer': ('pygments.lexers.asm', 'GAS', ('gas', 'asm'), ('*.s', '*.S'), ('text/x-gas',)), 'GenshiLexer': ('pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',), ('application/x-genshi', 'application/x-kid')), @@ -239,6 +241,7 @@ LEXERS = { 'Perl6Lexer': ('pygments.lexers.agile', 'Perl6', ('perl6', 'pl6'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t'), ('text/x-perl6', 'application/x-perl6')), 'PerlLexer': ('pygments.lexers.agile', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm', '*.t'), ('text/x-perl', 'application/x-perl')), 'PhpLexer': ('pygments.lexers.web', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]', '*.inc'), ('text/x-php',)), + 'PigLexer': ('pygments.lexers.jvm', 'Pig', ('pig',), ('*.pig',), ('text/x-pig',)), 'PikeLexer': ('pygments.lexers.compiled', 'Pike', ('pike',), ('*.pike', '*.pmod'), ('text/x-pike',)), 'PlPgsqlLexer': ('pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)), 'PostScriptLexer': ('pygments.lexers.other', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)), @@ -329,6 +332,7 @@ LEXERS = { 'XsltLexer': ('pygments.lexers.web', 'XSLT', ('xslt',), ('*.xsl', '*.xslt', '*.xpl'), ('application/xsl+xml', 'application/xslt+xml')), 'XtendLexer': ('pygments.lexers.jvm', 'Xtend', ('xtend',), ('*.xtend',), ('text/x-xtend',)), 'YamlLexer': ('pygments.lexers.text', 'YAML', ('yaml',), ('*.yaml', '*.yml'), ('text/x-yaml',)), + 'ZephirLexer': ('pygments.lexers.web', 'Zephir', ('zephir',), ('*.zep',), ()), } if __name__ == '__main__': diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py index db69fdc6..e6e10098 100644 --- a/pygments/lexers/compiled.py +++ b/pygments/lexers/compiled.py @@ -469,20 +469,23 @@ class DLexer(RegexLexer): (r'(abstract|alias|align|asm|assert|auto|body|break|case|cast' r'|catch|class|const|continue|debug|default|delegate|delete' r'|deprecated|do|else|enum|export|extern|finally|final' - r'|foreach_reverse|foreach|for|function|goto|if|import|inout' - r'|interface|invariant|in|is|lazy|mixin|module|new|nothrow|out' + r'|foreach_reverse|foreach|for|function|goto|if|immutable|import' + r'|interface|invariant|inout|in|is|lazy|mixin|module|new|nothrow|out' r'|override|package|pragma|private|protected|public|pure|ref|return' - r'|scope|static|struct|super|switch|synchronized|template|this' + r'|scope|shared|static|struct|super|switch|synchronized|template|this' r'|throw|try|typedef|typeid|typeof|union|unittest|version|volatile' - r'|while|with|__traits)\b', Keyword + r'|while|with|__gshared|__traits|__vector|__parameters)\b', Keyword ), (r'(bool|byte|cdouble|cent|cfloat|char|creal|dchar|double|float' r'|idouble|ifloat|int|ireal|long|real|short|ubyte|ucent|uint|ulong' r'|ushort|void|wchar)\b', Keyword.Type ), (r'(false|true|null)\b', Keyword.Constant), + (r'(__FILE__|__MODULE__|__LINE__|__FUNCTION__|__PRETTY_FUNCTION__' + r'|__DATE__|__EOF__|__TIME__|__TIMESTAMP__|__VENDOR__|__VERSION__)\b', + Keyword.Pseudo), (r'macro\b', Keyword.Reserved), - (r'(string|wstring|dstring)\b', Name.Builtin), + (r'(string|wstring|dstring|size_t|ptrdiff_t)\b', Name.Builtin), # FloatLiteral # -- HexFloat (r'0[xX]([0-9a-fA-F_]*\.[0-9a-fA-F_]+|[0-9a-fA-F_]+)' @@ -528,6 +531,8 @@ class DLexer(RegexLexer): (r'q"(.).*?\1"', String), # -- TokenString (r'q{', String, 'token_string'), + # Attributes + (r'@([a-zA-Z_]\w*)?', Name.Decorator), # Tokens (r'(~=|\^=|%=|\*=|==|!>=|!<=|!<>=|!<>|!<|!>|!=|>>>=|>>>|>>=|>>|>=' r'|<>=|<>|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.\.|\.\.|/=)' @@ -535,6 +540,8 @@ class DLexer(RegexLexer): ), # Identifier (r'[a-zA-Z_]\w*', Name), + # Line + (r'#line\s.*\n', Comment.Special), ], 'nested_comment': [ (r'[^+/]+', Comment.Multiline), @@ -1434,8 +1441,8 @@ def objective(baselexer): # discussion in Issue 789 (r',', Punctuation), (r'\.\.\.', Punctuation), - (r'(\(.*?\))([a-zA-Z$_][a-zA-Z0-9$_]*)', bygroups(using(this), - Name.Variable)), + (r'(\(.*?\))(\s*)([a-zA-Z$_][a-zA-Z0-9$_]*)', + bygroups(using(this), Text, Name.Variable)), (r'[a-zA-Z$_][a-zA-Z0-9$_]*:', Name.Function), (';', Punctuation, '#pop'), ('{', Punctuation, 'function'), diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 30a7ddd7..64c47b6e 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -19,8 +19,9 @@ from pygments import unistring as uni __all__ = ['JavaLexer', 'ScalaLexer', 'GosuLexer', 'GosuTemplateLexer', - 'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'KotlinLexer', - 'XtendLexer', 'AspectJLexer', 'CeylonLexer'] + 'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'ClojureScriptLexer', + 'KotlinLexer', 'XtendLexer', 'AspectJLexer', 'CeylonLexer', + 'PigLexer'] class JavaLexer(RegexLexer): @@ -813,6 +814,19 @@ class ClojureLexer(RegexLexer): } +class ClojureScriptLexer(ClojureLexer): + """ + Lexer for `ClojureScript <http://clojure.org/clojurescript>`_ + source code. + + .. versionadded:: 2.0 + """ + name = 'ClojureScript' + aliases = ['clojurescript', 'cljs'] + filenames = ['*.cljs'] + mimetypes = ['text/x-clojurescript', 'application/x-clojurescript'] + + class TeaLangLexer(RegexLexer): """ For `Tea <http://teatrove.org/>`_ source code. Only used within a @@ -1066,3 +1080,69 @@ class XtendLexer(RegexLexer): (r'.', String) ], } + +class PigLexer(RegexLexer): + """ + For `Pig Latin <https://pig.apache.org/>`_ source code. + + .. versionadded:: 2.0 + """ + + name = 'Pig' + aliases = ['pig'] + filenames = ['*.pig'] + mimetypes = ['text/x-pig'] + + flags = re.MULTILINE | re.IGNORECASE + + tokens = { + 'root': [ + (r'\s+', Text), + (r'--.*', Comment), + (r'/\*[\w\W]*?\*/', Comment.Multiline), + (r'\\\n', Text), + (r'\\', Text), + (r'\'(?:\\[ntbrf\\\']|\\u[0-9a-f]{4}|[^\'\\\n\r])*\'', String), + include('keywords'), + include('types'), + include('builtins'), + include('punct'), + include('operators'), + (r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-f]+', Number.Hex), + (r'[0-9]+L?', Number.Integer), + (r'\n', Text), + (r'([a-z_][a-z0-9_]*)(\s*)(\()', + bygroups(Name.Function, Text, Punctuation)), + (r'[()#:]', Text), + (r'[^(:#\'\")\s]+', Text), + (r'\S+\s+', Text) # TODO: make tests pass without \s+ + ], + 'keywords': [ + (r'(assert|and|any|all|arrange|as|asc|bag|by|cache|CASE|cat|cd|cp|' + r'%declare|%default|define|dense|desc|describe|distinct|du|dump|' + r'eval|exex|explain|filter|flatten|foreach|full|generate|group|' + r'help|if|illustrate|import|inner|input|into|is|join|kill|left|' + r'limit|load|ls|map|matches|mkdir|mv|not|null|onschema|or|order|' + r'outer|output|parallel|pig|pwd|quit|register|returns|right|rm|' + r'rmf|rollup|run|sample|set|ship|split|stderr|stdin|stdout|store|' + r'stream|through|union|using|void)\b', Keyword) + ], + 'builtins': [ + (r'(AVG|BinStorage|cogroup|CONCAT|copyFromLocal|copyToLocal|COUNT|' + r'cross|DIFF|MAX|MIN|PigDump|PigStorage|SIZE|SUM|TextLoader|' + r'TOKENIZE)\b', Name.Builtin) + ], + 'types': [ + (r'(bytearray|BIGINTEGER|BIGDECIMAL|chararray|datetime|double|float|' + r'int|long|tuple)\b', Keyword.Type) + ], + 'punct': [ + (r'[;(){}\[\]]', Punctuation), + ], + 'operators': [ + (r'[#=,./%+\-?]', Operator), + (r'(eq|gt|lt|gte|lte|neq|matches)\b', Operator), + (r'(==|<=|<|>=|>|!=)', Operator), + ], + } diff --git a/pygments/lexers/math.py b/pygments/lexers/math.py index 1bce106c..e7a8948b 100644 --- a/pygments/lexers/math.py +++ b/pygments/lexers/math.py @@ -26,7 +26,7 @@ from pygments.lexers import _stan_builtins __all__ = ['JuliaLexer', 'JuliaConsoleLexer', 'MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer', 'NumPyLexer', 'RConsoleLexer', 'SLexer', 'JagsLexer', 'BugsLexer', 'StanLexer', - 'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer'] + 'IDLLexer', 'RdLexer', 'IgorLexer', 'MathematicaLexer', 'GAPLexer'] class JuliaLexer(RegexLexer): @@ -1972,3 +1972,53 @@ class MathematicaLexer(RegexLexer): (r'\s+', Text.Whitespace), ], } + +class GAPLexer(RegexLexer): + """ + For `GAP <http://www.gap-system.org>`_ source code. + + .. versionadded:: 2.0 + """ + name = 'GAP' + aliases = ['gap'] + filenames = ['*.g', '*.gd', '*.gi', '*.gap'] + + tokens = { + 'root' : [ + (r'#.*$', Comment.Single), + (r'"(?:[^"\\]|\\.)*"', String), + (r'\(|\)|\[|\]|\{|\}', Punctuation), + (r'''(?x)\b(?: + if|then|elif|else|fi| + for|while|do|od| + repeat|until| + break|continue| + function|local|return|end| + rec| + quit|QUIT| + IsBound|Unbind| + TryNextMethod| + Info|Assert + )\b''', Keyword), + (r'''(?x)\b(?: + true|false|fail|infinity + )\b''', + Name.Constant), + (r'''(?x)\b(?: + (Declare|Install)([A-Z][A-Za-z]+)| + BindGlobal|BIND_GLOBAL + )\b''', + Name.Builtin), + (r'\.|,|:=|;|=|\+|-|\*|/|\^|>|<', Operator), + (r'''(?x)\b(?: + and|or|not|mod|in + )\b''', + Operator.Word), + (r'''(?x) + (?:[a-zA-Z_0-9]+|`[^`]*`) + (?:::[a-zA-Z_0-9]+|`[^`]*`)*''', Name.Variable), + (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number), + (r'\.[0-9]+(?:e[0-9]+)?', Number), + (r'.', Text) + ] + } diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py index 1b8ce892..ba777e28 100644 --- a/pygments/lexers/other.py +++ b/pygments/lexers/other.py @@ -1235,9 +1235,9 @@ class ModelicaLexer(RegexLexer): 'root': [ include('whitespace'), include('keywords'), + include('classes'), include('functions'), include('operators'), - include('classes'), (r'("<html>|<html>)', Name.Tag, 'html-content'), include('statements'), ], @@ -1264,9 +1264,9 @@ class ModelicaLexer(RegexLexer): r'terminate)\b', Name.Builtin), ], 'classes': [ - (r'(block|class|connector|end|function|model|package|' + (r'(operator)?(\s+)?(block|class|connector|end|function|model|operator|package|' r'record|type)(\s+)((?!if|when|while)[A-Za-z_]\w*|[\'][^\']+[\'])([;]?)', - bygroups(Keyword, Text, Name.Class, Text)) + bygroups(Keyword, Text, Keyword, Text, Name.Class, Text)) ], 'quoted_ident': [ (r'\'', Name, '#pop'), @@ -2528,11 +2528,11 @@ class AwkLexer(RegexLexer): 'root': [ (r'^(?=\s|/)', Text, 'slashstartsregex'), include('commentsandwhitespace'), - (r'\+\+|--|\|\||&&|in|\$|!?~|' + (r'\+\+|--|\|\||&&|in\b|\$|!?~|' r'(\*\*|[-<>+*%\^/!=])=?', Operator, 'slashstartsregex'), (r'[{(\[;,]', Punctuation, 'slashstartsregex'), (r'[})\].]', Punctuation), - (r'(break|continue|do|while|exit|for|if|' + (r'(break|continue|do|while|exit|for|if|else|' r'return)\b', Keyword, 'slashstartsregex'), (r'function\b', Keyword.Declaration, 'slashstartsregex'), (r'(atan2|cos|exp|int|log|rand|sin|sqrt|srand|gensub|gsub|index|' diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py index c975ad80..7d3073f1 100644 --- a/pygments/lexers/web.py +++ b/pygments/lexers/web.py @@ -28,7 +28,7 @@ __all__ = ['HtmlLexer', 'XmlLexer', 'JavascriptLexer', 'JsonLexer', 'CssLexer', 'ObjectiveJLexer', 'CoffeeScriptLexer', 'LiveScriptLexer', 'DuelLexer', 'ScamlLexer', 'JadeLexer', 'XQueryLexer', 'DtdLexer', 'DartLexer', 'LassoLexer', 'QmlLexer', 'TypeScriptLexer', - 'KalLexer', 'CirruLexer', 'MaskLexer'] + 'KalLexer', 'CirruLexer', 'MaskLexer', 'ZephirLexer'] class JavascriptLexer(RegexLexer): @@ -3691,7 +3691,7 @@ class DartLexer(RegexLexer): r'native|operator|set|static|typedef|var)\b', Keyword.Declaration), (r'\b(bool|double|Dynamic|int|num|Object|String|void)\b', Keyword.Type), (r'\b(false|null|true)\b', Keyword.Constant), - (r'[~!%^&*+=|?:<>/-]|as', Operator), + (r'[~!%^&*+=|?:<>/-]|as\b', Operator), (r'[a-zA-Z_$][a-zA-Z0-9_]*:', Name.Label), (r'[a-zA-Z_$][a-zA-Z0-9_]*', Name), (r'[(){}\[\],.;]', Punctuation), @@ -4336,5 +4336,68 @@ class MaskLexer(RegexLexer): 'string-double-pop2':[ (r'"', String.Single, '#pop:2'), include('string-base') + ], + } + + +class ZephirLexer(RegexLexer): + """ + For `Zephir language <http://zephir-lang.com/>`_ source code. + + Zephir is a compiled high level language aimed + to the creation of C-extensions for PHP. + + .. versionadded:: 2.0 + """ + + name = 'Zephir' + aliases = ['zephir'] + filenames = ['*.zep'] + + zephir_keywords = [ 'fetch', 'echo', 'isset', 'empty'] + zephir_type = [ 'bit', 'bits' , 'string' ] + + flags = re.DOTALL | re.MULTILINE + + tokens = { + 'commentsandwhitespace': [ + (r'\s+', Text), + (r'//.*?\n', Comment.Single), + (r'/\*.*?\*/', Comment.Multiline) + ], + 'slashstartsregex': [ + include('commentsandwhitespace'), + (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' + r'([gim]+\b|\B)', String.Regex, '#pop'), + (r'', Text, '#pop') + ], + 'badregex': [ + (r'\n', Text, '#pop') + ], + 'root': [ + (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), + include('commentsandwhitespace'), + (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' + r'(<<|>>>?|==?|!=?|->|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'), + (r'[{(\[;,]', Punctuation, 'slashstartsregex'), + (r'[})\].]', Punctuation), + (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|require|inline|' + r'throw|try|catch|finally|new|delete|typeof|instanceof|void|namespace|use|extends|' + r'this|fetch|isset|unset|echo|fetch|likely|unlikely|empty)\b', Keyword, 'slashstartsregex'), + (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'), + (r'(abstract|boolean|bool|char|class|const|double|enum|export|' + r'extends|final|float|goto|implements|import|int|string|interface|long|ulong|char|uchar|native|unsigned|' + r'private|protected|public|short|static|self|throws|reverse|' + r'transient|volatile)\b', Keyword.Reserved), + (r'(true|false|null|undefined)\b', Keyword.Constant), + (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|' + r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|' + r'window)\b', Name.Builtin), + (r'[$a-zA-Z_][a-zA-Z0-9_\\]*', Name.Other), + (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), + (r'0x[0-9a-fA-F]+', Number.Hex), + (r'[0-9]+', Number.Integer), + (r'"(\\\\|\\"|[^"])*"', String.Double), + (r"'(\\\\|\\'|[^'])*'", String.Single), ] } diff --git a/pygments/token.py b/pygments/token.py index f6c3066d..c40ffd33 100644 --- a/pygments/token.py +++ b/pygments/token.py @@ -49,6 +49,7 @@ Token = _TokenType() # Special token types Text = Token.Text Whitespace = Text.Whitespace +Escape = Token.Escape Error = Token.Error # Text that doesn't belong to this lexer (e.g. HTML in PHP) Other = Token.Other @@ -116,6 +117,7 @@ STANDARD_TYPES = { Text: '', Whitespace: 'w', + Escape: 'esc', Error: 'err', Other: 'x', diff --git a/tests/examplefiles/core.cljs b/tests/examplefiles/core.cljs new file mode 100644 index 00000000..f135b832 --- /dev/null +++ b/tests/examplefiles/core.cljs @@ -0,0 +1,52 @@ + +(ns bounder.core + (:require [bounder.html :as html] + [domina :refer [value set-value! single-node]] + [domina.css :refer [sel]] + [lowline.functions :refer [debounce]] + [enfocus.core :refer [at]] + [cljs.reader :as reader] + [clojure.string :as s]) + (:require-macros [enfocus.macros :as em])) + +(def filter-input + (single-node + (sel ".search input"))) + +(defn project-matches [query project] + (let [words (cons (:name project) + (map name (:categories project))) + to-match (->> words + (s/join "") + (s/lower-case))] + (<= 0 (.indexOf to-match (s/lower-case query))))) + +(defn apply-filter-for [projects] + (let [query (value filter-input)] + (html/render-projects + (filter (partial project-matches query) + projects)))) + +(defn filter-category [projects evt] + (let [target (.-currentTarget evt)] + (set-value! filter-input + (.-innerHTML target)) + (apply-filter-for projects))) + +(defn init-listeners [projects] + (at js/document + ["input"] (em/listen + :keyup + (debounce + (partial apply-filter-for projects) + 500)) + [".category-links li"] (em/listen + :click + (partial filter-category projects)))) + +(defn init [projects-edn] + (let [projects (reader/read-string projects-edn)] + (init-listeners projects) + (html/render-projects projects) + (html/loaded))) + diff --git a/tests/examplefiles/example.gd b/tests/examplefiles/example.gd new file mode 100644 index 00000000..c285ea32 --- /dev/null +++ b/tests/examplefiles/example.gd @@ -0,0 +1,23 @@ +############################################################################# +## +#W example.gd +## +## This file contains a sample of a GAP declaration file. +## +DeclareProperty( "SomeProperty", IsLeftModule ); +DeclareGlobalFunction( "SomeGlobalFunction" ); + + +############################################################################# +## +#C IsQuuxFrobnicator(<R>) +## +## <ManSection> +## <Filt Name="IsQuuxFrobnicator" Arg='R' Type='Category'/> +## +## <Description> +## Tests whether R is a quux frobnicator. +## </Description> +## </ManSection> +## +DeclareSynonym( "IsQuuxFrobnicator", IsField and IsGroup ); diff --git a/tests/examplefiles/example.gi b/tests/examplefiles/example.gi new file mode 100644 index 00000000..c9c5e55d --- /dev/null +++ b/tests/examplefiles/example.gi @@ -0,0 +1,64 @@ +############################################################################# +## +#W example.gd +## +## This file contains a sample of a GAP implementation file. +## + + +############################################################################# +## +#M SomeOperation( <val> ) +## +## performs some operation on <val> +## +InstallMethod( SomeProperty, + "for left modules", + [ IsLeftModule ], 0, + function( M ) + if IsFreeLeftModule( M ) and not IsTrivial( M ) then + return true; + fi; + TryNextMethod(); + end ); + + + +############################################################################# +## +#F SomeGlobalFunction( ) +## +## A global variadic funfion. +## +InstallGlobalFunction( SomeGlobalFunction, function( arg ) + if Length( arg ) = 3 then + return arg[1] + arg[2] * arg[3]; + elif Length( arg ) = 2 then + return arg[1] - arg[2] + else + Error( "usage: SomeGlobalFunction( <x>, <y>[, <z>] )" ); + fi; + end ); + + +# +# A plain function. +# +SomeFunc := function(x, y) + local z, func, tmp, j; + z := x * 1.0; + y := 17^17 - y; + func := a -> a mod 5; + tmp := List( [1..50], func ); + while y > 0 do + for j in tmp do + Print(j, "\n"); + od; + repeat + y := y - 1; + until 0 < 1; + y := y -1; + od; + return z; +end; +
\ No newline at end of file diff --git a/tests/examplefiles/objc_example.m b/tests/examplefiles/objc_example.m index 67b33022..f4f27170 100644 --- a/tests/examplefiles/objc_example.m +++ b/tests/examplefiles/objc_example.m @@ -30,3 +30,6 @@ NSDictionary *d = @{ @"key": @"value" }; NSNumber *n1 = @( 1 ); NSNumber *n2 = @( [a length] ); + ++ (void)f1:(NSString *)s1; ++ (void)f2:(NSString *) s2; diff --git a/tests/examplefiles/test.pig b/tests/examplefiles/test.pig new file mode 100644 index 00000000..f67b0268 --- /dev/null +++ b/tests/examplefiles/test.pig @@ -0,0 +1,148 @@ +/** + * This script is an example recommender (using made up data) showing how you might modify item-item links + * by defining similar relations between items in a dataset and customizing the change in weighting. + * This example creates metadata by using the genre field as the metadata_field. The items with + * the same genre have it's weight cut in half in order to boost the signals of movies that do not have the same genre. + * This technique requires a customization of the standard GetItemItemRecommendations macro + */ +import 'recommenders.pig'; + + + +%default INPUT_PATH_PURCHASES '../data/retail/purchases.json' +%default INPUT_PATH_WISHLIST '../data/retail/wishlists.json' +%default INPUT_PATH_INVENTORY '../data/retail/inventory.json' +%default OUTPUT_PATH '../data/retail/out/modify_item_item' + + +/******** Custom GetItemItemRecommnedations *********/ +define recsys__GetItemItemRecommendations_ModifyCustom(user_item_signals, metadata) returns item_item_recs { + + -- Convert user_item_signals to an item_item_graph + ii_links_raw, item_weights = recsys__BuildItemItemGraph( + $user_item_signals, + $LOGISTIC_PARAM, + $MIN_LINK_WEIGHT, + $MAX_LINKS_PER_USER + ); + -- NOTE this function is added in order to combine metadata with item-item links + -- See macro for more detailed explination + ii_links_metadata = recsys__AddMetadataToItemItemLinks( + ii_links_raw, + $metadata + ); + + /********* Custom Code starts here ********/ + + --The code here should adjust the weights based on an item-item link and the equality of metadata. + -- In this case, if the metadata is the same, the weight is reduced. Otherwise the weight is left alone. + ii_links_adjusted = foreach ii_links_metadata generate item_A, item_B, + -- the amount of weight adjusted is dependant on the domain of data and what is expected + -- It is always best to adjust the weight by multiplying it by a factor rather than addition with a constant + (metadata_B == metadata_A ? (weight * 0.5): weight) as weight; + + + /******** Custom Code stops here *********/ + + -- remove negative numbers just incase + ii_links_adjusted_filt = foreach ii_links_adjusted generate item_A, item_B, + (weight <= 0 ? 0: weight) as weight; + -- Adjust the weights of the graph to improve recommendations. + ii_links = recsys__AdjustItemItemGraphWeight( + ii_links_adjusted_filt, + item_weights, + $BAYESIAN_PRIOR + ); + + -- Use the item-item graph to create item-item recommendations. + $item_item_recs = recsys__BuildItemItemRecommendationsFromGraph( + ii_links, + $NUM_RECS_PER_ITEM, + $NUM_RECS_PER_ITEM + ); +}; + + +/******* Load Data **********/ + +--Get purchase signals +purchase_input = load '$INPUT_PATH_PURCHASES' using org.apache.pig.piggybank.storage.JsonLoader( + 'row_id: int, + movie_id: chararray, + movie_name: chararray, + user_id: chararray, + purchase_price: int'); + +--Get wishlist signals +wishlist_input = load '$INPUT_PATH_WISHLIST' using org.apache.pig.piggybank.storage.JsonLoader( + 'row_id: int, + movie_id: chararray, + movie_name: chararray, + user_id: chararray'); + + +/******* Convert Data to Signals **********/ + +-- Start with choosing 1 as max weight for a signal. +purchase_signals = foreach purchase_input generate + user_id as user, + movie_name as item, + 1.0 as weight; + + +-- Start with choosing 0.5 as weight for wishlist items because that is a weaker signal than +-- purchasing an item. +wishlist_signals = foreach wishlist_input generate + user_id as user, + movie_name as item, + 0.5 as weight; + +user_signals = union purchase_signals, wishlist_signals; + + +/******** Changes for Modifying item-item links ******/ +inventory_input = load '$INPUT_PATH_INVENTORY' using org.apache.pig.piggybank.storage.JsonLoader( + 'movie_title: chararray, + genres: bag{tuple(content:chararray)}'); + + +metadata = foreach inventory_input generate + FLATTEN(genres) as metadata_field, + movie_title as item; +-- requires the macro to be written seperately + --NOTE this macro is defined within this file for clarity +item_item_recs = recsys__GetItemItemRecommendations_ModifyCustom(user_signals, metadata); +/******* No more changes ********/ + + +user_item_recs = recsys__GetUserItemRecommendations(user_signals, item_item_recs); + +--Completely unrelated code stuck in the middle +data = LOAD 's3n://my-s3-bucket/path/to/responses' + USING org.apache.pig.piggybank.storage.JsonLoader(); +responses = FOREACH data GENERATE object#'response' AS response: map[]; +out = FOREACH responses + GENERATE response#'id' AS id: int, response#'thread' AS thread: chararray, + response#'comments' AS comments: {t: (comment: chararray)}; +STORE out INTO 's3n://path/to/output' USING PigStorage('|'); + + +/******* Store recommendations **********/ + +-- If your output folder exists already, hadoop will refuse to write data to it. + +rmf $OUTPUT_PATH/item_item_recs; +rmf $OUTPUT_PATH/user_item_recs; + +store item_item_recs into '$OUTPUT_PATH/item_item_recs' using PigStorage(); +store user_item_recs into '$OUTPUT_PATH/user_item_recs' using PigStorage(); + +-- STORE the item_item_recs into dynamo +STORE item_item_recs + INTO '$OUTPUT_PATH/unused-ii-table-data' +USING com.mortardata.pig.storage.DynamoDBStorage('$II_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY'); + +-- STORE the user_item_recs into dynamo +STORE user_item_recs + INTO '$OUTPUT_PATH/unused-ui-table-data' +USING com.mortardata.pig.storage.DynamoDBStorage('$UI_TABLE', '$AWS_ACCESS_KEY_ID', '$AWS_SECRET_ACCESS_KEY'); diff --git a/tests/examplefiles/test.zep b/tests/examplefiles/test.zep new file mode 100644 index 00000000..4724d4c4 --- /dev/null +++ b/tests/examplefiles/test.zep @@ -0,0 +1,33 @@ +namespace Test; + +use Test\Foo; + +class Bar +{ + protected a; + private b; + public c {set, get}; + + public function __construct(string str, boolean bool) + { + let this->c = str; + this->setC(bool); + let this->b = []; + } + + public function sayHello(string name) + { + echo "Hello " . name; + } + + protected function loops() + { + for a in b { + echo a; + } + loop { + return "boo!"; + } + } + +}
\ No newline at end of file |