summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Hatch <tim@timhatch.com>2011-03-19 20:00:07 -0700
committerTim Hatch <tim@timhatch.com>2011-03-19 20:00:07 -0700
commite7c58665d02d9a9ca75f7600361c8bb2f1989981 (patch)
tree664ee459ce61070f08f69d781065a4e22e3775d7
parenta220bbdfb2be4343b8f4a569ad827eecf8251989 (diff)
downloadpygments-e7c58665d02d9a9ca75f7600361c8bb2f1989981.tar.gz
Bulk changes to improve many lexers' inner workings
Based on a suspicion that most examplefiles only exercise a small part of the lexers, I've written some code to find suspicious parts of regular expressions, then gone back over those to fix them. Most of these affect whether the regex does what it looks like it does, but none of them should appreciably change the function of the lexer. * a few cases which used capturing groups + bygroups incorrectly (most were harmless, but I think one could have generated a traceback in its previous state) * a few cases which could match empty string, without a callback (this is highly discouraged, because if an op doesn't consume any characters, it might be possible to enter an empty loop). I'll revisit individually the cases where a callback or state push is used. * many cases with embedded newlines in non-verbose regexes * many, many cases with reversed (else|elseif) style alternations
-rw-r--r--docs/src/tokens.txt2
-rwxr-xr-xpygments/formatters/_mapping.py2
-rw-r--r--pygments/lexers/agile.py8
-rw-r--r--pygments/lexers/asm.py8
-rw-r--r--pygments/lexers/compiled.py6
-rw-r--r--pygments/lexers/dotnet.py2
-rw-r--r--pygments/lexers/functional.py4
-rw-r--r--pygments/lexers/hdl.py30
-rw-r--r--pygments/lexers/other.py16
-rw-r--r--pygments/lexers/parsers.py4
-rw-r--r--pygments/lexers/web.py115
11 files changed, 101 insertions, 96 deletions
diff --git a/docs/src/tokens.txt b/docs/src/tokens.txt
index 9ef0df8d..4900a9ab 100644
--- a/docs/src/tokens.txt
+++ b/docs/src/tokens.txt
@@ -303,7 +303,7 @@ Comments
`Comment.Special`
Special data in comments. For example code tags, author and license
- informations etc.
+ information, etc.
Generic Tokens
diff --git a/pygments/formatters/_mapping.py b/pygments/formatters/_mapping.py
index 0c344a7a..3dfe970e 100755
--- a/pygments/formatters/_mapping.py
+++ b/pygments/formatters/_mapping.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
pygments.formatters._mapping
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Formatter mapping defintions. This file is generated by itself. Everytime
you change something on a builtin formatter defintion, run this script from
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py
index 6a37c193..e648b9d6 100644
--- a/pygments/lexers/agile.py
+++ b/pygments/lexers/agile.py
@@ -910,7 +910,7 @@ class PerlLexer(RegexLexer):
(r'(q|qq|qw|qr|qx)([^a-zA-Z0-9])(.|\n)*?\2', String.Other),
(r'package\s+', Keyword, 'modulename'),
(r'sub\s+', Keyword, 'funcname'),
- (r'(\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|'
+ (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
r'!~|&&?|\|\||\.{1,3})', Operator),
(r'[-+/*%=<>&^|!\\~]=?', Operator),
(r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage
@@ -1034,7 +1034,7 @@ class LuaLexer(RegexLexer):
# multiline strings
(r'(?s)\[(=*)\[.*?\]\1\]', String),
- (r'(==|~=|<=|>=|\.\.|\.\.\.|[=+\-*/%^<>#])', Operator),
+ (r'(==|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#])', Operator),
(r'[\[\]\{\}\(\)\.,:;]', Punctuation),
(r'(and|or|not)\b', Operator.Word),
@@ -1301,7 +1301,7 @@ class TclLexer(RegexLexer):
'params': [
(r';', Keyword, '#pop'),
(r'\n', Text, '#pop'),
- (r'(else|elseif|then)', Keyword),
+ (r'(else|elseif|then)\b', Keyword),
include('basic'),
include('data'),
],
@@ -1949,7 +1949,7 @@ class IokeLexer(RegexLexer):
Operator),
# Punctuation
- (r'(\`\`|\`|\'\'|\'|\.|\,|@|@@|\[|\]|\(|\)|{|})', Punctuation),
+ (r'''(``?|''?|\.|,|@@?|[\[\](){}])''', Punctuation),
#kinds
(r'[A-Z][a-zA-Z0-9_!:?]*', Name.Class),
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py
index 4740569c..57c6be1b 100644
--- a/pygments/lexers/asm.py
+++ b/pygments/lexers/asm.py
@@ -130,17 +130,17 @@ class ObjdumpLexer(RegexLexer):
('( *)('+hex+r'+:)(\t)((?:'+hex+hex+' )+)$',
bygroups(Text, Name.Label, Text, Number.Hex)),
# Skipped a few bytes
- ('\t\.\.\.$', Text),
+ (r'\t\.\.\.$', Text),
# Relocation line
# (With offset)
- ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$',
+ (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x' + hex + '+)$',
bygroups(Text, Name.Label, Text, Name.Property, Text,
Name.Constant, Punctuation, Number.Hex)),
# (Without offset)
- ('(\t\t\t)('+hex+'+:)( )([^\t]+)(\t)(.*?)$',
+ (r'(\t\t\t)('+hex+r'+:)( )([^\t]+)(\t)(.*?)$',
bygroups(Text, Name.Label, Text, Name.Property, Text,
Name.Constant)),
- ('[^\n]+\n', Other)
+ (r'[^\n]+\n', Other)
]
}
diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
index 5c10a785..28ebb8a6 100644
--- a/pygments/lexers/compiled.py
+++ b/pygments/lexers/compiled.py
@@ -1059,7 +1059,7 @@ class DylanLexer(RegexLexer):
tokens = {
'root': [
(r'\b(subclass|abstract|block|c(on(crete|stant)|lass)|domain'
- r'|ex(c(eption|lude)|port)|f(unction(|al))|generic|handler'
+ r'|ex(c(eption|lude)|port)|f(unction(al)?)|generic|handler'
r'|i(n(herited|line|stance|terface)|mport)|library|m(acro|ethod)'
r'|open|primary|sealed|si(deways|ngleton)|slot'
r'|v(ariable|irtual))\b', Name.Builtin),
@@ -1069,7 +1069,7 @@ class DylanLexer(RegexLexer):
(r'"', String, 'string'),
(r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
(r'=>|\b(a(bove|fterwards)|b(e(gin|low)|y)|c(ase|leanup|reate)'
- r'|define|else(|if)|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise'
+ r'|define|else(if)?|end|f(inally|or|rom)|i[fn]|l(et|ocal)|otherwise'
r'|rename|s(elect|ignal)|t(hen|o)|u(n(less|til)|se)|wh(en|ile))\b',
Keyword),
(r'([ \t])([!\$%&\*\/:<=>\?~_^a-zA-Z0-9.+\-]*:)',
@@ -2151,7 +2151,7 @@ class AdaLexer(RegexLexer):
'end' : [
('(if|case|record|loop|select)', Keyword.Reserved),
('"[^"]+"|[a-zA-Z0-9_]+', Name.Function),
- ('[\n\s]+', Text),
+ ('\s+', Text),
(';', Punctuation, '#pop'),
],
'type_def': [
diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py
index 48feeb85..2055c009 100644
--- a/pygments/lexers/dotnet.py
+++ b/pygments/lexers/dotnet.py
@@ -181,7 +181,7 @@ class BooLexer(RegexLexer):
("'(\\\\|\\'|[^']*?)'", String.Single),
('[a-zA-Z_][a-zA-Z0-9_]*', Name),
(r'(\d+\.\d*|\d*\.\d+)([fF][+-]?[0-9]+)?', Number.Float),
- (r'[0-9][0-9\.]*(m|ms|d|h|s)', Number),
+ (r'[0-9][0-9\.]*(ms?|d|h|s)', Number),
(r'0\d+', Number.Oct),
(r'0x[a-fA-F0-9]+', Number.Hex),
(r'\d+L', Number.Integer.Long),
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index e6d16b19..5dbd6df2 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -563,7 +563,7 @@ class OcamlLexer(RegexLexer):
(r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
(r'\(\*', Comment, 'comment'),
(r'\b(%s)\b' % '|'.join(keywords), Keyword),
- (r'(%s)' % '|'.join(keyopts), Operator),
+ (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
(r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
(r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
(r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
@@ -656,7 +656,7 @@ class ErlangLexer(RegexLexer):
'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
]
- operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
+ operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!)'
word_operators = [
'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
'div', 'not', 'or', 'orelse', 'rem', 'xor'
diff --git a/pygments/lexers/hdl.py b/pygments/lexers/hdl.py
index b176cac1..3c5f4da9 100644
--- a/pygments/lexers/hdl.py
+++ b/pygments/lexers/hdl.py
@@ -76,21 +76,21 @@ class VerilogLexer(RegexLexer):
r'unsigned|var|vectored|void|wait|weak0|weak1|while|'
r'xnor|xor)\b', Keyword),
- (r'(`accelerate|`autoexpand_vectornets|`celldefine|`default_nettype|'
- r'`else|`elsif|`endcelldefine|`endif|`endprotect|`endprotected|'
- r'`expand_vectornets|`ifdef|`ifndef|`include|`noaccelerate|`noexpand_vectornets|'
- r'`noremove_gatenames|`noremove_netnames|`nounconnected_drive|'
- r'`protect|`protected|`remove_gatenames|`remove_netnames|`resetall|'
- r'`timescale|`unconnected_drive|`undef)\b', Comment.Preproc),
-
- (r'(\$bits|\$bitstoreal|\$bitstoshortreal|\$countdrivers|\$display|\$fclose|'
- r'\$fdisplay|\$finish|\$floor|\$fmonitor|\$fopen|\$fstrobe|\$fwrite|'
- r'\$getpattern|\$history|\$incsave|\$input|\$itor|\$key|\$list|\$log|'
- r'\$monitor|\$monitoroff|\$monitoron|\$nokey|\$nolog|\$printtimescale|'
- r'\$random|\$readmemb|\$readmemh|\$realtime|\$realtobits|\$reset|\$reset_count|'
- r'\$reset_value|\$restart|\$rtoi|\$save|\$scale|\$scope|\$shortrealtobits|'
- r'\$showscopes|\$showvariables|\$showvars|\$sreadmemb|\$sreadmemh|'
- r'\$stime|\$stop|\$strobe|\$time|\$timeformat|\$write)\b', Name.Builtin),
+ (r'`(accelerate|autoexpand_vectornets|celldefine|default_nettype|'
+ r'else|elsif|endcelldefine|endif|endprotect|endprotected|'
+ r'expand_vectornets|ifdef|ifndef|include|noaccelerate|noexpand_vectornets|'
+ r'noremove_gatenames|noremove_netnames|nounconnected_drive|'
+ r'protect|protected|remove_gatenames|remove_netnames|resetall|'
+ r'timescale|unconnected_drive|undef)\b', Comment.Preproc),
+
+ (r'\$(bits|bitstoreal|bitstoshortreal|countdrivers|display|fclose|'
+ r'fdisplay|finish|floor|fmonitor|fopen|fstrobe|fwrite|'
+ r'getpattern|history|incsave|input|itor|key|list|log|'
+ r'monitor|monitoroff|monitoron|nokey|nolog|printtimescale|'
+ r'random|readmemb|readmemh|realtime|realtobits|reset|reset_count|'
+ r'reset_value|restart|rtoi|save|scale|scope|shortrealtobits|'
+ r'showscopes|showvariables|showvars|sreadmemb|sreadmemh|'
+ r'stime|stop|strobe|time|timeformat|write)\b', Name.Builtin),
(r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
(r'(byte|shortint|int|longint|interger|time|'
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index 69e4ccb7..f9dc05f9 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -1167,9 +1167,9 @@ class PovrayLexer(RegexLexer):
(r'/\*[\w\W]*?\*/', Comment.Multiline),
(r'//.*\n', Comment.Single),
(r'(?s)"(?:\\.|[^"\\])+"', String.Double),
- (r'#(debug|default|else|end|error|fclose|fopen|if|ifdef|ifndef|'
+ (r'#(debug|default|else|end|error|fclose|fopen|ifdef|ifndef|'
r'include|range|read|render|statistics|switch|undef|version|'
- r'warning|while|write|define|macro|local|declare)',
+ r'warning|while|write|define|macro|local|declare)\b',
Comment.Preproc),
(r'\b(aa_level|aa_threshold|abs|acos|acosh|adaptive|adc_bailout|'
r'agate|agate_turb|all|alpha|ambient|ambient_light|angle|'
@@ -1219,11 +1219,11 @@ class PovrayLexer(RegexLexer):
r'vnormalize|volume_object|volume_rendered|vol_with_light|'
r'vrotate|v_steps|warning|warp|water_level|waves|while|width|'
r'wood|wrinkles|yes)\b', Keyword),
- (r'bicubic_patch|blob|box|camera|cone|cubic|cylinder|difference|'
+ (r'(bicubic_patch|blob|box|camera|cone|cubic|cylinder|difference|'
r'disc|height_field|intersection|julia_fractal|lathe|'
r'light_source|merge|mesh|object|plane|poly|polygon|prism|'
r'quadric|quartic|smooth_triangle|sor|sphere|superellipsoid|'
- r'text|torus|triangle|union', Name.Builtin),
+ r'text|torus|triangle|union)\b', Name.Builtin),
# TODO: <=, etc
(r'[\[\](){}<>;,]', Punctuation),
(r'[-+*/=]', Operator),
@@ -1261,7 +1261,7 @@ class AppleScriptLexer(RegexLexer):
Classes = ['alias ', 'application ', 'boolean ', 'class ', 'constant ',
'date ', 'file ', 'integer ', 'list ', 'number ', 'POSIX file ',
'real ', 'record ', 'reference ', 'RGB color ', 'script ',
- 'text ', 'unit types', '(Unicode )?text', 'string']
+ 'text ', 'unit types', '(?:Unicode )?text', 'string']
BuiltIn = ['attachment', 'attribute run', 'character', 'day', 'month',
'paragraph', 'word', 'year']
HandlerParams = ['about', 'above', 'against', 'apart from', 'around',
@@ -1527,7 +1527,7 @@ class AppleScriptLexer(RegexLexer):
(ur'(-|\*|\+|&|≠|>=?|<=?|=|≥|≤|/|÷|\^)', Operator),
(r"\b(%s)\b" % '|'.join(Operators), Operator.Word),
(r'^(\s*(?:on|end)\s+)'
- r'(%s)' % '|'.join(StudioEvents),
+ r'(%s)' % '|'.join(StudioEvents[::-1]),
bygroups(Keyword, Name.Function)),
(r'^(\s*)(in|on|script|to)(\s+)', bygroups(Text, Keyword, Text)),
(r'\b(as )(%s)\b' % '|'.join(Classes),
@@ -2403,7 +2403,7 @@ class PostScriptLexer(RegexLexer):
],
'escape': [
- (r'([0-8]{3}|n|r|t|b|f|\\|\(|\)|)', String.Escape, '#pop'),
+ (r'([0-8]{3}|n|r|t|b|f|\\|\(|\))?', String.Escape, '#pop'),
],
}
@@ -2784,7 +2784,7 @@ class HybrisLexer(RegexLexer):
# method names
(r'^(\s*(?:function|method|operator\s+)+?)'
r'([a-zA-Z_][a-zA-Z0-9_]*)'
- r'(\s*)(\()', bygroups(Name.Function, Text, Operator)),
+ r'(\s*)(\()', bygroups(Keyword, Name.Function, Text, Operator)),
(r'[^\S\n]+', Text),
(r'//.*?\n', Comment.Single),
(r'/\*.*?\*/', Comment.Multiline),
diff --git a/pygments/lexers/parsers.py b/pygments/lexers/parsers.py
index 0ead39be..6947328f 100644
--- a/pygments/lexers/parsers.py
+++ b/pygments/lexers/parsers.py
@@ -72,8 +72,8 @@ class RagelLexer(RegexLexer):
],
'operators': [
(r',', Operator), # Join
- (r'\||&|-|--', Operator), # Union, Intersection and Subtraction
- (r'\.|<:|:>|:>>', Operator), # Concatention
+ (r'\||&|--?', Operator), # Union, Intersection and Subtraction
+ (r'\.|<:|:>>?', Operator), # Concatention
(r':', Operator), # Label
(r'->', Operator), # Epsilon Transition
(r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index 11bc2175..742a93b1 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -56,7 +56,7 @@ class JavascriptLexer(RegexLexer):
(r'', Text, '#pop')
],
'badregex': [
- ('\n', Text, '#pop')
+ (r'\n', Text, '#pop')
],
'root': [
(r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
@@ -315,7 +315,7 @@ class CssLexer(RegexLexer):
r'list-style|margin-bottom|margin-left|margin-right|'
r'margin-top|margin|marker-offset|marks|max-height|max-width|'
r'min-height|min-width|opacity|orphans|outline|outline-color|'
- r'outline-style|outline-width|overflow(?:-x|-y|)|padding-bottom|'
+ r'outline-style|outline-width|overflow(?:-x|-y)?|padding-bottom|'
r'padding-left|padding-right|padding-top|padding|page|'
r'page-break-after|page-break-before|page-break-inside|'
r'pause-after|pause-before|pause|pitch|pitch-range|'
@@ -431,13 +431,13 @@ class ObjectiveJLexer(RegexLexer):
(';', Punctuation),
],
'whitespace': [
- (r'(@import)(\s+)("(\\\\|\\"|[^"])*")',
+ (r'(@import)(\s+)("(?:\\\\|\\"|[^"])*")',
bygroups(Comment.Preproc, Text, String.Double)),
- (r'(@import)(\s+)(<(\\\\|\\>|[^>])*>)',
+ (r'(@import)(\s+)(<(?:\\\\|\\>|[^>])*>)',
bygroups(Comment.Preproc, Text, String.Double)),
- (r'(#(?:include|import))(\s+)("(\\\\|\\"|[^"])*")',
+ (r'(#(?:include|import))(\s+)("(?:\\\\|\\"|[^"])*")',
bygroups(Comment.Preproc, Text, String.Double)),
- (r'(#(?:include|import))(\s+)(<(\\\\|\\>|[^>])*>)',
+ (r'(#(?:include|import))(\s+)(<(?:\\\\|\\>|[^>])*>)',
bygroups(Comment.Preproc, Text, String.Double)),
(r'#if\s+0', Comment.Preproc, 'if0'),
@@ -458,7 +458,7 @@ class ObjectiveJLexer(RegexLexer):
(r'', Text, '#pop'),
],
'badregex': [
- ('\n', Text, '#pop'),
+ (r'\n', Text, '#pop'),
],
'statements': [
(r'(L|@)?"', String, 'string'),
@@ -1241,7 +1241,7 @@ class HamlLexer(ExtendedRegexLexer):
'eval-or-plain': [
(r'[&!]?==', Punctuation, 'plain'),
- (r'([&!]?[=~])(' + _comma_dot + '*\n)',
+ (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
bygroups(Punctuation, using(RubyLexer)),
'root'),
(r'', Text, 'plain'),
@@ -1250,18 +1250,18 @@ class HamlLexer(ExtendedRegexLexer):
'content': [
include('css'),
(r'%[a-z0-9_:-]+', Name.Tag, 'tag'),
- (r'!!!' + _dot + '*\n', Name.Namespace, '#pop'),
- (r'(/)(\[' + _dot + '*?\])(' + _dot + '*\n)',
+ (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+ (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)',
bygroups(Comment, Comment.Special, Comment),
'#pop'),
- (r'/' + _dot + '*\n', _starts_block(Comment, 'html-comment-block'),
+ (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
'#pop'),
- (r'-#' + _dot + '*\n', _starts_block(Comment.Preproc,
+ (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
'haml-comment-block'), '#pop'),
- (r'(-)(' + _comma_dot + '*\n)',
+ (r'(-)(' + _comma_dot + r'*\n)',
bygroups(Punctuation, using(RubyLexer)),
'#pop'),
- (r':' + _dot + '*\n', _starts_block(Name.Decorator, 'filter-block'),
+ (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
'#pop'),
include('eval-or-plain'),
],
@@ -1640,7 +1640,7 @@ class CoffeeScriptLexer(RegexLexer):
(r'', Text, '#pop'),
],
'badregex': [
- ('\n', Text, '#pop'),
+ (r'\n', Text, '#pop'),
],
'root': [
(r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
@@ -1739,7 +1739,7 @@ class ScamlLexer(ExtendedRegexLexer):
'eval-or-plain': [
(r'[&!]?==', Punctuation, 'plain'),
- (r'([&!]?[=~])(' + _dot + '*\n)',
+ (r'([&!]?[=~])(' + _dot + r'*\n)',
bygroups(Punctuation, using(ScalaLexer)),
'root'),
(r'', Text, 'plain'),
@@ -1748,21 +1748,21 @@ class ScamlLexer(ExtendedRegexLexer):
'content': [
include('css'),
(r'%[a-z0-9_:-]+', Name.Tag, 'tag'),
- (r'!!!' + _dot + '*\n', Name.Namespace, '#pop'),
- (r'(/)(\[' + _dot + '*?\])(' + _dot + '*\n)',
+ (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+ (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)',
bygroups(Comment, Comment.Special, Comment),
'#pop'),
- (r'/' + _dot + '*\n', _starts_block(Comment, 'html-comment-block'),
+ (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
'#pop'),
- (r'-#' + _dot + '*\n', _starts_block(Comment.Preproc,
+ (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
'scaml-comment-block'), '#pop'),
- (r'(-@\s*)(import)?(' + _dot + '*\n)',
+ (r'(-@\s*)(import)?(' + _dot + r'*\n)',
bygroups(Punctuation, Keyword, using(ScalaLexer)),
'#pop'),
- (r'(-)(' + _dot + '*\n)',
+ (r'(-)(' + _dot + r'*\n)',
bygroups(Punctuation, using(ScalaLexer)),
'#pop'),
- (r':' + _dot + '*\n', _starts_block(Name.Decorator, 'filter-block'),
+ (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
'#pop'),
include('eval-or-plain'),
],
@@ -1849,31 +1849,31 @@ class JadeLexer(ExtendedRegexLexer):
'eval-or-plain': [
(r'[&!]?==', Punctuation, 'plain'),
- (r'([&!]?[=~])(' + _dot + '*\n)',
+ (r'([&!]?[=~])(' + _dot + r'*\n)',
bygroups(Punctuation, using(ScalaLexer)), 'root'),
(r'', Text, 'plain'),
],
'content': [
include('css'),
- (r'!!!' + _dot + '*\n', Name.Namespace, '#pop'),
- (r'(/)(\[' + _dot + '*?\])(' + _dot + '*\n)',
+ (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+ (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)',
bygroups(Comment, Comment.Special, Comment),
'#pop'),
- (r'/' + _dot + '*\n', _starts_block(Comment, 'html-comment-block'),
+ (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
'#pop'),
- (r'-#' + _dot + '*\n', _starts_block(Comment.Preproc,
+ (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
'scaml-comment-block'), '#pop'),
- (r'(-@\s*)(import)?(' + _dot + '*\n)',
+ (r'(-@\s*)(import)?(' + _dot + r'*\n)',
bygroups(Punctuation, Keyword, using(ScalaLexer)),
'#pop'),
- (r'(-)(' + _dot + '*\n)',
+ (r'(-)(' + _dot + r'*\n)',
bygroups(Punctuation, using(ScalaLexer)),
'#pop'),
- (r':' + _dot + '*\n', _starts_block(Name.Decorator, 'filter-block'),
+ (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
'#pop'),
(r'[a-z0-9_:-]+', Name.Tag, 'tag'),
- (r'|', Text, 'eval-or-plain'),
+ (r'\|', Text, 'eval-or-plain'),
],
'tag': [
@@ -1949,24 +1949,24 @@ class XQueryLexer(ExtendedRegexLexer):
# ur"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|"
# ur"[\u10000-\uEFFFF]"
#)
- ncnamestartchar = r"[A-Z]|_|[a-z]"
+ ncnamestartchar = r"(?:[A-Z]|_|[a-z])"
# FIX UNICODE LATER
#ncnamechar = ncnamestartchar + (ur"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|"
# ur"[\u203F-\u2040]")
- ncnamechar = ncnamestartchar + r"|-|\.|[0-9]"
- ncname = "((%s)+(%s)*)" % (ncnamestartchar, ncnamechar)
- pitarget_namestartchar = r"[A-KN-WY-Z]|_|:|[a-kn-wy-z]"
- pitarget_namechar = pitarget_namestartchar + r"|-|\.|[0-9]"
- pitarget = "(%s)+(%s)*" % (pitarget_namestartchar, pitarget_namechar)
+ ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])"
+ ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar)
+ pitarget_namestartchar = r"(?:[A-KN-WY-Z]|_|:|[a-kn-wy-z])"
+ pitarget_namechar = r"(?:" + pitarget_namestartchar + r"|-|\.|[0-9])"
+ pitarget = "%s+%s*" % (pitarget_namestartchar, pitarget_namechar)
prefixedname = "%s:%s" % (ncname, ncname)
unprefixedname = ncname
- qname = "((%s)|(%s))" %(prefixedname, unprefixedname)
+ qname = "(?:%s|%s)" % (prefixedname, unprefixedname)
- entityref = r'&(lt|gt|amp|quot|apos|nbsp);'
- charref = r'&#[0-9]+;|&#x[0-9a-fA-F]+;'
+ entityref = r'(?:&(?:lt|gt|amp|quot|apos|nbsp);)'
+ charref = r'(?:&#[0-9]+;|&#x[0-9a-fA-F]+;)'
- stringdouble = r'("((' + entityref + r')|(' + charref + r')|("")|([^&"]))*")'
- stringsingle = r"('((" + entityref + r")|(" + charref + r")|('')|([^&']))*')"
+ stringdouble = r'(?:"(?:' + entityref + r'|' + charref + r'|""|[^&"])*")'
+ stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')"
# FIX UNICODE LATER
#elementcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|'
@@ -2202,7 +2202,9 @@ class XQueryLexer(ExtendedRegexLexer):
operator_root_callback),
(r'(castable|cast)(\s+)(as)',
bygroups(Keyword, Text, Keyword), 'singletype'),
- (r'(instance)(\s+)(of)|(treat)(\s+)(as)',
+ (r'(instance)(\s+)(of)',
+ bygroups(Keyword, Text, Keyword), 'itemtype'),
+ (r'(treat)(\s+)(as)',
bygroups(Keyword, Text, Keyword), 'itemtype'),
(r'(case)|(as)', Keyword, 'itemtype'),
(r'(\))(\s*)(as)',
@@ -2229,8 +2231,8 @@ class XQueryLexer(ExtendedRegexLexer):
'namespacedecl': [
include('whitespace'),
(r'\(:', Comment, 'comment'),
- (r'(at)(\s+)'+stringdouble, bygroups(Keyword, Text, String.Double)),
- (r"(at)(\s+)"+stringsingle, bygroups(Keyword, Text, String.Single)),
+ (r'(at)(\s+)('+stringdouble+')', bygroups(Keyword, Text, String.Double)),
+ (r"(at)(\s+)("+stringsingle+')', bygroups(Keyword, Text, String.Single)),
(stringdouble, String.Double),
(stringsingle, String.Single),
(r',', Punctuation),
@@ -2262,7 +2264,7 @@ class XQueryLexer(ExtendedRegexLexer):
include('whitespace'),
(r'\(:', Comment, 'comment'),
(r'\$', Punctuation, 'varname'),
- (r'void\s*\(\s*\)',
+ (r'(void)(\s*)(\()(\s*)(\))',
bygroups(Keyword, Text, Punctuation, Text, Punctuation), 'operator'),
(r'(element|attribute|schema-element|schema-attribute|comment|text|'
r'node|binary|document-node)(\s*)(\()',
@@ -2277,9 +2279,9 @@ class XQueryLexer(ExtendedRegexLexer):
(r'\(\#', Punctuation, 'pragma'),
(r';', Punctuation, '#pop'),
(r'then|else', Keyword, '#pop'),
- (r'(at)(\s+)' + stringdouble,
+ (r'(at)(\s+)(' + stringdouble + ')',
bygroups(Keyword, Text, String.Double), 'namespacedecl'),
- (r'(at)(\s+)' + stringsingle,
+ (r'(at)(\s+)(' + stringsingle + ')',
bygroups(Keyword, Text, String.Single), 'namespacedecl'),
(r'except|intersect|in|is|return|satisfies|to|union|where',
Keyword, 'root'),
@@ -2290,16 +2292,18 @@ class XQueryLexer(ExtendedRegexLexer):
bygroups(Keyword, Text, Keyword, Text, Keyword), 'root'),
(r'(castable|cast)(\s+)(as)',
bygroups(Keyword, Text, Keyword), 'singletype'),
- (r'(instance)(\s+)(of)|(treat)(\s+)(as)',
+ (r'(instance)(\s+)(of)',
+ bygroups(Keyword, Text, Keyword)),
+ (r'(treat)(\s+)(as)',
bygroups(Keyword, Text, Keyword)),
(r'case|as', Keyword, 'itemtype'),
(r'(\))(\s*)(as)', bygroups(Operator, Text, Keyword), 'itemtype'),
- (ncname + r'(:\*)', Keyword.Type, 'operator'),
+ (ncname + r':\*', Keyword.Type, 'operator'),
(qname, Keyword.Type, 'occurrenceindicator'),
],
'kindtest': [
(r'\(:', Comment, 'comment'),
- (r'({)', Punctuation, 'root'),
+ (r'{', Punctuation, 'root'),
(r'(\))([*+?]?)', popstate_kindtest_callback),
(r'\*', Name, 'closekindtest'),
(qname, Name, 'closekindtest'),
@@ -2308,7 +2312,7 @@ class XQueryLexer(ExtendedRegexLexer):
'kindtestforpi': [
(r'\(:', Comment, 'comment'),
(r'\)', Punctuation, '#pop'),
- (ncname, bygroups(Name.Variable, Name.Variable)),
+ (ncname, Name.Variable),
(stringdouble, String.Double),
(stringsingle, String.Single),
],
@@ -2514,8 +2518,9 @@ class XQueryLexer(ExtendedRegexLexer):
(r'(element)(\s+)(?=' +qname+ r')',
bygroups(Keyword, Text), 'element_qname'),
#PROCESSING_INSTRUCTION
- (r'(processing-instruction)(\s+)' + ncname + r'(\s*)(\{)',
- bygroups(Keyword, Text, Name.Variable, Text, Punctuation), 'operator'),
+ (r'(processing-instruction)(\s+)(' + ncname + r')(\s*)(\{)',
+ bygroups(Keyword, Text, Name.Variable, Text, Punctuation),
+ 'operator'),
(r'(declare|define)(\s+)(function)',
bygroups(Keyword, Text, Keyword)),