summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: thatch <devnull@localhost> 2009-06-21 14:42:13 -0700
committer: thatch <devnull@localhost> 2009-06-21 14:42:13 -0700
commit: 0878247fbf35cd847ae458970f04f10a48e8e873 (patch)
tree: 94e6a427b3bd3833d4bbb26fc5fdcbc653ad1d78
parent: 089200cd819b04e3fe6f456ddd8cad26c126fd83 (diff)
download: pygments-0878247fbf35cd847ae458970f04f10a48e8e873.tar.gz
#403: Make JS regex parsing better, by Pumbaa80
-rw-r--r-- pygments/lexers/web.py | 38
-rw-r--r-- tests/examplefiles/evil_regex.js | 48
2 files changed, 76 insertions, 10 deletions
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index c66125b9..1274eb26 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -39,19 +39,37 @@ class JavascriptLexer(RegexLexer):
flags = re.DOTALL
tokens = {
- 'root': [
+ 'commentsandwhitespace': [
(r'\s+', Text),
(r'<!--', Comment),
(r'//.*?\n', Comment),
- (r'/\*.*?\*/', Comment),
- (r'/(\\\\|\\/|[^/\n])*/[gim]+\b', String.Regex),
- (r'/(\\\\|\\/|[^/\n])*/(?=\s*[,);\n])', String.Regex),
- (r'/(\\\\|\\/|[^/\n])*/(?=\s*\.[a-z])', String.Regex),
- (r'[~\^\*!%&<>\|+=:;,/?\\-]+', Operator),
- (r'[{}\[\]();.]+', Punctuation),
- (r'(for|in|while|do|break|return|continue|if|else|throw|try|'
- r'catch|new|typeof|instanceof|this)\b', Keyword),
- (r'(var|with|const|label|function)\b', Keyword.Declaration),
+ (r'/\*.*?\*/', Comment)
+ ],
+ 'slashstartsregex': [
+ include('commentsandwhitespace'),
+ (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
+ r'([gim]+\b|\B)', String.Regex, '#pop'),
+ (r'(?=/)', Text, ('#pop', 'badregex')),
+ (r'', Text, '#pop')
+ ],
+ 'badregex': [
+ ('\n', Text, '#pop')
+ ],
+ 'root': [
+ (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
+ include('commentsandwhitespace'),
+ (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
+ r'(<<|>>>?|==?|!=?|[-<>+*%&\|\^/])=?', Operator, 'slashstartsregex'),
+ (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
+ (r'[})\].]', Punctuation),
+ (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
+ r'throw|try|catch|finally|new|delete|typeof|instanceof|void|'
+ r'this)\b', Keyword, 'slashstartsregex'),
+ (r'(var|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
+ (r'(abstract|boolean|byte|char|class|const|debugger|double|enum|export|'
+ r'extends|final|float|goto|implements|import|int|interface|long|native|'
+ r'package|private|protected|public|short|static|super|synchronized|throws|'
+ r'transient|volatile)\b', Keyword.Reserved),
(r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
(r'(Array|Boolean|Date|Error|Function|Math|netscape|'
r'Number|Object|Packages|RegExp|String|sun|decodeURI|'
diff --git a/tests/examplefiles/evil_regex.js b/tests/examplefiles/evil_regex.js
new file mode 100644
index 00000000..3839c3f4
--- /dev/null
+++ b/tests/examplefiles/evil_regex.js
@@ -0,0 +1,48 @@
+/regexp/.test(foo) || x = [/regexp/,/regexp/, /regexp/, // comment
+// comment
+/regexp/];
+if (/regexp/.test(string))
+{/regexp/.test(string);};
+x =/regexp/;
+x = /regexp/;
+if (0</regexp/.exec(string) || 1>/regexp/.exec(string))
+x = { u:/regexp/, v: /regexp/ };
+foo();/regexp/.test(string); /regexp/.test(string);
+if (!/regexp/) foobar();
+x = u %/regexp/.exec(string) */regexp/.exec(string) / /regexp/.exec(string);
+x = u?/regexp/.exec(string) : v +/regexp/.exec(string) -/regexp/.exec(string);
+a = u^/regexp/.exec(string) &/regexp/.exec(string) |/regexp/.exec(string) +~/regexp/.exec(string);
+x = /regexp/ /* a comment */ ;
+x = /[reg/exp]/;
+x = 4/2/i;
+x = (a == b) ?/* this is a comment */ c : d;
+/// a comment //
+a = /regex//2/1; //syntactically correct, returns NaN
+
+
+
+
+/* original examples */
+
+// regex
+
+blah(/abc/);
+x = /abc/;
+x = /abc/.match;
+
+// math
+
+blah(1/2); //comment
+x = 1 / 2 / 3;
+x = 1/1/.1;
+
+// broken
+
+x=/1/;
+x=1/a/g;
+x=a/a/g;
+
+// real-world
+
+var x = 1/(1+Math.sqrt(sum)); // convert to number between 1-0
+return Math.round((num / den) * 100)/100;