Rework CoffeeScript use of slashstartsregex.

Fixes #991
author: Tim Hatch <tim@timhatch.com> 2016-05-31 23:52:12 -0700
committer: Tim Hatch <tim@timhatch.com> 2016-05-31 23:52:12 -0700
commit: ab5bb1db3d75fb35b2941c52b0231966e926f2cd (patch)
tree: 4f27c6c6e0f17d9d6eb7a387b5334426f2207084
parent: 6c5623318a7449c0d2ae56f84a64f353302ba100 (diff)
download: pygments-ab5bb1db3d75fb35b2941c52b0231966e926f2cd.tar.gz
2 files changed, 98 insertions, 8 deletions
diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py
index d8ce87d4..a23ba184 100644
--- a/pygments/lexers/javascript.py
+++ b/pygments/lexers/javascript.py
@@ -1016,6 +1016,12 @@ class CoffeeScriptLexer(RegexLexer):
     filenames = ['*.coffee']
     mimetypes = ['text/coffeescript']
 
+
+    _operator_re = (
+        r'\+\+|~|&&|\band\b|\bor\b|\bis\b|\bisnt\b|\bnot\b|\?|:|'
+        r'\|\||\\(?=\n)|'
+        r'(<<|>>>?|==?(?!>)|!=?|=(?!>)|-(?!>)|[<>+*`%&\|\^/])=?')
+
     flags = re.DOTALL
     tokens = {
         'commentsandwhitespace': [
@@ -1034,17 +1040,17 @@ class CoffeeScriptLexer(RegexLexer):
             (r'///', String.Regex, ('#pop', 'multilineregex')),
             (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
              r'([gim]+\b|\B)', String.Regex, '#pop'),
+            # This isn't really guarding against mishighlighting well-formed
+            # code, just the ability to infinite-loop between root and
+            # slashstartsregex.
+            (r'/', Operator),
             default('#pop'),
         ],
         'root': [
-            # this next expr leads to infinite loops root -> slashstartsregex
-            # (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
             include('commentsandwhitespace'),
-            (r'\+\+|~|&&|\band\b|\bor\b|\bis\b|\bisnt\b|\bnot\b|\?|:|'
-             r'\|\||\\(?=\n)|'
-             r'(<<|>>>?|==?(?!>)|!=?|=(?!>)|-(?!>)|[<>+*`%&|^/])=?',
-             Operator, 'slashstartsregex'),
-            (r'(?:\([^()]*\))?\s*[=-]>', Name.Function),
+            (r'^(?=\s|/)', Text, 'slashstartsregex'),
+            (_operator_re, Operator, 'slashstartsregex'),
+            (r'(?:\([^()]*\))?\s*[=-]>', Name.Function, 'slashstartsregex'),
             (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
             (r'[})\].]', Punctuation),
             (r'(?<![.$])(for|own|in|of|while|until|'
@@ -1065,7 +1071,7 @@ class CoffeeScriptLexer(RegexLexer):
             (r'@[$a-zA-Z_][\w.:$]*\s*[:=]\s', Name.Variable.Instance,
              'slashstartsregex'),
             (r'@', Name.Other, 'slashstartsregex'),
-            (r'@?[$a-zA-Z_][\w$]*', Name.Other, 'slashstartsregex'),
+            (r'@?[$a-zA-Z_][\w$]*', Name.Other),
             (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
             (r'0x[0-9a-fA-F]+', Number.Hex),
             (r'[0-9]+', Number.Integer),
diff --git a/tests/test_javascript.py b/tests/test_javascript.py
new file mode 100644
index 00000000..59890659
--- /dev/null
+++ b/tests/test_javascript.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+    Javascript tests
+    ~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+
+from pygments.lexers import CoffeeScriptLexer
+from pygments.token import Token
+
+COFFEE_SLASH_GOLDEN = [
+    # input_str, slashes_are_regex_here
+    (r'/\\/', True),
+    (r'/\\/i', True),
+    (r'/\//', True),
+    (r'/(\s)/', True),
+    ('/a{2,8}/', True),
+    ('/b*c?d+/', True),
+    ('/(capture-match)/', True),
+    ('/(?:do-not-capture-match)/', True),
+    ('/this|or|that/', True),
+    ('/[char-set]/', True),
+    ('/[^neg-char_st]/', True),
+    ('/^.*$/', True),
+    (r'/\n(\f)\0\1\d\b\cm\u1234/', True),
+    (r'/^.?([^/\\\n\w]*)a\1+$/.something(or_other) # something more complex', True),
+    ("foo = (str) ->\n  /'|\"/.test str", True),
+    ('a = a / b / c', False),
+    ('a = a/b/c', False),
+    ('a = a/b/ c', False),
+    ('a = a /b/c', False),
+    ('a = 1 + /d/.test(a)', True),
+]
+
+def test_coffee_slashes():
+    for input_str, slashes_are_regex_here in COFFEE_SLASH_GOLDEN:
+        yield coffee_runner, input_str, slashes_are_regex_here
+
+def coffee_runner(input_str, slashes_are_regex_here):
+    lex = CoffeeScriptLexer()
+    output = list(lex.get_tokens(input_str))
+    print(output)
+    for t, s in output:
+        if '/' in s:
+            is_regex = t is Token.String.Regex
+            assert is_regex == slashes_are_regex_here, (t, s)
+
+class CoffeeTest(unittest.TestCase):
+    def setUp(self):
+        self.lexer = CoffeeScriptLexer()
+
+    def testMixedSlashes(self):
+        fragment = u'a?/foo/:1/2;\n'
+        tokens = [
+            (Token.Name.Other, u'a'),
+            (Token.Operator, u'?'),
+            (Token.Literal.String.Regex, u'/foo/'),
+            (Token.Operator, u':'),
+            (Token.Literal.Number.Integer, u'1'),
+            (Token.Operator, u'/'),
+            (Token.Literal.Number.Integer, u'2'),
+            (Token.Punctuation, u';'),
+            (Token.Text, u'\n'),
+        ]
+        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+    def testBewareInfiniteLoop(self):
+        # This demonstrates the case that the "This isn't really guarding"
+        # comment in pygments/lexers/javascript.py refers to.
+        fragment = '/a/x;\n'
+        tokens = [
+            (Token.Text, ''),
+            (Token.Operator, '/'),
+            (Token.Name.Other, 'a'),
+            (Token.Operator, '/'),
+            (Token.Name.Other, 'x'),
+            (Token.Punctuation, ';'),
+            (Token.Text, '\n'),
+        ]
+        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
author	Tim Hatch <tim@timhatch.com>	2016-05-31 23:52:12 -0700
committer	Tim Hatch <tim@timhatch.com>	2016-05-31 23:52:12 -0700
commit	ab5bb1db3d75fb35b2941c52b0231966e926f2cd (patch)
tree	4f27c6c6e0f17d9d6eb7a387b5334426f2207084
parent	6c5623318a7449c0d2ae56f84a64f353302ba100 (diff)
download	pygments-ab5bb1db3d75fb35b2941c52b0231966e926f2cd.tar.gz