diff options
author | Tim Hatch <tim@timhatch.com> | 2016-05-31 23:52:12 -0700 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2016-05-31 23:52:12 -0700 |
commit | ab5bb1db3d75fb35b2941c52b0231966e926f2cd (patch) | |
tree | 4f27c6c6e0f17d9d6eb7a387b5334426f2207084 | |
parent | 6c5623318a7449c0d2ae56f84a64f353302ba100 (diff) | |
download | pygments-ab5bb1db3d75fb35b2941c52b0231966e926f2cd.tar.gz |
Rework CoffeeScript use of slashstartsregex.
Fixes #991
-rw-r--r-- | pygments/lexers/javascript.py | 22 | ||||
-rw-r--r-- | tests/test_javascript.py | 84 |
2 files changed, 98 insertions, 8 deletions
```diff
diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py
index d8ce87d4..a23ba184 100644
--- a/pygments/lexers/javascript.py
+++ b/pygments/lexers/javascript.py
@@ -1016,6 +1016,12 @@ class CoffeeScriptLexer(RegexLexer):
     filenames = ['*.coffee']
     mimetypes = ['text/coffeescript']
 
+
+    _operator_re = (
+        r'\+\+|~|&&|\band\b|\bor\b|\bis\b|\bisnt\b|\bnot\b|\?|:|'
+        r'\|\||\\(?=\n)|'
+        r'(<<|>>>?|==?(?!>)|!=?|=(?!>)|-(?!>)|[<>+*`%&\|\^/])=?')
+
     flags = re.DOTALL
     tokens = {
         'commentsandwhitespace': [
@@ -1034,17 +1040,17 @@ class CoffeeScriptLexer(RegexLexer):
             (r'///', String.Regex, ('#pop', 'multilineregex')),
             (r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
              r'([gim]+\b|\B)', String.Regex, '#pop'),
+            # This isn't really guarding against mishighlighting well-formed
+            # code, just the ability to infinite-loop between root and
+            # slashstartsregex.
+            (r'/', Operator),
             default('#pop'),
         ],
         'root': [
-            # this next expr leads to infinite loops root -> slashstartsregex
-            # (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
             include('commentsandwhitespace'),
-            (r'\+\+|~|&&|\band\b|\bor\b|\bis\b|\bisnt\b|\bnot\b|\?|:|'
-             r'\|\||\\(?=\n)|'
-             r'(<<|>>>?|==?(?!>)|!=?|=(?!>)|-(?!>)|[<>+*`%&|^/])=?',
-             Operator, 'slashstartsregex'),
-            (r'(?:\([^()]*\))?\s*[=-]>', Name.Function),
+            (r'^(?=\s|/)', Text, 'slashstartsregex'),
+            (_operator_re, Operator, 'slashstartsregex'),
+            (r'(?:\([^()]*\))?\s*[=-]>', Name.Function, 'slashstartsregex'),
             (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
             (r'[})\].]', Punctuation),
             (r'(?<![.$])(for|own|in|of|while|until|'
@@ -1065,7 +1071,7 @@ class CoffeeScriptLexer(RegexLexer):
             (r'@[$a-zA-Z_][\w.:$]*\s*[:=]\s', Name.Variable.Instance,
              'slashstartsregex'),
             (r'@', Name.Other, 'slashstartsregex'),
-            (r'@?[$a-zA-Z_][\w$]*', Name.Other, 'slashstartsregex'),
+            (r'@?[$a-zA-Z_][\w$]*', Name.Other),
             (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
             (r'0x[0-9a-fA-F]+', Number.Hex),
             (r'[0-9]+', Number.Integer),
diff --git a/tests/test_javascript.py b/tests/test_javascript.py
new file mode 100644
index 00000000..59890659
--- /dev/null
+++ b/tests/test_javascript.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+    Javascript tests
+    ~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+
+from pygments.lexers import CoffeeScriptLexer
+from pygments.token import Token
+
+COFFEE_SLASH_GOLDEN = [
+    # input_str, slashes_are_regex_here
+    (r'/\\/', True),
+    (r'/\\/i', True),
+    (r'/\//', True),
+    (r'/(\s)/', True),
+    ('/a{2,8}/', True),
+    ('/b*c?d+/', True),
+    ('/(capture-match)/', True),
+    ('/(?:do-not-capture-match)/', True),
+    ('/this|or|that/', True),
+    ('/[char-set]/', True),
+    ('/[^neg-char_st]/', True),
+    ('/^.*$/', True),
+    (r'/\n(\f)\0\1\d\b\cm\u1234/', True),
+    (r'/^.?([^/\\\n\w]*)a\1+$/.something(or_other) # something more complex', True),
+    ("foo = (str) ->\n /'|\"/.test str", True),
+    ('a = a / b / c', False),
+    ('a = a/b/c', False),
+    ('a = a/b/ c', False),
+    ('a = a /b/c', False),
+    ('a = 1 + /d/.test(a)', True),
+]
+
+def test_coffee_slashes():
+    for input_str, slashes_are_regex_here in COFFEE_SLASH_GOLDEN:
+        yield coffee_runner, input_str, slashes_are_regex_here
+
+def coffee_runner(input_str, slashes_are_regex_here):
+    lex = CoffeeScriptLexer()
+    output = list(lex.get_tokens(input_str))
+    print(output)
+    for t, s in output:
+        if '/' in s:
+            is_regex = t is Token.String.Regex
+            assert is_regex == slashes_are_regex_here, (t, s)
+
+class CoffeeTest(unittest.TestCase):
+    def setUp(self):
+        self.lexer = CoffeeScriptLexer()
+
+    def testMixedSlashes(self):
+        fragment = u'a?/foo/:1/2;\n'
+        tokens = [
+            (Token.Name.Other, u'a'),
+            (Token.Operator, u'?'),
+            (Token.Literal.String.Regex, u'/foo/'),
+            (Token.Operator, u':'),
+            (Token.Literal.Number.Integer, u'1'),
+            (Token.Operator, u'/'),
+            (Token.Literal.Number.Integer, u'2'),
+            (Token.Punctuation, u';'),
+            (Token.Text, u'\n'),
+        ]
+        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+    def testBewareInfiniteLoop(self):
+        # This demonstrates the case that "This isn't really guarding" comment
+        # refers to.
+        fragment = '/a/x;\n'
+        tokens = [
+            (Token.Text, ''),
+            (Token.Operator, '/'),
+            (Token.Name.Other, 'a'),
+            (Token.Operator, '/'),
+            (Token.Name.Other, 'x'),
+            (Token.Punctuation, ';'),
+            (Token.Text, '\n'),
+        ]
+        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
```