diff options
author | Kurt McKee <contactme@kurtmckee.org> | 2020-09-06 07:53:35 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-09-06 14:53:35 +0200 |
commit | 5ec283a3592dfdef7aff34ab00ca8685c4d37470 (patch) | |
tree | dc2f7a3013dcda614bf63e7c9c643924d4f9c4e2 | |
parent | 43c280b18596bf3f8905232083f1239aca6ef9fd (diff) | |
download | pygments-git-5ec283a3592dfdef7aff34ab00ca8685c4d37470.tar.gz |
Overhaul Javascript numeric literals (#1534)
* Rename the "Javascript" tests to reflect that they are for CoffeeScript
This change also modifies the module docstring to reflect the file's purpose.
* Overhaul the Javascript numeric literal parsing
Fixes #307
This patch contains the following changes:
* Adds 50+ unit tests for Javascript numeric literals
* Forces ASCII numbers for float literals (so, now reject `.୪` (U+0B6A))
* Adds support for Javascript's BigInt notation (`100n`)
* Adds support for leading-zero-only octal notation (`0777`)
* Adds support for scientific notation with no significand (`1e10`)
Numeric literal parsing is based on information at:
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Grammar_and_types
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures
-rw-r--r-- | pygments/lexers/javascript.py | 13 | ||||
-rw-r--r-- | tests/test_coffeescript.py | 85 | ||||
-rw-r--r-- | tests/test_javascript.py | 139 |
3 files changed, 162 insertions, 75 deletions
diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index 335af320..14b51ebb 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -64,11 +64,14 @@ class JavascriptLexer(RegexLexer): (r'\A#! ?/.*?\n', Comment.Hashbang), # recognized by node.js (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'), include('commentsandwhitespace'), - (r'(\.\d+|[0-9]+\.[0-9]*)([eE][-+]?[0-9]+)?', Number.Float), - (r'0[bB][01]+', Number.Bin), - (r'0[oO][0-7]+', Number.Oct), - (r'0[xX][0-9a-fA-F]+', Number.Hex), - (r'[0-9]+', Number.Integer), + + # Numeric literals + (r'0[bB][01]+n?', Number.Bin), + (r'0[oO]?[0-7]+n?', Number.Oct), # Browsers support "0o7" and "07" notations + (r'0[xX][0-9a-fA-F]+n?', Number.Hex), + (r'[0-9]+n', Number.Integer), # Javascript BigInt requires an "n" postfix + (r'(\.[0-9]+|[0-9]+\.[0-9]*|[0-9]+)([eE][-+]?[0-9]+)?', Number.Float), + (r'\.\.\.|=>', Punctuation), (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|' r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'), diff --git a/tests/test_coffeescript.py b/tests/test_coffeescript.py new file mode 100644 index 00000000..41ca8e0d --- /dev/null +++ b/tests/test_coffeescript.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +""" + CoffeeScript tests + ~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +import pytest + +from pygments.lexers import CoffeeScriptLexer +from pygments.token import Token + +COFFEE_SLASH_GOLDEN = [ + # input_str, slashes_are_regex_here + (r'/\\/', True), + (r'/\\/i', True), + (r'/\//', True), + (r'/(\s)/', True), + ('/a{2,8}/', True), + ('/b*c?d+/', True), + ('/(capture-match)/', True), + ('/(?:do-not-capture-match)/', True), + ('/this|or|that/', True), + ('/[char-set]/', True), + ('/[^neg-char_st]/', True), + ('/^.*$/', True), + (r'/\n(\f)\0\1\d\b\cm\u1234/', True), + (r'/^.?([^/\\\n\w]*)a\1+$/.something(or_other) # something more complex', True), + ("foo = (str) ->\n /'|\"/.test str", True), + ('a = a / b / c', False), + ('a = a/b/c', False), + ('a = a/b/ c', False), + ('a = a /b/c', False), + ('a = 1 + /d/.test(a)', True), +] + + +@pytest.fixture(scope='module') +def lexer(): + yield CoffeeScriptLexer() + + +@pytest.mark.parametrize('golden', COFFEE_SLASH_GOLDEN) +def test_coffee_slashes(lexer, golden): + input_str, slashes_are_regex_here = golden + output = list(lexer.get_tokens(input_str)) + print(output) + for t, s in output: + if '/' in s: + is_regex = t is Token.String.Regex + assert is_regex == slashes_are_regex_here, (t, s) + + +def test_mixed_slashes(lexer): + fragment = u'a?/foo/:1/2;\n' + tokens = [ + (Token.Name.Other, u'a'), + (Token.Operator, u'?'), + (Token.Literal.String.Regex, u'/foo/'), + (Token.Operator, u':'), + (Token.Literal.Number.Integer, u'1'), + (Token.Operator, u'/'), + (Token.Literal.Number.Integer, u'2'), + (Token.Punctuation, u';'), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_beware_infinite_loop(lexer): + # This demonstrates the case that "This isn't really guarding" comment + # refers to. 
+ fragment = '/a/x;\n' + tokens = [ + (Token.Text, ''), + (Token.Operator, '/'), + (Token.Name.Other, 'a'), + (Token.Operator, '/'), + (Token.Name.Other, 'x'), + (Token.Punctuation, ';'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens diff --git a/tests/test_javascript.py b/tests/test_javascript.py index 100e1f22..78350612 100644 --- a/tests/test_javascript.py +++ b/tests/test_javascript.py @@ -9,77 +9,76 @@ import pytest -from pygments.lexers import CoffeeScriptLexer -from pygments.token import Token - -COFFEE_SLASH_GOLDEN = [ - # input_str, slashes_are_regex_here - (r'/\\/', True), - (r'/\\/i', True), - (r'/\//', True), - (r'/(\s)/', True), - ('/a{2,8}/', True), - ('/b*c?d+/', True), - ('/(capture-match)/', True), - ('/(?:do-not-capture-match)/', True), - ('/this|or|that/', True), - ('/[char-set]/', True), - ('/[^neg-char_st]/', True), - ('/^.*$/', True), - (r'/\n(\f)\0\1\d\b\cm\u1234/', True), - (r'/^.?([^/\\\n\w]*)a\1+$/.something(or_other) # something more complex', True), - ("foo = (str) ->\n /'|\"/.test str", True), - ('a = a / b / c', False), - ('a = a/b/c', False), - ('a = a/b/ c', False), - ('a = a /b/c', False), - ('a = 1 + /d/.test(a)', True), -] +from pygments.lexers.javascript import JavascriptLexer +from pygments.token import Number @pytest.fixture(scope='module') def lexer(): - yield CoffeeScriptLexer() - - -@pytest.mark.parametrize('golden', COFFEE_SLASH_GOLDEN) -def test_coffee_slashes(lexer, golden): - input_str, slashes_are_regex_here = golden - output = list(lexer.get_tokens(input_str)) - print(output) - for t, s in output: - if '/' in s: - is_regex = t is Token.String.Regex - assert is_regex == slashes_are_regex_here, (t, s) - - -def test_mixed_slashes(lexer): - fragment = u'a?/foo/:1/2;\n' - tokens = [ - (Token.Name.Other, u'a'), - (Token.Operator, u'?'), - (Token.Literal.String.Regex, u'/foo/'), - (Token.Operator, u':'), - (Token.Literal.Number.Integer, u'1'), - (Token.Operator, u'/'), - 
(Token.Literal.Number.Integer, u'2'), - (Token.Punctuation, u';'), - (Token.Text, u'\n'), - ] - assert list(lexer.get_tokens(fragment)) == tokens - - -def test_beware_infinite_loop(lexer): - # This demonstrates the case that "This isn't really guarding" comment - # refers to. - fragment = '/a/x;\n' - tokens = [ - (Token.Text, ''), - (Token.Operator, '/'), - (Token.Name.Other, 'a'), - (Token.Operator, '/'), - (Token.Name.Other, 'x'), - (Token.Punctuation, ';'), - (Token.Text, '\n'), - ] - assert list(lexer.get_tokens(fragment)) == tokens + yield JavascriptLexer() + + +@pytest.mark.parametrize( + 'text', + ( + '1', '1.', '.1', '1.1', '1e1', '1E1', '1e+1', '1E-1', '1.e1', '.1e1', + '0888', # octal prefix with non-octal numbers + ) +) +def test_float_literal_positive_matches(lexer, text): + """Test literals that should be tokenized as float literals.""" + assert list(lexer.get_tokens(text))[0] == (Number.Float, text) + + +@pytest.mark.parametrize('text', ('.\u0b6a', '.', '1..', '1n', '1ee', '1e', '1e-', '1e--1', '1e++1', '1e1.0')) +def test_float_literals_negative_matches(lexer, text): + """Test text that should **not** be tokenized as float literals.""" + assert list(lexer.get_tokens(text))[0] != (Number.Float, text) + + +@pytest.mark.parametrize('text', ('0n', '123n')) +def test_integer_literal_positive_matches(lexer, text): + """Test literals that should be tokenized as integer literals.""" + assert list(lexer.get_tokens(text))[0] == (Number.Integer, text) + + +@pytest.mark.parametrize('text', ('1N', '1', '1.0')) +def test_integer_literals_negative_matches(lexer, text): + """Test text that should **not** be tokenized as integer literals.""" + assert list(lexer.get_tokens(text))[0] != (Number.Integer, text) + + +@pytest.mark.parametrize('text', ('0b01', '0B10n')) +def test_binary_literal_positive_matches(lexer, text): + """Test literals that should be tokenized as binary literals.""" + assert list(lexer.get_tokens(text))[0] == (Number.Bin, text) + + 
+@pytest.mark.parametrize('text', ('0b0N', '0b', '0bb', '0b2')) +def test_binary_literals_negative_matches(lexer, text): + """Test text that should **not** be tokenized as binary literals.""" + assert list(lexer.get_tokens(text))[0] != (Number.Bin, text) + + +@pytest.mark.parametrize('text', ('017', '071n', '0o11', '0O77n')) +def test_octal_literal_positive_matches(lexer, text): + """Test literals that should be tokenized as octal literals.""" + assert list(lexer.get_tokens(text))[0] == (Number.Oct, text) + + +@pytest.mark.parametrize('text', ('01N', '089', '098n', '0o', '0OO', '0o88', '0O88n')) +def test_octal_literals_negative_matches(lexer, text): + """Test text that should **not** be tokenized as octal literals.""" + assert list(lexer.get_tokens(text))[0] != (Number.Oct, text) + + +@pytest.mark.parametrize('text', ('0x01', '0Xefn', '0x0EF')) +def test_hexadecimal_literal_positive_matches(lexer, text): + """Test literals that should be tokenized as hexadecimal literals.""" + assert list(lexer.get_tokens(text))[0] == (Number.Hex, text) + + +@pytest.mark.parametrize('text', ('0x0N', '0x', '0Xx', '0xg', '0xhn')) +def test_hexadecimal_literals_negative_matches(lexer, text): + """Test text that should **not** be tokenized as hexadecimal literals.""" + assert list(lexer.get_tokens(text))[0] != (Number.Hex, text) |