author    Kurt McKee <contactme@kurtmckee.org>    2020-09-06 07:53:35 -0500
committer GitHub <noreply@github.com>             2020-09-06 14:53:35 +0200
commit    5ec283a3592dfdef7aff34ab00ca8685c4d37470 (patch)
tree      dc2f7a3013dcda614bf63e7c9c643924d4f9c4e2
parent    43c280b18596bf3f8905232083f1239aca6ef9fd (diff)
Overhaul Javascript numeric literals (#1534)
* Rename the "Javascript" tests to reflect that they are for CoffeeScript

  This change also modifies the module docstring to reflect the file's purpose.

* Overhaul the Javascript numeric literal parsing

  Fixes #307

  This patch contains the following changes:

  * Adds 50+ unit tests for Javascript numeric literals
  * Forces ASCII numbers for float literals (so, `.рнк` is now rejected)
  * Adds support for Javascript's BigInt notation (`100n`)
  * Adds support for leading-zero-only octal notation (`0777`)
  * Adds support for scientific notation with no decimal point (`1e10`)

  Numeric literal parsing is based on information at:

  * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Grammar_and_types
  * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Data_structures
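For illustration, a minimal sketch (not part of the patch itself) of how the new behavior shows up through the public lexer API, assuming a Pygments checkout that includes this commit:

```python
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Number

lexer = JavascriptLexer()

# BigInt literals now lex as a single Integer token including the "n" suffix.
assert list(lexer.get_tokens('100n'))[0] == (Number.Integer, '100n')

# Leading-zero octal notation is recognized alongside the "0o" prefix.
assert list(lexer.get_tokens('0777'))[0] == (Number.Oct, '0777')

# Scientific notation no longer requires a decimal point.
assert list(lexer.get_tokens('1e10'))[0] == (Number.Float, '1e10')
```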
-rw-r--r--  pygments/lexers/javascript.py    13
-rw-r--r--  tests/test_coffeescript.py       85
-rw-r--r--  tests/test_javascript.py        139

3 files changed, 162 insertions(+), 75 deletions(-)
diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py
index 335af320..14b51ebb 100644
--- a/pygments/lexers/javascript.py
+++ b/pygments/lexers/javascript.py
@@ -64,11 +64,14 @@ class JavascriptLexer(RegexLexer):
(r'\A#! ?/.*?\n', Comment.Hashbang), # recognized by node.js
(r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
include('commentsandwhitespace'),
- (r'(\.\d+|[0-9]+\.[0-9]*)([eE][-+]?[0-9]+)?', Number.Float),
- (r'0[bB][01]+', Number.Bin),
- (r'0[oO][0-7]+', Number.Oct),
- (r'0[xX][0-9a-fA-F]+', Number.Hex),
- (r'[0-9]+', Number.Integer),
+
+ # Numeric literals
+ (r'0[bB][01]+n?', Number.Bin),
+ (r'0[oO]?[0-7]+n?', Number.Oct), # Browsers support "0o7" and "07" notations
+ (r'0[xX][0-9a-fA-F]+n?', Number.Hex),
+ (r'[0-9]+n', Number.Integer), # Javascript BigInt requires an "n" postfix
+ (r'(\.[0-9]+|[0-9]+\.[0-9]*|[0-9]+)([eE][-+]?[0-9]+)?', Number.Float),
+
(r'\.\.\.|=>', Punctuation),
(r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
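A note on rule ordering in the hunk above (a sketch, not part of the patch): the octal rule runs before the float rule, so `0777` is consumed as octal, while `0888` fails the `[0-7]+` character class and falls through to the float rule's plain `[0-9]+` alternative, matching the `'0888'` test case added below:

```python
from pygments.lexers.javascript import JavascriptLexer
from pygments.token import Number

lexer = JavascriptLexer()

# Every digit is octal, so r'0[oO]?[0-7]+n?' matches the whole literal.
assert list(lexer.get_tokens('0777'))[0] == (Number.Oct, '0777')

# "8" is not an octal digit, so the octal rule fails and the float
# rule's [0-9]+ alternative picks up the whole literal instead.
assert list(lexer.get_tokens('0888'))[0] == (Number.Float, '0888')
```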
diff --git a/tests/test_coffeescript.py b/tests/test_coffeescript.py
new file mode 100644
index 00000000..41ca8e0d
--- /dev/null
+++ b/tests/test_coffeescript.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+"""
+ CoffeeScript tests
+ ~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.lexers import CoffeeScriptLexer
+from pygments.token import Token
+
+COFFEE_SLASH_GOLDEN = [
+ # input_str, slashes_are_regex_here
+ (r'/\\/', True),
+ (r'/\\/i', True),
+ (r'/\//', True),
+ (r'/(\s)/', True),
+ ('/a{2,8}/', True),
+ ('/b*c?d+/', True),
+ ('/(capture-match)/', True),
+ ('/(?:do-not-capture-match)/', True),
+ ('/this|or|that/', True),
+ ('/[char-set]/', True),
+ ('/[^neg-char_st]/', True),
+ ('/^.*$/', True),
+ (r'/\n(\f)\0\1\d\b\cm\u1234/', True),
+ (r'/^.?([^/\\\n\w]*)a\1+$/.something(or_other) # something more complex', True),
+ ("foo = (str) ->\n /'|\"/.test str", True),
+ ('a = a / b / c', False),
+ ('a = a/b/c', False),
+ ('a = a/b/ c', False),
+ ('a = a /b/c', False),
+ ('a = 1 + /d/.test(a)', True),
+]
+
+
+@pytest.fixture(scope='module')
+def lexer():
+ yield CoffeeScriptLexer()
+
+
+@pytest.mark.parametrize('golden', COFFEE_SLASH_GOLDEN)
+def test_coffee_slashes(lexer, golden):
+ input_str, slashes_are_regex_here = golden
+ output = list(lexer.get_tokens(input_str))
+ print(output)
+ for t, s in output:
+ if '/' in s:
+ is_regex = t is Token.String.Regex
+ assert is_regex == slashes_are_regex_here, (t, s)
+
+
+def test_mixed_slashes(lexer):
+ fragment = u'a?/foo/:1/2;\n'
+ tokens = [
+ (Token.Name.Other, u'a'),
+ (Token.Operator, u'?'),
+ (Token.Literal.String.Regex, u'/foo/'),
+ (Token.Operator, u':'),
+ (Token.Literal.Number.Integer, u'1'),
+ (Token.Operator, u'/'),
+ (Token.Literal.Number.Integer, u'2'),
+ (Token.Punctuation, u';'),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_beware_infinite_loop(lexer):
+ # This demonstrates the case that "This isn't really guarding" comment
+ # refers to.
+ fragment = '/a/x;\n'
+ tokens = [
+ (Token.Text, ''),
+ (Token.Operator, '/'),
+ (Token.Name.Other, 'a'),
+ (Token.Operator, '/'),
+ (Token.Name.Other, 'x'),
+ (Token.Punctuation, ';'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
diff --git a/tests/test_javascript.py b/tests/test_javascript.py
index 100e1f22..78350612 100644
--- a/tests/test_javascript.py
+++ b/tests/test_javascript.py
@@ -9,77 +9,76 @@
import pytest
-from pygments.lexers import CoffeeScriptLexer
-from pygments.token import Token
-
-COFFEE_SLASH_GOLDEN = [
- # input_str, slashes_are_regex_here
- (r'/\\/', True),
- (r'/\\/i', True),
- (r'/\//', True),
- (r'/(\s)/', True),
- ('/a{2,8}/', True),
- ('/b*c?d+/', True),
- ('/(capture-match)/', True),
- ('/(?:do-not-capture-match)/', True),
- ('/this|or|that/', True),
- ('/[char-set]/', True),
- ('/[^neg-char_st]/', True),
- ('/^.*$/', True),
- (r'/\n(\f)\0\1\d\b\cm\u1234/', True),
- (r'/^.?([^/\\\n\w]*)a\1+$/.something(or_other) # something more complex', True),
- ("foo = (str) ->\n /'|\"/.test str", True),
- ('a = a / b / c', False),
- ('a = a/b/c', False),
- ('a = a/b/ c', False),
- ('a = a /b/c', False),
- ('a = 1 + /d/.test(a)', True),
-]
+from pygments.lexers.javascript import JavascriptLexer
+from pygments.token import Number
@pytest.fixture(scope='module')
def lexer():
- yield CoffeeScriptLexer()
-
-
-@pytest.mark.parametrize('golden', COFFEE_SLASH_GOLDEN)
-def test_coffee_slashes(lexer, golden):
- input_str, slashes_are_regex_here = golden
- output = list(lexer.get_tokens(input_str))
- print(output)
- for t, s in output:
- if '/' in s:
- is_regex = t is Token.String.Regex
- assert is_regex == slashes_are_regex_here, (t, s)
-
-
-def test_mixed_slashes(lexer):
- fragment = u'a?/foo/:1/2;\n'
- tokens = [
- (Token.Name.Other, u'a'),
- (Token.Operator, u'?'),
- (Token.Literal.String.Regex, u'/foo/'),
- (Token.Operator, u':'),
- (Token.Literal.Number.Integer, u'1'),
- (Token.Operator, u'/'),
- (Token.Literal.Number.Integer, u'2'),
- (Token.Punctuation, u';'),
- (Token.Text, u'\n'),
- ]
- assert list(lexer.get_tokens(fragment)) == tokens
-
-
-def test_beware_infinite_loop(lexer):
- # This demonstrates the case that "This isn't really guarding" comment
- # refers to.
- fragment = '/a/x;\n'
- tokens = [
- (Token.Text, ''),
- (Token.Operator, '/'),
- (Token.Name.Other, 'a'),
- (Token.Operator, '/'),
- (Token.Name.Other, 'x'),
- (Token.Punctuation, ';'),
- (Token.Text, '\n'),
- ]
- assert list(lexer.get_tokens(fragment)) == tokens
+ yield JavascriptLexer()
+
+
+@pytest.mark.parametrize(
+ 'text',
+ (
+ '1', '1.', '.1', '1.1', '1e1', '1E1', '1e+1', '1E-1', '1.e1', '.1e1',
+ '0888', # octal prefix with non-octal numbers
+ )
+)
+def test_float_literal_positive_matches(lexer, text):
+ """Test literals that should be tokenized as float literals."""
+ assert list(lexer.get_tokens(text))[0] == (Number.Float, text)
+
+
+@pytest.mark.parametrize('text', ('.\u0b6a', '.', '1..', '1n', '1ee', '1e', '1e-', '1e--1', '1e++1', '1e1.0'))
+def test_float_literals_negative_matches(lexer, text):
+ """Test text that should **not** be tokenized as float literals."""
+ assert list(lexer.get_tokens(text))[0] != (Number.Float, text)
+
+
+@pytest.mark.parametrize('text', ('0n', '123n'))
+def test_integer_literal_positive_matches(lexer, text):
+ """Test literals that should be tokenized as integer literals."""
+ assert list(lexer.get_tokens(text))[0] == (Number.Integer, text)
+
+
+@pytest.mark.parametrize('text', ('1N', '1', '1.0'))
+def test_integer_literals_negative_matches(lexer, text):
+ """Test text that should **not** be tokenized as integer literals."""
+ assert list(lexer.get_tokens(text))[0] != (Number.Integer, text)
+
+
+@pytest.mark.parametrize('text', ('0b01', '0B10n'))
+def test_binary_literal_positive_matches(lexer, text):
+ """Test literals that should be tokenized as binary literals."""
+ assert list(lexer.get_tokens(text))[0] == (Number.Bin, text)
+
+
+@pytest.mark.parametrize('text', ('0b0N', '0b', '0bb', '0b2'))
+def test_binary_literals_negative_matches(lexer, text):
+ """Test text that should **not** be tokenized as binary literals."""
+ assert list(lexer.get_tokens(text))[0] != (Number.Bin, text)
+
+
+@pytest.mark.parametrize('text', ('017', '071n', '0o11', '0O77n'))
+def test_octal_literal_positive_matches(lexer, text):
+ """Test literals that should be tokenized as octal literals."""
+ assert list(lexer.get_tokens(text))[0] == (Number.Oct, text)
+
+
+@pytest.mark.parametrize('text', ('01N', '089', '098n', '0o', '0OO', '0o88', '0O88n'))
+def test_octal_literals_negative_matches(lexer, text):
+ """Test text that should **not** be tokenized as octal literals."""
+ assert list(lexer.get_tokens(text))[0] != (Number.Oct, text)
+
+
+@pytest.mark.parametrize('text', ('0x01', '0Xefn', '0x0EF'))
+def test_hexadecimal_literal_positive_matches(lexer, text):
+ """Test literals that should be tokenized as hexadecimal literals."""
+ assert list(lexer.get_tokens(text))[0] == (Number.Hex, text)
+
+
+@pytest.mark.parametrize('text', ('0x0N', '0x', '0Xx', '0xg', '0xhn'))
+def test_hexadecimal_literals_negative_matches(lexer, text):
+ """Test text that should **not** be tokenized as hexadecimal literals."""
+ assert list(lexer.get_tokens(text))[0] != (Number.Hex, text)
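As a usage note, the renamed CoffeeScript tests and the new Javascript tests can be run together via pytest; a sketch (the `-q` flag is only an example):

```python
# Run both test modules programmatically; equivalent to
# "pytest tests/test_coffeescript.py tests/test_javascript.py -q".
import pytest

raise SystemExit(pytest.main([
    "tests/test_coffeescript.py",
    "tests/test_javascript.py",
    "-q",  # quiet output
]))
```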