summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2016-03-19 11:01:33 +0100
committer: Stefan Behnel <stefan_ml@behnel.de> 2016-03-19 11:01:33 +0100
commit: 0c1a8b4d079f6602a8da2c23abbadb0a8c59e5f7 (patch)
tree: 36a18fe7bbe69db6f60ced73517208c9e7ce7011
parent: 87dc808908793f1dc4d397ef8679a2506abb68c3 (diff)
download: cython-0c1a8b4d079f6602a8da2c23abbadb0a8c59e5f7.tar.gz
Implement PEP 515: allow underscores in number literals
https://www.python.org/dev/peps/pep-0515/
https://bugs.python.org/issue26331
Practically accepted as of 2016-03-19:
http://thread.gmane.org/gmane.comp.python.devel/156533/focus=156734
-rw-r--r--  Cython/Compiler/Lexicon.py            22
-rw-r--r--  Cython/Compiler/Scanning.py           10
-rw-r--r--  Cython/Compiler/Tests/TestGrammar.py  87
-rw-r--r--  tests/run/int_literals.pyx            33
4 files changed, 141 insertions, 11 deletions
diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py
index 6d7339842..2cc0a2e1f 100644
--- a/Cython/Compiler/Lexicon.py
+++ b/Cython/Compiler/Lexicon.py
@@ -24,21 +24,25 @@ def make_lexicon():
bindigit = Any("01")
octdigit = Any("01234567")
hexdigit = Any("0123456789ABCDEFabcdef")
+ allow_ = Rep(Str("_"))
indentation = Bol + Rep(Any(" \t"))
- decimal = Rep1(digit)
+ def underscore_digits(d):
+ return d + Rep(Str("_") | d)
+
+ decimal = underscore_digits(digit)
dot = Str(".")
- exponent = Any("Ee") + Opt(Any("+-")) + decimal
+ exponent = allow_ + Any("Ee") + Opt(Any("+-")) + decimal
decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
name = letter + Rep(letter | digit)
- intconst = decimal | (Str("0") + ((Any("Xx") + Rep1(hexdigit)) |
- (Any("Oo") + Rep1(octdigit)) |
- (Any("Bb") + Rep1(bindigit)) ))
+ intconst = decimal | (Str("0") + ((Any("Xx") + allow_ + underscore_digits(hexdigit)) |
+ (Any("Oo") + allow_ + underscore_digits(octdigit)) |
+ (Any("Bb") + allow_ + underscore_digits(bindigit)) ))
intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
intliteral = intconst + intsuffix
fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
- imagconst = (intconst | fltconst) + Any("jJ")
+ imagconst = (intconst | fltconst) + allow_ + Any("jJ")
beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) |
Any(raw_prefixes) + Opt(Any(bytes_prefixes)) |
@@ -67,9 +71,9 @@ def make_lexicon():
return Lexicon([
(name, IDENT),
- (intliteral, 'INT'),
- (fltconst, 'FLOAT'),
- (imagconst, 'IMAG'),
+ (intliteral, Method('strip_underscores', symbol='INT')),
+ (fltconst, Method('strip_underscores', symbol='FLOAT')),
+ (imagconst, Method('strip_underscores', symbol='IMAG')),
(punct | diphthong, TEXT),
(bra, Method('open_bracket_action')),
diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py
index 83cdb4c71..a233504d6 100644
--- a/Cython/Compiler/Scanning.py
+++ b/Cython/Compiler/Scanning.py
@@ -53,12 +53,15 @@ pyx_reserved_words = py_reserved_words + [
class Method(object):
- def __init__(self, name):
+ def __init__(self, name, **kwargs):
self.name = name
+ self.kwargs = kwargs or None
self.__name__ = name # for Plex tracing
def __call__(self, stream, text):
- return getattr(stream, self.name)(text)
+ method = getattr(stream, self.name)
+ # self.kwargs is almost always unused => avoid call overhead
+ return method(text, **self.kwargs) if self.kwargs is not None else method(text)
#------------------------------------------------------------------
@@ -340,6 +343,9 @@ class PyrexScanner(Scanner):
if self.parse_comments:
self.produce('commentline', text)
+ def strip_underscores(self, text, symbol):
+ self.produce(symbol, text.replace('_', ''))
+
def current_level(self):
return self.indentation_stack[-1]
diff --git a/Cython/Compiler/Tests/TestGrammar.py b/Cython/Compiler/Tests/TestGrammar.py
new file mode 100644
index 000000000..b3212cd82
--- /dev/null
+++ b/Cython/Compiler/Tests/TestGrammar.py
@@ -0,0 +1,87 @@
+# mode: run
+# tag: syntax
+
+"""
+Uses TreeFragment to test invalid syntax.
+"""
+
+from __future__ import absolute_import
+
+from ...TestUtils import CythonTest
+from ..Errors import CompileError
+
+
+VALID_UNDERSCORE_LITERALS = [
+ # Copied from CPython's test_grammar.py
+ '0_0_0',
+ '4_2',
+ '4_______2',
+ '1_0000_0000',
+ '0b_1001_0100',
+ '0x_ffff_ffff',
+ '0o_5_7_7',
+ '1__.4',
+ '42_j',
+ '1.4_j',
+ '1.4e5_j',
+ '1_00_00_.5',
+ '1_e10',
+ '1_E10',
+ '1_e1_0',
+ '.1_4',
+ '0_',
+ '42_',
+ '0b1_',
+ '0xf_',
+ '0o5_',
+]
+
+INVALID_UNDERSCORE_LITERALS = [
+ # Copied from CPython's test_grammar.py
+ # Trailing underscores:
+ # Underscores in the base selector:
+ '0_b0',
+ '0_xf',
+ '0_o5',
+ # Old-style octal, still disallowed:
+ #'0_7',
+ #'09_99',
+ # Underscore after non-digit:
+ '1.4j_',
+ '1.4e_1',
+ '.1_4e_1',
+ '1.0e+_1',
+ '1._4',
+ '1._4j',
+ '1._4e5_j',
+ '._5',
+]
+
+
+class TestGrammar(CythonTest):
+
+ def test_invalid_number_literals(self):
+ for literal in INVALID_UNDERSCORE_LITERALS:
+ for expression in ['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']:
+ code = 'x = ' + expression % literal
+ try:
+ self.fragment(u'''\
+ # cython: language_level=3
+ ''' + code)
+ except CompileError as exc:
+ assert code in [s.strip() for s in str(exc).splitlines()], str(exc)
+ else:
+ assert False, "Invalid Cython code '%s' failed to raise an exception" % code
+
+ def test_valid_number_literals(self):
+ for literal in VALID_UNDERSCORE_LITERALS:
+ for expression in ['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']:
+ code = 'x = ' + expression % literal
+ assert self.fragment(u'''\
+ # cython: language_level=3
+ ''' + code) is not None
+
+
+if __name__ == "__main__":
+ import unittest
+ unittest.main()
diff --git a/tests/run/int_literals.pyx b/tests/run/int_literals.pyx
index 0d6860bc2..014c48d57 100644
--- a/tests/run/int_literals.pyx
+++ b/tests/run/int_literals.pyx
@@ -1,9 +1,42 @@
+# mode: run
+# tag: syntax
+
+from __future__ import absolute_import
+
cimport cython
from cython cimport typeof
import sys
+def valid_underscore_literals():
+ """
+ >>> valid_underscore_literals()
+ """
+ # Copied from CPython's test_grammar.py
+ assert 0_0_0 == 0
+ assert 4_2 == 42
+ assert 4_______2 == 42
+ assert 1_0000_0000 == 100000000
+ assert 0b_1001_0100 == 0b10010100
+ assert 0x_ffff_ffff == 0xffffffff
+ assert 0o_5_7_7 == 0o577
+ assert 1__.4 == 1.4
+ assert 42_j == 42j
+ assert 1.4_j == 1.4j
+ assert 1.4e5_j == 1.4e5j
+ assert 1_00_00_.5 == 10000.5
+ assert 1_e10 == 1e10
+ assert 1_E10 == 1E10
+ assert 1_e1_0 == 1e10
+ assert .1_4 == .14
+ assert 0_ == 0
+ assert 42_ == 42
+ assert 0b1_ == 0b1
+ assert 0xf_ == 0xf
+ assert 0o5_ == 0o5
+
+
@cython.test_assert_path_exists(
'//IntNode[@longness = "LL"]',
'//IntNode[@longness = "L"]',