diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2016-03-19 11:01:33 +0100 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2016-03-19 11:01:33 +0100 |
commit | 0c1a8b4d079f6602a8da2c23abbadb0a8c59e5f7 (patch) | |
tree | 36a18fe7bbe69db6f60ced73517208c9e7ce7011 | |
parent | 87dc808908793f1dc4d397ef8679a2506abb68c3 (diff) | |
download | cython-0c1a8b4d079f6602a8da2c23abbadb0a8c59e5f7.tar.gz |
Implement PEP 515: allow underscores in number literals
https://www.python.org/dev/peps/pep-0515/
https://bugs.python.org/issue26331
Practically accepted as of 2016-03-19:
http://thread.gmane.org/gmane.comp.python.devel/156533/focus=156734
-rw-r--r-- | Cython/Compiler/Lexicon.py | 22 |
-rw-r--r-- | Cython/Compiler/Scanning.py | 10 |
-rw-r--r-- | Cython/Compiler/Tests/TestGrammar.py | 87 |
-rw-r--r-- | tests/run/int_literals.pyx | 33 |
4 files changed, 141 insertions, 11 deletions
diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py index 6d7339842..2cc0a2e1f 100644 --- a/Cython/Compiler/Lexicon.py +++ b/Cython/Compiler/Lexicon.py @@ -24,21 +24,25 @@ def make_lexicon(): bindigit = Any("01") octdigit = Any("01234567") hexdigit = Any("0123456789ABCDEFabcdef") + allow_ = Rep(Str("_")) indentation = Bol + Rep(Any(" \t")) - decimal = Rep1(digit) + def underscore_digits(d): + return d + Rep(Str("_") | d) + + decimal = underscore_digits(digit) dot = Str(".") - exponent = Any("Ee") + Opt(Any("+-")) + decimal + exponent = allow_ + Any("Ee") + Opt(Any("+-")) + decimal decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal) name = letter + Rep(letter | digit) - intconst = decimal | (Str("0") + ((Any("Xx") + Rep1(hexdigit)) | - (Any("Oo") + Rep1(octdigit)) | - (Any("Bb") + Rep1(bindigit)) )) + intconst = decimal | (Str("0") + ((Any("Xx") + allow_ + underscore_digits(hexdigit)) | + (Any("Oo") + allow_ + underscore_digits(octdigit)) | + (Any("Bb") + allow_ + underscore_digits(bindigit)) )) intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu"))) intliteral = intconst + intsuffix fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) - imagconst = (intconst | fltconst) + Any("jJ") + imagconst = (intconst | fltconst) + allow_ + Any("jJ") beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) | Any(raw_prefixes) + Opt(Any(bytes_prefixes)) | @@ -67,9 +71,9 @@ def make_lexicon(): return Lexicon([ (name, IDENT), - (intliteral, 'INT'), - (fltconst, 'FLOAT'), - (imagconst, 'IMAG'), + (intliteral, Method('strip_underscores', symbol='INT')), + (fltconst, Method('strip_underscores', symbol='FLOAT')), + (imagconst, Method('strip_underscores', symbol='IMAG')), (punct | diphthong, TEXT), (bra, Method('open_bracket_action')), diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py index 83cdb4c71..a233504d6 100644 --- a/Cython/Compiler/Scanning.py +++ 
b/Cython/Compiler/Scanning.py @@ -53,12 +53,15 @@ pyx_reserved_words = py_reserved_words + [ class Method(object): - def __init__(self, name): + def __init__(self, name, **kwargs): self.name = name + self.kwargs = kwargs or None self.__name__ = name # for Plex tracing def __call__(self, stream, text): - return getattr(stream, self.name)(text) + method = getattr(stream, self.name) + # self.kwargs is almost always unused => avoid call overhead + return method(text, **self.kwargs) if self.kwargs is not None else method(text) #------------------------------------------------------------------ @@ -340,6 +343,9 @@ class PyrexScanner(Scanner): if self.parse_comments: self.produce('commentline', text) + def strip_underscores(self, text, symbol): + self.produce(symbol, text.replace('_', '')) + def current_level(self): return self.indentation_stack[-1] diff --git a/Cython/Compiler/Tests/TestGrammar.py b/Cython/Compiler/Tests/TestGrammar.py new file mode 100644 index 000000000..b3212cd82 --- /dev/null +++ b/Cython/Compiler/Tests/TestGrammar.py @@ -0,0 +1,87 @@ +# mode: run +# tag: syntax + +""" +Uses TreeFragment to test invalid syntax. 
+""" + +from __future__ import absolute_import + +from ...TestUtils import CythonTest +from ..Errors import CompileError + + +VALID_UNDERSCORE_LITERALS = [ + # Copied from CPython's test_grammar.py + '0_0_0', + '4_2', + '4_______2', + '1_0000_0000', + '0b_1001_0100', + '0x_ffff_ffff', + '0o_5_7_7', + '1__.4', + '42_j', + '1.4_j', + '1.4e5_j', + '1_00_00_.5', + '1_e10', + '1_E10', + '1_e1_0', + '.1_4', + '0_', + '42_', + '0b1_', + '0xf_', + '0o5_', +] + +INVALID_UNDERSCORE_LITERALS = [ + # Copied from CPython's test_grammar.py + # Trailing underscores: + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + #'0_7', + #'09_99', + # Underscore after non-digit: + '1.4j_', + '1.4e_1', + '.1_4e_1', + '1.0e+_1', + '1._4', + '1._4j', + '1._4e5_j', + '._5', +] + + +class TestGrammar(CythonTest): + + def test_invalid_number_literals(self): + for literal in INVALID_UNDERSCORE_LITERALS: + for expression in ['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']: + code = 'x = ' + expression % literal + try: + self.fragment(u'''\ + # cython: language_level=3 + ''' + code) + except CompileError as exc: + assert code in [s.strip() for s in str(exc).splitlines()], str(exc) + else: + assert False, "Invalid Cython code '%s' failed to raise an exception" % code + + def test_valid_number_literals(self): + for literal in VALID_UNDERSCORE_LITERALS: + for expression in ['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']: + code = 'x = ' + expression % literal + assert self.fragment(u'''\ + # cython: language_level=3 + ''' + code) is not None + + +if __name__ == "__main__": + import unittest + unittest.main() diff --git a/tests/run/int_literals.pyx b/tests/run/int_literals.pyx index 0d6860bc2..014c48d57 100644 --- a/tests/run/int_literals.pyx +++ b/tests/run/int_literals.pyx @@ -1,9 +1,42 @@ +# mode: run +# tag: syntax + +from __future__ import absolute_import + cimport cython from cython cimport typeof import sys +def valid_underscore_literals(): + 
""" + >>> valid_underscore_literals() + """ + # Copied from CPython's test_grammar.py + assert 0_0_0 == 0 + assert 4_2 == 42 + assert 4_______2 == 42 + assert 1_0000_0000 == 100000000 + assert 0b_1001_0100 == 0b10010100 + assert 0x_ffff_ffff == 0xffffffff + assert 0o_5_7_7 == 0o577 + assert 1__.4 == 1.4 + assert 42_j == 42j + assert 1.4_j == 1.4j + assert 1.4e5_j == 1.4e5j + assert 1_00_00_.5 == 10000.5 + assert 1_e10 == 1e10 + assert 1_E10 == 1E10 + assert 1_e1_0 == 1e10 + assert .1_4 == .14 + assert 0_ == 0 + assert 42_ == 42 + assert 0b1_ == 0b1 + assert 0xf_ == 0xf + assert 0o5_ == 0o5 + + @cython.test_assert_path_exists( '//IntNode[@longness = "LL"]', '//IntNode[@longness = "L"]', |