diff options
author | Theodore Brown <theodorejb@outlook.com> | 2019-05-02 12:10:19 -0500 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2019-06-14 11:37:04 +0200 |
commit | f74109d9a4b1e4fbaeba4f68c8fc89950e19d265 (patch) | |
tree | 479e45443ebaac440802f843baaac77a4e0aa6a3 | |
parent | ec77cca5f30c77944c5038bb17de7353fb200757 (diff) | |
download | php-git-f74109d9a4b1e4fbaeba4f68c8fc89950e19d265.tar.gz |
Implement numeric literal separators
RFC: https://wiki.php.net/rfc/numeric_literal_separator
-rw-r--r-- | UPGRADING | 9 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_001.phpt | 27 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_002.phpt | 7 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_003.phpt | 7 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_004.phpt | 7 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_005.phpt | 7 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_006.phpt | 7 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_007.phpt | 7 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_008.phpt | 7 | ||||
-rw-r--r-- | Zend/tests/numeric_literal_separator_009.phpt | 7 | ||||
-rw-r--r-- | Zend/zend_language_scanner.l | 133 |
11 files changed, 197 insertions, 28 deletions
@@ -186,6 +186,15 @@ PHP 7.4 UPGRADE NOTES
 
     RFC: https://wiki.php.net/rfc/spread_operator_for_array
 
+  . Added support for underscore separators in numeric literals. Some examples:
+
+      6.674_083e-11; // float
+      299_792_458; // decimal
+      0xCAFE_F00D; // hexadecimal
+      0b0101_1111; // binary
+
+    RFC: https://wiki.php.net/rfc/numeric_literal_separator
+
   . Support for WeakReferences has been added.
     RFC: https://wiki.php.net/rfc/weakrefs
 
diff --git a/Zend/tests/numeric_literal_separator_001.phpt b/Zend/tests/numeric_literal_separator_001.phpt
new file mode 100644
index 0000000000..866bd36fa9
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_001.phpt
@@ -0,0 +1,27 @@
+--TEST--
+Valid use of numeric literal separator
+--FILE--
+<?php
+var_dump(299_792_458 === 299792458);
+var_dump(135_00 === 13500);
+var_dump(96_485.332_12 === 96485.33212);
+var_dump(6.626_070_15e-34 === 6.62607015e-34);
+var_dump(6.674_083e-11 === 6.674083e-11);
+var_dump(0xCAFE_F00D === 0xCAFEF00D);
+var_dump(0x54_4A_42 === 0x544A42);
+var_dump(0b0101_1111 === 0b01011111);
+var_dump(0b01_0000_10 === 0b01000010);
+var_dump(0137_041 === 0137041);
+var_dump(0_124 === 0124);
+--EXPECT--
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
diff --git a/Zend/tests/numeric_literal_separator_002.phpt b/Zend/tests/numeric_literal_separator_002.phpt
new file mode 100644
index 0000000000..984438fc07
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_002.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: trailing underscore
+--FILE--
+<?php
+100_;
+--EXPECTF--
+Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_003.phpt b/Zend/tests/numeric_literal_separator_003.phpt
new file mode 100644
index 0000000000..e0cd716223
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_003.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: adjacent underscores
+--FILE--
+<?php
+10__0;
+--EXPECTF--
+Parse error: syntax error, unexpected '__0' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_004.phpt b/Zend/tests/numeric_literal_separator_004.phpt
new file mode 100644
index 0000000000..6db8f8eb86
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_004.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore left of period
+--FILE--
+<?php
+100_.0;
+--EXPECTF--
+Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_005.phpt b/Zend/tests/numeric_literal_separator_005.phpt
new file mode 100644
index 0000000000..4b454e2d17
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_005.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore right of period
+--FILE--
+<?php
+100._0;
+--EXPECTF--
+Parse error: syntax error, unexpected '_0' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_006.phpt b/Zend/tests/numeric_literal_separator_006.phpt
new file mode 100644
index 0000000000..14bd290992
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_006.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore next to 0x
+--FILE--
+<?php
+0x_0123;
+--EXPECTF--
+Parse error: syntax error, unexpected 'x_0123' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_007.phpt b/Zend/tests/numeric_literal_separator_007.phpt
new file mode 100644
index 0000000000..e74c7ed033
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_007.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore next to 0b
+--FILE--
+<?php
+0b_0101;
+--EXPECTF--
+Parse error: syntax error, unexpected 'b_0101' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_008.phpt b/Zend/tests/numeric_literal_separator_008.phpt
new file mode 100644
index 0000000000..66f1d0ff6a
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_008.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore left of e
+--FILE--
+<?php
+1_e2;
+--EXPECTF--
+Parse error: syntax error, unexpected '_e2' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_009.phpt b/Zend/tests/numeric_literal_separator_009.phpt
new file mode 100644
index 0000000000..c690f66b10
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_009.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore right of e
+--FILE--
+<?php
+1e_2;
+--EXPECTF--
+Parse error: syntax error, unexpected 'e_2' (T_STRING) in %s on line %d
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 497b02230b..663431708b 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -120,6 +120,21 @@ do { \
 
 BEGIN_EXTERN_C()
 
+static void strip_underscores(char *str, int *len)
+{
+	char *src = str, *dest = str;
+	while (*src != '\0') {
+		if (*src != '_') {
+			*dest = *src;
+			dest++;
+		} else {
+			--(*len);
+		}
+		src++;
+	}
+	*dest = '\0';
+}
+
 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
 {
 	const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
@@ -1245,11 +1260,11 @@ restart:
 
 /*!re2c
 re2c:yyfill:check = 0;
-LNUM [0-9]+
-DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
+LNUM [0-9]+(_[0-9]+)*
+DNUM ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
 EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
-HNUM "0x"[0-9a-fA-F]+
-BNUM "0b"[01]+
+HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
+BNUM "0b"[01]+(_[01]+)*
 LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
 WHITESPACE [ \n\r\t]+
 TABS_AND_SPACES [ \t]*
@@ -1760,45 +1775,70 @@ NEWLINE ("\r"|"\n"|"\r\n")
 }
 
 <ST_IN_SCRIPTING>{BNUM} {
-	char *bin = yytext + 2; /* Skip "0b" */
-	int len = yyleng - 2;
-	char *end;
+	/* The +/- 2 skips "0b" */
+	int len = yyleng - 2, contains_underscores;
+	char *end, *bin = yytext + 2;
 
 	/* Skip any leading 0s */
-	while (*bin == '0') {
+	while (*bin == '0' || *bin == '_') {
 		++bin;
 		--len;
 	}
 
+	contains_underscores = (memchr(bin, '_', len) != NULL);
+
+	if (contains_underscores) {
+		bin = estrndup(bin, len);
+		strip_underscores(bin, &len);
+	}
+
 	if (len < SIZEOF_ZEND_LONG * 8) {
 		if (len == 0) {
 			ZVAL_LONG(zendlval, 0);
 		} else {
 			errno = 0;
 			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
-			ZEND_ASSERT(!errno && end == yytext + yyleng);
+			ZEND_ASSERT(!errno && end == bin + len);
+		}
+		if (contains_underscores) {
+			efree(bin);
 		}
 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 	} else {
 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
-		ZEND_ASSERT(end == yytext + yyleng);
+		ZEND_ASSERT(end == bin + len);
+		if (contains_underscores) {
+			efree(bin);
+		}
 		RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 	}
 }
 
 <ST_IN_SCRIPTING>{LNUM} {
-	char *end;
-	if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
+	int len = yyleng, contains_underscores;
+	char *end, *lnum = yytext;
+
+	contains_underscores = (memchr(lnum, '_', len) != NULL);
+
+	if (contains_underscores) {
+		lnum = estrndup(lnum, len);
+		strip_underscores(lnum, &len);
+	}
+
+	if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
 		errno = 0;
 		/* base must be passed explicitly for correct parse error on Windows */
-		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
+		ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
 		/* This isn't an assert, we need to ensure 019 isn't valid octal
 		 * Because the lexing itself doesn't do that for us
 		 */
-		if (end != yytext + yyleng) {
+		if (end != lnum + len) {
 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
 			ZVAL_UNDEF(zendlval);
+			if (contains_underscores) {
+				efree(lnum);
+			}
 			if (PARSER_MODE()) {
 				RETURN_TOKEN(T_ERROR);
 			}
@@ -1806,29 +1846,38 @@ NEWLINE ("\r"|"\n"|"\r\n")
 		}
 	} else {
 		errno = 0;
-		ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
+		ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
 		if (errno == ERANGE) { /* Overflow */
 			errno = 0;
-			if (yytext[0] == '0') { /* octal overflow */
-				ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
+			if (lnum[0] == '0') { /* octal overflow */
+				ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
 			} else {
-				ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
+				ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
 			}
 			/* Also not an assert for the same reason */
-			if (end != yytext + yyleng) {
+			if (end != lnum + len) {
 				zend_throw_exception(zend_ce_parse_error,
 					"Invalid numeric literal", 0);
 				ZVAL_UNDEF(zendlval);
+				if (contains_underscores) {
+					efree(lnum);
+				}
 				if (PARSER_MODE()) {
 					RETURN_TOKEN(T_ERROR);
 				}
 			}
+			if (contains_underscores) {
+				efree(lnum);
+			}
 			RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 		}
 		/* Also not an assert for the same reason */
-		if (end != yytext + yyleng) {
+		if (end != lnum + len) {
 			zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
 			ZVAL_UNDEF(zendlval);
+			if (contains_underscores) {
+				efree(lnum);
+			}
 			if (PARSER_MODE()) {
 				RETURN_TOKEN(T_ERROR);
 			}
@@ -1836,18 +1885,28 @@ NEWLINE ("\r"|"\n"|"\r\n")
 		}
 	}
 	ZEND_ASSERT(!errno);
+	if (contains_underscores) {
+		efree(lnum);
+	}
 	RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 }
 
 <ST_IN_SCRIPTING>{HNUM} {
-	char *hex = yytext + 2; /* Skip "0x" */
-	int len = yyleng - 2;
-	char *end;
+	/* The +/- 2 skips "0x" */
+	int len = yyleng - 2, contains_underscores;
+	char *end, *hex = yytext + 2;
 
 	/* Skip any leading 0s */
-	while (*hex == '0') {
-		hex++;
-		len--;
+	while (*hex == '0' || *hex == '_') {
+		++hex;
+		--len;
+	}
+
+	contains_underscores = (memchr(hex, '_', len) != NULL);
+
+	if (contains_underscores) {
+		hex = estrndup(hex, len);
+		strip_underscores(hex, &len);
 	}
 
 	if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
@@ -1858,11 +1917,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
 			ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
 			ZEND_ASSERT(!errno && end == hex + len);
 		}
+		if (contains_underscores) {
+			efree(hex);
+		}
 		RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 	} else {
 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
 		ZEND_ASSERT(end == hex + len);
+		if (contains_underscores) {
+			efree(hex);
+		}
 		RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 	}
 }
@@ -1894,10 +1959,22 @@ string:
 
 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
 	const char *end;
+	int len = yyleng, contains_underscores;
+	char *dnum = yytext;
 
-	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
+	contains_underscores = (memchr(dnum, '_', len) != NULL);
+
+	if (contains_underscores) {
+		dnum = estrndup(dnum, len);
+		strip_underscores(dnum, &len);
+	}
+
+	ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
 	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
-	ZEND_ASSERT(end == yytext + yyleng);
+	ZEND_ASSERT(end == dnum + len);
+	if (contains_underscores) {
+		efree(dnum);
+	}
 	RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 }
 