summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Theodore Brown <theodorejb@outlook.com> 2019-05-02 12:10:19 -0500
committer Nikita Popov <nikita.ppv@gmail.com> 2019-06-14 11:37:04 +0200
commit f74109d9a4b1e4fbaeba4f68c8fc89950e19d265 (patch)
tree 479e45443ebaac440802f843baaac77a4e0aa6a3
parent ec77cca5f30c77944c5038bb17de7353fb200757 (diff)
download php-git-f74109d9a4b1e4fbaeba4f68c8fc89950e19d265.tar.gz
Implement numeric literal separators
RFC: https://wiki.php.net/rfc/numeric_literal_separator
-rw-r--r-- UPGRADING 9
-rw-r--r-- Zend/tests/numeric_literal_separator_001.phpt 27
-rw-r--r-- Zend/tests/numeric_literal_separator_002.phpt 7
-rw-r--r-- Zend/tests/numeric_literal_separator_003.phpt 7
-rw-r--r-- Zend/tests/numeric_literal_separator_004.phpt 7
-rw-r--r-- Zend/tests/numeric_literal_separator_005.phpt 7
-rw-r--r-- Zend/tests/numeric_literal_separator_006.phpt 7
-rw-r--r-- Zend/tests/numeric_literal_separator_007.phpt 7
-rw-r--r-- Zend/tests/numeric_literal_separator_008.phpt 7
-rw-r--r-- Zend/tests/numeric_literal_separator_009.phpt 7
-rw-r--r-- Zend/zend_language_scanner.l 133
11 files changed, 197 insertions, 28 deletions
diff --git a/UPGRADING b/UPGRADING
index 66be319cb9..1eaf8fc3d4 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -186,6 +186,15 @@ PHP 7.4 UPGRADE NOTES
RFC: https://wiki.php.net/rfc/spread_operator_for_array
+ . Added support for underscore separators in numeric literals. Some examples:
+
+ 6.674_083e-11; // float
+ 299_792_458; // decimal
+ 0xCAFE_F00D; // hexadecimal
+ 0b0101_1111; // binary
+
+ RFC: https://wiki.php.net/rfc/numeric_literal_separator
+
. Support for WeakReferences has been added.
RFC: https://wiki.php.net/rfc/weakrefs
diff --git a/Zend/tests/numeric_literal_separator_001.phpt b/Zend/tests/numeric_literal_separator_001.phpt
new file mode 100644
index 0000000000..866bd36fa9
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_001.phpt
@@ -0,0 +1,27 @@
+--TEST--
+Valid use of numeric literal separator
+--FILE--
+<?php
+var_dump(299_792_458 === 299792458);
+var_dump(135_00 === 13500);
+var_dump(96_485.332_12 === 96485.33212);
+var_dump(6.626_070_15e-34 === 6.62607015e-34);
+var_dump(6.674_083e-11 === 6.674083e-11);
+var_dump(0xCAFE_F00D === 0xCAFEF00D);
+var_dump(0x54_4A_42 === 0x544A42);
+var_dump(0b0101_1111 === 0b01011111);
+var_dump(0b01_0000_10 === 0b01000010);
+var_dump(0137_041 === 0137041);
+var_dump(0_124 === 0124);
+--EXPECT--
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
diff --git a/Zend/tests/numeric_literal_separator_002.phpt b/Zend/tests/numeric_literal_separator_002.phpt
new file mode 100644
index 0000000000..984438fc07
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_002.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: trailing underscore
+--FILE--
+<?php
+100_;
+--EXPECTF--
+Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_003.phpt b/Zend/tests/numeric_literal_separator_003.phpt
new file mode 100644
index 0000000000..e0cd716223
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_003.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: adjacent underscores
+--FILE--
+<?php
+10__0;
+--EXPECTF--
+Parse error: syntax error, unexpected '__0' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_004.phpt b/Zend/tests/numeric_literal_separator_004.phpt
new file mode 100644
index 0000000000..6db8f8eb86
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_004.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore left of period
+--FILE--
+<?php
+100_.0;
+--EXPECTF--
+Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_005.phpt b/Zend/tests/numeric_literal_separator_005.phpt
new file mode 100644
index 0000000000..4b454e2d17
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_005.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore right of period
+--FILE--
+<?php
+100._0;
+--EXPECTF--
+Parse error: syntax error, unexpected '_0' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_006.phpt b/Zend/tests/numeric_literal_separator_006.phpt
new file mode 100644
index 0000000000..14bd290992
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_006.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore next to 0x
+--FILE--
+<?php
+0x_0123;
+--EXPECTF--
+Parse error: syntax error, unexpected 'x_0123' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_007.phpt b/Zend/tests/numeric_literal_separator_007.phpt
new file mode 100644
index 0000000000..e74c7ed033
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_007.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore next to 0b
+--FILE--
+<?php
+0b_0101;
+--EXPECTF--
+Parse error: syntax error, unexpected 'b_0101' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_008.phpt b/Zend/tests/numeric_literal_separator_008.phpt
new file mode 100644
index 0000000000..66f1d0ff6a
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_008.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore left of e
+--FILE--
+<?php
+1_e2;
+--EXPECTF--
+Parse error: syntax error, unexpected '_e2' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_009.phpt b/Zend/tests/numeric_literal_separator_009.phpt
new file mode 100644
index 0000000000..c690f66b10
--- /dev/null
+++ b/Zend/tests/numeric_literal_separator_009.phpt
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore right of e
+--FILE--
+<?php
+1e_2;
+--EXPECTF--
+Parse error: syntax error, unexpected 'e_2' (T_STRING) in %s on line %d
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 497b02230b..663431708b 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -120,6 +120,21 @@ do { \
BEGIN_EXTERN_C()
+static void strip_underscores(char *str, int *len) /* Remove every '_' from the NUL-terminated str in place; *len drops by one per '_' removed */
+{
+ char *src = str, *dest = str; /* src scans the input, dest marks where the next kept character goes */
+ while (*src != '\0') {
+ if (*src != '_') {
+ *dest = *src;
+ dest++;
+ } else {
+ --(*len); /* separator dropped: shrink the caller's length */
+ }
+ src++;
+ }
+ *dest = '\0'; /* re-terminate the compacted string */
+}
+
static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
{
const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
@@ -1245,11 +1260,11 @@ restart:
/*!re2c
re2c:yyfill:check = 0;
-LNUM [0-9]+
-DNUM ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
+LNUM [0-9]+(_[0-9]+)*
+DNUM ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
-HNUM "0x"[0-9a-fA-F]+
-BNUM "0b"[01]+
+HNUM "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
+BNUM "0b"[01]+(_[01]+)*
LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
@@ -1760,45 +1775,70 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>{BNUM} {
- char *bin = yytext + 2; /* Skip "0b" */
- int len = yyleng - 2;
- char *end;
+ /* Binary literal, optionally with '_' separators. The +/- 2 skips "0b" */
+ int len = yyleng - 2, contains_underscores;
+ char *end, *bin = yytext + 2;
/* Skip any leading 0s */
- while (*bin == '0') {
+ while (len > 0 && (*bin == '0' || *bin == '_')) { /* len bound: for "0b0__1" only "0b0" is matched, so never step past the token */
++bin;
--len;
}
+ contains_underscores = (memchr(bin, '_', len) != NULL); /* len is >= 0 here thanks to the bounded loop above */
+
+ if (contains_underscores) {
+ bin = estrndup(bin, len); /* convert from a stripped copy instead of editing yytext */
+ strip_underscores(bin, &len);
+ }
+
if (len < SIZEOF_ZEND_LONG * 8) {
if (len == 0) {
ZVAL_LONG(zendlval, 0);
} else {
errno = 0;
ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
- ZEND_ASSERT(!errno && end == yytext + yyleng);
+ ZEND_ASSERT(!errno && end == bin + len);
+ }
+ if (contains_underscores) {
+ efree(bin);
}
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
- ZEND_ASSERT(end == yytext + yyleng);
+ ZEND_ASSERT(end == bin + len);
+ if (contains_underscores) {
+ efree(bin);
+ }
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
}
<ST_IN_SCRIPTING>{LNUM} {
- char *end;
- if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
+ int len = yyleng, contains_underscores; /* Decimal/octal literal, optionally with '_' separators; len shrinks when they are stripped */
+ char *end, *lnum = yytext;
+
+ contains_underscores = (memchr(lnum, '_', len) != NULL);
+
+ if (contains_underscores) {
+ lnum = estrndup(lnum, len); /* convert from a stripped copy; every exit below must efree it exactly once */
+ strip_underscores(lnum, &len);
+ }
+
+ if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
errno = 0;
/* base must be passed explicitly for correct parse error on Windows */
- ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
+ ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
/* This isn't an assert, we need to ensure 019 isn't valid octal
* Because the lexing itself doesn't do that for us
*/
- if (end != yytext + yyleng) {
+ if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
+ if (contains_underscores) {
+ efree(lnum);
+ }
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
@@ -1806,29 +1846,38 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
} else {
errno = 0;
- ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
+ ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
if (errno == ERANGE) { /* Overflow */
errno = 0;
- if (yytext[0] == '0') { /* octal overflow */
- ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
+ if (lnum[0] == '0') { /* octal overflow */
+ ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
} else {
- ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
+ ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
}
/* Also not an assert for the same reason */
- if (end != yytext + yyleng) {
+ if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error,
"Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
if (PARSER_MODE()) {
+ if (contains_underscores) { /* free only on this early exit; outside parser mode control falls through to the efree below */
+ efree(lnum);
+ }
RETURN_TOKEN(T_ERROR);
}
}
+ if (contains_underscores) { /* single release for both the valid and the non-parser-mode error path */
+ efree(lnum);
+ }
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
/* Also not an assert for the same reason */
- if (end != yytext + yyleng) {
+ if (end != lnum + len) {
zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
ZVAL_UNDEF(zendlval);
+ if (contains_underscores) {
+ efree(lnum);
+ }
if (PARSER_MODE()) {
RETURN_TOKEN(T_ERROR);
}
@@ -1836,18 +1885,28 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
}
ZEND_ASSERT(!errno);
+ if (contains_underscores) {
+ efree(lnum);
+ }
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
}
<ST_IN_SCRIPTING>{HNUM} {
- char *hex = yytext + 2; /* Skip "0x" */
- int len = yyleng - 2;
- char *end;
+ /* Hexadecimal literal, optionally with '_' separators. The +/- 2 skips "0x" */
+ int len = yyleng - 2, contains_underscores;
+ char *end, *hex = yytext + 2;
/* Skip any leading 0s */
- while (*hex == '0') {
- hex++;
- len--;
+ while (len > 0 && (*hex == '0' || *hex == '_')) { /* len bound: for "0x0__F" only "0x0" is matched, so never step past the token */
+ ++hex;
+ --len;
+ }
+
+ contains_underscores = (memchr(hex, '_', len) != NULL); /* len is >= 0 here thanks to the bounded loop above */
+
+ if (contains_underscores) {
+ hex = estrndup(hex, len); /* convert from a stripped copy instead of editing yytext */
+ strip_underscores(hex, &len);
}
if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
@@ -1858,11 +1917,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
ZEND_ASSERT(!errno && end == hex + len);
}
+ if (contains_underscores) {
+ efree(hex);
+ }
RETURN_TOKEN_WITH_VAL(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == hex + len);
+ if (contains_underscores) {
+ efree(hex);
+ }
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}
}
@@ -1894,10 +1959,22 @@ string:
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
const char *end;
+ int len = yyleng, contains_underscores; /* len is reduced by strip_underscores() when separators are present */
+ char *dnum = yytext; /* text handed to zend_strtod(); swapped for a stripped copy below when needed */
- ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
+ contains_underscores = (memchr(dnum, '_', len) != NULL);
+
+ if (contains_underscores) {
+ dnum = estrndup(dnum, len); /* strip separators from a copy rather than from yytext */
+ strip_underscores(dnum, &len);
+ }
+
+ ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
- ZEND_ASSERT(end == yytext + yyleng);
+ ZEND_ASSERT(end == dnum + len); /* the whole (stripped) literal must have been consumed */
+ if (contains_underscores) {
+ efree(dnum); /* release the stripped copy before leaving the rule */
+ }
RETURN_TOKEN_WITH_VAL(T_DNUMBER);
}