summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHugo van der Sanden <hv@crypt.org>2015-02-23 16:48:15 +0000
committerHugo van der Sanden <hv@crypt.org>2015-03-09 22:15:46 +0000
commit22ff313068aa37b1a24855e760e71ee9a20a1a90 (patch)
tree09da195258e4a85e42a39dd24d7c60c849409ef1
parent35cd12d12a5a5777098caf722f8748b39c3be45f (diff)
downloadperl-22ff313068aa37b1a24855e760e71ee9a20a1a90.tar.gz
[perl #123814] replace grok_atou with grok_atoUV
Some questions and loose ends: XXX gv.c:S_gv_magicalize - why are we using SSize_t for paren? XXX mg.c:Perl_magic_set - need appopriate error handling for $) XXX regcomp.c:S_reg - need to check if we do the right thing if parno was not grokked Perl_get_debug_opts should probably return something unsigned; not sure if that's something we can change.
-rw-r--r--doio.c8
-rw-r--r--embed.fnc2
-rw-r--r--embed.h2
-rw-r--r--ext/DynaLoader/dlutils.c10
-rw-r--r--ext/XS-APItest/APItest.pm2
-rw-r--r--ext/XS-APItest/numeric.xs12
-rw-r--r--ext/XS-APItest/t/grok.t126
-rw-r--r--gv.c12
-rw-r--r--locale.c5
-rw-r--r--malloc.c4
-rw-r--r--mg.c15
-rw-r--r--numeric.c59
-rw-r--r--perl.c34
-rw-r--r--pod/perlclib.pod21
-rw-r--r--pod/perlhacktips.pod4
-rw-r--r--pod/perllocale.pod7
-rw-r--r--pp_sys.c5
-rw-r--r--proto.h9
-rw-r--r--regcomp.c45
-rw-r--r--toke.c5
-rw-r--r--utf8.c12
-rw-r--r--util.c61
22 files changed, 269 insertions, 191 deletions
diff --git a/doio.c b/doio.c
index a63f2a229b..218887d69d 100644
--- a/doio.c
+++ b/doio.c
@@ -377,6 +377,7 @@ Perl_do_open6(pTHX_ GV *gv, const char *oname, STRLEN len,
else {
PerlIO *that_fp = NULL;
int wanted_fd;
+ UV uv;
if (num_svs > 1) {
/* diag_listed_as: More than one argument to '%s' open */
Perl_croak(aTHX_ "More than one argument to '%c&' open",IoTYPE(io));
@@ -390,8 +391,11 @@ Perl_do_open6(pTHX_ GV *gv, const char *oname, STRLEN len,
wanted_fd = SvUV(*svp);
num_svs = 0;
}
- else if (isDIGIT(*type)) {
- wanted_fd = grok_atou(type, NULL);
+ else if (isDIGIT(*type)
+ && grok_atoUV(type, &uv, NULL)
+ && uv <= INT_MAX
+ ) {
+ wanted_fd = (int)uv;
}
else {
const IO* thatio;
diff --git a/embed.fnc b/embed.fnc
index eecbbd5601..128d4bd809 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -818,7 +818,7 @@ Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep
Apd |int |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV *valuep|U32 flags
ApdR |bool |grok_numeric_radix|NN const char **sp|NN const char *send
Apd |UV |grok_oct |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
-Apdn |UV |grok_atou |NN const char* pv|NULLOK const char** endptr
+Apdn |bool |grok_atoUV |NN const char* pv|NN UV* valptr|NULLOK const char** endptr
: These are all indirectly referenced by globals.c. This is somewhat annoying.
p |int |magic_clearenv |NN SV* sv|NN MAGIC* mg
p |int |magic_clear_all_env|NN SV* sv|NN MAGIC* mg
diff --git a/embed.h b/embed.h
index e6c665dc38..02a4aceec4 100644
--- a/embed.h
+++ b/embed.h
@@ -178,7 +178,7 @@
#define getcwd_sv(a) Perl_getcwd_sv(aTHX_ a)
#define gp_free(a) Perl_gp_free(aTHX_ a)
#define gp_ref(a) Perl_gp_ref(aTHX_ a)
-#define grok_atou Perl_grok_atou
+#define grok_atoUV Perl_grok_atoUV
#define grok_bin(a,b,c,d) Perl_grok_bin(aTHX_ a,b,c,d)
#define grok_hex(a,b,c,d) Perl_grok_hex(aTHX_ a,b,c,d)
#define grok_infnan(a,b) Perl_grok_infnan(aTHX_ a,b)
diff --git a/ext/DynaLoader/dlutils.c b/ext/DynaLoader/dlutils.c
index 96ea8befa5..fca8e787eb 100644
--- a/ext/DynaLoader/dlutils.c
+++ b/ext/DynaLoader/dlutils.c
@@ -100,6 +100,7 @@ dl_generic_private_init(pTHX) /* called by dl_*.xs dl_private_init() */
{
#if defined(PERL_IN_DL_HPUX_XS) || defined(PERL_IN_DL_DLOPEN_XS)
char *perl_dl_nonlazy;
+ UV uv;
#endif
MY_CXT_INIT;
@@ -115,9 +116,12 @@ dl_generic_private_init(pTHX) /* called by dl_*.xs dl_private_init() */
#endif
#if defined(PERL_IN_DL_HPUX_XS) || defined(PERL_IN_DL_DLOPEN_XS)
- if ( (perl_dl_nonlazy = getenv("PERL_DL_NONLAZY")) != NULL )
- dl_nonlazy = grok_atou(perl_dl_nonlazy, NULL);
- else
+ if ( (perl_dl_nonlazy = getenv("PERL_DL_NONLAZY")) != NULL
+ && grok_atoUV(perl_dl_nonlazy, &uv, NULL)
+ && uv <= INT_MAX
+ ) {
+ dl_nonlazy = (int)uv;
+ } else
dl_nonlazy = 0;
if (dl_nonlazy)
DLDEBUG(1,PerlIO_printf(Perl_debug_log, "DynaLoader bind mode is 'non-lazy'\n"));
diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm
index a759492f0a..e4b7156496 100644
--- a/ext/XS-APItest/APItest.pm
+++ b/ext/XS-APItest/APItest.pm
@@ -5,7 +5,7 @@ use strict;
use warnings;
use Carp;
-our $VERSION = '0.70';
+our $VERSION = '0.71';
require XSLoader;
diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs
index 6d1ef82340..0ce9e080ff 100644
--- a/ext/XS-APItest/numeric.xs
+++ b/ext/XS-APItest/numeric.xs
@@ -32,22 +32,24 @@ grok_number_flags(number, flags)
PUSHs(sv_2mortal(newSVuv(value)));
void
-grok_atou(number, endsv)
+grok_atoUV(number, endsv)
SV *number
SV *endsv
PREINIT:
STRLEN len;
const char *pv = SvPV(number, len);
- UV result;
+ UV value = 0xdeadbeef;
+ bool result;
const char* endptr;
PPCODE:
EXTEND(SP,2);
if (endsv == &PL_sv_undef) {
- result = grok_atou(pv, NULL);
+ result = grok_atoUV(pv, &value, NULL);
} else {
- result = grok_atou(pv, &endptr);
+ result = grok_atoUV(pv, &value, &endptr);
}
- PUSHs(sv_2mortal(newSVuv(result)));
+ PUSHs(result ? &PL_sv_yes : &PL_sv_no);
+ PUSHs(sv_2mortal(newSVuv(value)));
if (endsv == &PL_sv_undef) {
PUSHs(sv_2mortal(newSVpvn(NULL, 0)));
} else {
diff --git a/ext/XS-APItest/t/grok.t b/ext/XS-APItest/t/grok.t
index f66717bb7c..c3169ce7cc 100644
--- a/ext/XS-APItest/t/grok.t
+++ b/ext/XS-APItest/t/grok.t
@@ -137,26 +137,26 @@ my @atous =
[ "12x", "x", 12, 2 ],
# Leading whitespace is failure.
- [ " 0", " 0", 0, 0 ],
- [ " 1", " 1", 0, 0 ],
- [ " 12", " 12", 0, 0 ],
+ [ " 0", undef, 0, 0 ],
+ [ " 1", undef, 0, 0 ],
+ [ " 12", undef, 0, 0 ],
# Leading garbage is outright failure.
- [ "x0", "x0", 0, 0 ],
- [ "x1", "x1", 0, 0 ],
- [ "x12", "x12", 0, 0 ],
+ [ "x0", undef, 0, 0 ],
+ [ "x1", undef, 0, 0 ],
+ [ "x12", undef, 0, 0 ],
# We do not parse decimal point.
- [ "12.3", ".3", 12, 2 ],
+ [ "12.3", ".3", 12, 2 ],
# Leading pluses or minuses are no good.
- [ "+12", "+12", 0, 0 ],
- [ "-12", "-12", 0, 0 ],
+ [ "+12", undef, 0, 0 ],
+ [ "-12", undef, 0, 0 ],
- # Extra leading zeros cause overflow.
- [ "00", "00", $ATOU_MAX, 0 ],
- [ "01", "01", $ATOU_MAX, 0 ],
- [ "012", "012", $ATOU_MAX, 0 ],
+ # Extra leading zeros are no good.
+ [ "00", undef, $ATOU_MAX, 0 ],
+ [ "01", undef, $ATOU_MAX, 0 ],
+ [ "012", undef, $ATOU_MAX, 0 ],
);
# Values near overflow point.
@@ -173,83 +173,93 @@ if ($Config{uvsize} == 8) {
# This is well within 64-bit.
[ "9999999999", "", 9999999999, 10, ],
- # Values valid up to 64-bit and beyond.
+ # Values valid up to 64-bit, failing beyond.
[ "18446744073709551613", "", 18446744073709551613, 20, ],
[ "18446744073709551614", "", 18446744073709551614, 20, ],
[ "18446744073709551615", "", $ATOU_MAX, 20, ],
- [ "18446744073709551616", "", $ATOU_MAX, 0, ],
- [ "18446744073709551617", "", $ATOU_MAX, 0, ],
+ [ "18446744073709551616", undef, $ATOU_MAX, 0, ],
+ [ "18446744073709551617", undef, $ATOU_MAX, 0, ],
);
} elsif ($Config{uvsize} == 4) {
push @atous,
(
- # Values valid up to 32-bit and beyond.
+ # Values valid up to 32-bit, failing beyond.
[ "4294967293", "", 4294967293, 10, ],
[ "4294967294", "", 4294967294, 10, ],
[ "4294967295", "", $ATOU_MAX, 10, ],
- [ "4294967296", "", $ATOU_MAX, 0, ],
- [ "4294967297", "", $ATOU_MAX, 0, ],
+ [ "4294967296", undef, $ATOU_MAX, 0, ],
+ [ "4294967297", undef, $ATOU_MAX, 0, ],
# Still beyond 32-bit.
- [ "4999999999", "", $ATOU_MAX, 0, ],
- [ "5678901234", "", $ATOU_MAX, 0, ],
- [ "6789012345", "", $ATOU_MAX, 0, ],
- [ "7890123456", "", $ATOU_MAX, 0, ],
- [ "8901234567", "", $ATOU_MAX, 0, ],
- [ "9012345678", "", $ATOU_MAX, 0, ],
- [ "9999999999", "", $ATOU_MAX, 0, ],
- [ "10000000000", "", $ATOU_MAX, 0, ],
- [ "12345678901", "", $ATOU_MAX, 0, ],
+ [ "4999999999", undef, $ATOU_MAX, 0, ],
+ [ "5678901234", undef, $ATOU_MAX, 0, ],
+ [ "6789012345", undef, $ATOU_MAX, 0, ],
+ [ "7890123456", undef, $ATOU_MAX, 0, ],
+ [ "8901234567", undef, $ATOU_MAX, 0, ],
+ [ "9012345678", undef, $ATOU_MAX, 0, ],
+ [ "9999999999", undef, $ATOU_MAX, 0, ],
+ [ "10000000000", undef, $ATOU_MAX, 0, ],
+ [ "12345678901", undef, $ATOU_MAX, 0, ],
# 64-bit values are way beyond.
- [ "18446744073709551613", "", $ATOU_MAX, 0, ],
- [ "18446744073709551614", "", $ATOU_MAX, 0, ],
- [ "18446744073709551615", "", $ATOU_MAX, 0, ],
- [ "18446744073709551616", "", $ATOU_MAX, 0, ],
- [ "18446744073709551617", "", $ATOU_MAX, 0, ],
+ [ "18446744073709551613", undef, $ATOU_MAX, 0, ],
+ [ "18446744073709551614", undef, $ATOU_MAX, 0, ],
+ [ "18446744073709551615", undef, $ATOU_MAX, 0, ],
+ [ "18446744073709551616", undef, $ATOU_MAX, 0, ],
+ [ "18446744073709551617", undef, $ATOU_MAX, 0, ],
);
}
# These will fail to fail once 128/256-bit systems arrive.
push @atous,
(
- [ "23456789012345678901", "", $ATOU_MAX, 0 ],
- [ "34567890123456789012", "", $ATOU_MAX, 0 ],
- [ "98765432109876543210", "", $ATOU_MAX, 0 ],
- [ "98765432109876543211", "", $ATOU_MAX, 0 ],
- [ "99999999999999999999", "", $ATOU_MAX, 0 ],
+ [ "23456789012345678901", undef, $ATOU_MAX, 0 ],
+ [ "34567890123456789012", undef, $ATOU_MAX, 0 ],
+ [ "98765432109876543210", undef, $ATOU_MAX, 0 ],
+ [ "98765432109876543211", undef, $ATOU_MAX, 0 ],
+ [ "99999999999999999999", undef, $ATOU_MAX, 0 ],
);
for my $grok (@atous) {
my $input = $grok->[0];
my $endsv = $grok->[1];
+ my $expect_ok = defined $endsv;
+ my $strict_ok = $expect_ok && $endsv eq '';
- my ($out_uv, $out_len);
+ my ($ok, $out_uv, $out_len);
# First with endsv.
- ($out_uv, $out_len) = grok_atou($input, $endsv);
- is($out_uv, $grok->[2],
- "'$input' $endsv - number success (got $out_uv cf $grok->[2])");
- ok($grok->[3] <= length $input, "'$input' $endsv - length sanity 1");
- unless (length $grok->[1]) {
- is($out_len, $grok->[3], "'$input' $endsv - length sanity 2");
- } # else { ... } ?
- if ($out_len) {
- is($endsv, substr($input, $out_len),
- "'$input' $endsv - length sanity 3");
+ ($ok, $out_uv, $out_len) = grok_atoUV($input, $endsv);
+ is($expect_ok, $ok, sprintf "'$input' expected %s, got %s",
+ ($expect_ok ? 'success' : 'failure'),
+ ($ok ? 'success' : 'failure'),
+ );
+ if ($expect_ok) {
+ is($expect_ok, $ok, "'$input' expect success");
+ is($out_uv, $grok->[2],
+ "'$input' $endsv - number success (got $out_uv cf $grok->[2])");
+ ok($grok->[3] <= length $input, "'$input' $endsv - length sanity 1");
+ unless (length $grok->[1]) {
+ is($out_len, $grok->[3], "'$input' $endsv - length sanity 2");
+ } # else { ... } ?
+ if ($out_len) {
+ is($endsv, substr($input, $out_len),
+ "'$input' $endsv - length sanity 3");
+ }
+ } else {
+ is($expect_ok, $ok, "'$input' expect failure");
+ is(0xdeadbeef, $out_uv, "'$input' on failure expect value unchanged");
}
# Then without endsv (undef == NULL).
- ($out_uv, $out_len) = grok_atou($input, undef);
- if (length $grok->[1]) {
- if ($grok->[2] == $ATOU_MAX) {
- is($out_uv, $ATOU_MAX, "'$input' undef - number overflow");
- } else {
- is($out_uv, 0, "'$input' undef - number zero");
- }
- } else {
+ ($ok, $out_uv, $out_len) = grok_atoUV($input, undef);
+ if ($strict_ok) {
+ is($strict_ok, $ok, "'$input' expect strict success");
is($out_uv, $grok->[2],
- "'$input' undef - number success (got $out_uv cf $grok->[2])");
+ "'$input' $endsv - strict number success (got $out_uv cf $grok->[2])");
+ } else {
+ is($strict_ok, $ok, "'$input' expect strict failure");
+ is(0xdeadbeef, $out_uv, "'$input' on strict failure expect value unchanged");
}
}
diff --git a/gv.c b/gv.c
index 2eb18e4f39..63bdc569b1 100644
--- a/gv.c
+++ b/gv.c
@@ -1984,13 +1984,11 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len,
{
/* Ensures that we have an all-digit variable, ${"1foo"} fails
this test */
- /* This snippet is taken from is_gv_magical */
- const char *end = name + len;
- while (--end > name) {
- if (!isDIGIT(*end))
- return addmg;
- }
- paren = grok_atou(name, NULL);
+ UV uv;
+ if (!grok_atoUV(name, &uv, NULL) || uv > I32_MAX)
+ return addmg;
+ /* XXX why are we using a SSize_t? */
+ paren = (SSize_t)(I32)uv;
goto storeparen;
}
}
diff --git a/locale.c b/locale.c
index 4f0f4476b5..a1fe449b60 100644
--- a/locale.c
+++ b/locale.c
@@ -675,7 +675,10 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
const bool locwarn = (printwarn > 1
|| (printwarn
&& (! bad_lang_use_once
- || grok_atou(bad_lang_use_once, NULL))));
+ || (
+ /* disallow with "" or "0" */
+ *bad_lang_use_once
+ && strNE("0", bad_lang_use_once)))));
bool done = FALSE;
#ifdef WIN32
/* In some systems you can find out the system default locale
diff --git a/malloc.c b/malloc.c
index 58bec6486f..a797e7e86b 100644
--- a/malloc.c
+++ b/malloc.c
@@ -1824,7 +1824,7 @@ Perl_mfree(Malloc_t where)
if (bad_free_warn == -1) {
dTHX;
char *pbf = PerlEnv_getenv("PERL_BADFREE");
- bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1;
+ bad_free_warn = (pbf) ? strNE("0", pbf) : 1;
}
if (!bad_free_warn)
return;
@@ -1922,7 +1922,7 @@ Perl_realloc(void *mp, size_t nbytes)
if (bad_free_warn == -1) {
dTHX;
char *pbf = PerlEnv_getenv("PERL_BADFREE");
- bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1;
+ bad_free_warn = (pbf) ? strNE("0", pbf) : 1;
}
if (!bad_free_warn)
return NULL;
diff --git a/mg.c b/mg.c
index d2a8db008a..2ed1764a06 100644
--- a/mg.c
+++ b/mg.c
@@ -3020,6 +3020,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
const char *p = SvPV_const(sv, len);
Groups_t *gary = NULL;
const char* endptr;
+ UV uv;
#ifdef _SC_NGROUPS_MAX
int maxgrp = sysconf(_SC_NGROUPS_MAX);
@@ -3031,7 +3032,12 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
while (isSPACE(*p))
++p;
- new_egid = (Gid_t)grok_atou(p, &endptr);
+ if (grok_atoUV(p, &uv, &endptr))
+ new_egid = (Gid_t)uv;
+ else {
+ new_egid = 0; /* XXX is this safe? */
+ endptr = NULL;
+ }
for (i = 0; i < maxgrp; ++i) {
if (endptr == NULL)
break;
@@ -3044,7 +3050,12 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
Newx(gary, i + 1, Groups_t);
else
Renew(gary, i + 1, Groups_t);
- gary[i] = (Groups_t)grok_atou(p, &endptr);
+ if (grok_atoUV(p, &uv, &endptr))
+ gary[i] = (Groups_t)uv;
+ else {
+ gary[i] = 0; /* XXX is this safe? */
+ endptr = NULL;
+ }
}
if (i)
PERL_UNUSED_RESULT(setgroups(i, gary));
diff --git a/numeric.c b/numeric.c
index 388f363237..41f620c897 100644
--- a/numeric.c
+++ b/numeric.c
@@ -1033,11 +1033,9 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
}
/*
-=for apidoc grok_atou
+=for apidoc grok_atoUV
-grok_atou is a safer replacement for atoi and strtol.
-
-grok_atou parses a C-style zero-byte terminated string, looking for
+grok_atoUV parses a C-style zero-byte terminated string, looking for
a decimal unsigned integer.
Returns the unsigned integer, if a valid value can be parsed
@@ -1045,23 +1043,17 @@ from the beginning of the string.
Accepts only the decimal digits '0'..'9'.
-As opposed to atoi or strtol, grok_atou does NOT allow optional
+As opposed to atoi or strtol, grok_atoUV does NOT allow optional
leading whitespace, or negative inputs. If such features are
required, the calling code needs to explicitly implement those.
-If a valid value cannot be parsed, returns either zero (if non-digits
-are met before any digits) or UV_MAX (if the value overflows).
-
-Note that extraneous leading zeros also count as an overflow
-(meaning that only "0" is the zero).
-
-On failure, the *endptr is also set to NULL, unless endptr is NULL.
+Returns true if a valid value could be parsed. In that case, valptr
+is set to the parsed value, and endptr (if provided) is set to point
+to the character after the last digit.
-Trailing non-digit bytes are allowed if the endptr is non-NULL.
-On return the *endptr will contain the pointer to the first non-digit byte.
-
-If the endptr is NULL, the first non-digit byte MUST be
-the zero byte terminating the pv, or zero will be returned.
+Returns false otherwise. This can happen if a) there is a leading zero
+followed by another digit; b) the digits would overflow a UV; or c)
+there are trailing non-digits AND endptr is not provided.
Background: atoi has severe problems with illegal inputs, it cannot be
used for incremental parsing, and therefore should be avoided
@@ -1071,26 +1063,24 @@ seen as a bug (global state controlled by user environment).
=cut
*/
-UV
-Perl_grok_atou(const char *pv, const char** endptr)
+bool
+Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr)
{
const char* s = pv;
const char** eptr;
const char* end2; /* Used in case endptr is NULL. */
- UV val = 0; /* The return value. */
+ UV val = 0; /* The parsed value. */
- PERL_ARGS_ASSERT_GROK_ATOU;
+ PERL_ARGS_ASSERT_GROK_ATOUV;
eptr = endptr ? endptr : &end2;
if (isDIGIT(*s)) {
/* Single-digit inputs are quite common. */
val = *s++ - '0';
if (isDIGIT(*s)) {
- /* Extra leading zeros cause overflow. */
- if (val == 0) {
- *eptr = NULL;
- return UV_MAX;
- }
+ /* Fail on extra leading zeros. */
+ if (val == 0)
+ return FALSE;
while (isDIGIT(*s)) {
/* This could be unrolled like in grok_number(), but
* the expected uses of this are not speed-needy, and
@@ -1100,21 +1090,18 @@ Perl_grok_atou(const char *pv, const char** endptr)
(val == uv_max_div_10 && digit <= uv_max_mod_10)) {
val = val * 10 + digit;
} else {
- *eptr = NULL;
- return UV_MAX;
+ return FALSE;
}
}
}
}
- if (s == pv) {
- *eptr = NULL; /* If no progress, failed to parse anything. */
- return 0;
- }
- if (endptr == NULL && *s) {
- return 0; /* If endptr is NULL, no trailing non-digits allowed. */
- }
+ if (s == pv)
+ return FALSE;
+ if (endptr == NULL && *s)
+ return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */
*eptr = s;
- return val;
+ *valptr = val;
+ return TRUE;
}
#ifndef USE_QUADMATH
diff --git a/perl.c b/perl.c
index 3153608d71..230244baaf 100644
--- a/perl.c
+++ b/perl.c
@@ -550,7 +550,11 @@ perl_destruct(pTHXx)
if (strEQ(s, "-1")) { /* Special case: modperl folklore. */
i = -1;
} else {
- i = grok_atou(s, NULL);
+ UV uv;
+ if (grok_atoUV(s, &uv, NULL) && uv <= INT_MAX)
+ i = (int)uv;
+ else
+ i = 0;
}
#ifdef DEBUGGING
if (destruct_level < i) destruct_level = i;
@@ -1473,7 +1477,7 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env)
{
const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
- if (s && (grok_atou(s, NULL) == 1)) {
+ if (s && strEQ(s, "1")) {
unsigned char *seed= PERL_HASH_SEED;
unsigned char *seed_end= PERL_HASH_SEED + PERL_HASH_SEED_BYTES;
PerlIO_printf(Perl_debug_log, "HASH_FUNCTION = %s HASH_SEED = 0x", PERL_HASH_FUNC);
@@ -2312,7 +2316,9 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit)
#ifdef MYMALLOC
{
const char *s;
- if ((s=PerlEnv_getenv("PERL_DEBUG_MSTATS")) && grok_atou(s, NULL) >= 2)
+ UV uv;
+ s = PerlEnv_getenv("PERL_DEBUG_MSTATS");
+ if (s && grok_atoUV(s, &uv, NULL) && uv >= 2)
dump_mstats("after compilation:");
}
#endif
@@ -3046,7 +3052,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp)
" L trace some locale setting information--for Perl core development\n",
NULL
};
- int i = 0;
+ UV uv = 0;
PERL_ARGS_ASSERT_GET_DEBUG_OPTS;
@@ -3057,7 +3063,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp)
for (; isWORDCHAR(**s); (*s)++) {
const char * const d = strchr(debopts,**s);
if (d)
- i |= 1 << (d - debopts);
+ uv |= 1 << (d - debopts);
else if (ckWARN_d(WARN_DEBUGGING))
Perl_warner(aTHX_ packWARN(WARN_DEBUGGING),
"invalid option -D%c, use -D'' to see choices\n", **s);
@@ -3065,8 +3071,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp)
}
else if (isDIGIT(**s)) {
const char* e;
- i = grok_atou(*s, &e);
- if (e)
+ if (grok_atoUV(*s, &uv, &e))
*s = e;
for (; isWORDCHAR(**s); (*s)++) ;
}
@@ -3074,7 +3079,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp)
const char *const *p = usage_msgd;
while (*p) PerlIO_puts(PerlIO_stdout(), *p++);
}
- return i;
+ return (int)uv; /* ignore any UV->int conversion loss */
}
#endif
@@ -3668,14 +3673,17 @@ S_open_script(pTHX_ const char *scriptname, bool dosearch, bool *suidscript)
PL_origfilename = savepvs("-e");
}
else {
+ const char *s;
+ UV uv;
/* if find_script() returns, it returns a malloc()-ed value */
scriptname = PL_origfilename = find_script(scriptname, dosearch, NULL, 1);
- if (strnEQ(scriptname, "/dev/fd/", 8) && isDIGIT(scriptname[8]) ) {
- const char *s = scriptname + 8;
- const char* e;
- fdscript = grok_atou(s, &e);
- s = e;
+ if (strnEQ(scriptname, "/dev/fd/", 8)
+ && isDIGIT(scriptname[8])
+ && grok_atoUV(scriptname + 8, &uv, &s)
+ && uv <= PERL_INT_MAX
+ ) {
+ fdscript = (int)uv;
if (*s) {
/* PSz 18 Feb 04
* Tell apart "normal" usage of fdscript, e.g.
diff --git a/pod/perlclib.pod b/pod/perlclib.pod
index c5fb455aa0..b366c7f87e 100644
--- a/pod/perlclib.pod
+++ b/pod/perlclib.pod
@@ -203,15 +203,26 @@ C<toUPPER_uni>, as described in L<perlapi/Character case changing>.)
Instead Of: Use:
atof(s) Atof(s)
- atoi(s) grok_atou(s, &e)
- atol(s) grok_atou(s, &e)
+ atoi(s) grok_atoUV(s, &uv, &e)
+ atol(s) grok_atoUV(s, &uv, &e)
strtod(s, &p) Nothing. Just don't use it.
- strtol(s, &p, n) grok_atou(s, &e)
- strtoul(s, &p, n) grok_atou(s, &e)
+ strtol(s, &p, n) grok_atoUV(s, &uv, &e)
+ strtoul(s, &p, n) grok_atoUV(s, &uv, &e)
+
+Typical use is to do range checks on C<uv> before casting:
+
+ int i; UV uv; char* end_ptr;
+ if (grok_atoUV(input, &uv, &end_ptr)
+ && uv <= INT_MAX)
+ i = (int)uv;
+ ... /* continue parsing from end_ptr */
+ } else {
+ ... /* parse error: not a decimal integer in range 0 .. MAX_IV */
+ }
Notice also the C<grok_bin>, C<grok_hex>, and C<grok_oct> functions in
F<numeric.c> for converting strings representing numbers in the respective
-bases into C<NV>s. Note that grok_atou() doesn't handle negative inputs,
+bases into C<NV>s. Note that grok_atoUV() doesn't handle negative inputs,
or leading whitespace (being purposefully strict).
Note that strtol() and strtoul() may be disguised as Strtol(), Strtoul(),
diff --git a/pod/perlhacktips.pod b/pod/perlhacktips.pod
index 943bdfbc1c..7b345167b8 100644
--- a/pod/perlhacktips.pod
+++ b/pod/perlhacktips.pod
@@ -641,7 +641,7 @@ L<https://sourceware.org/bugzilla/show_bug.cgi?id=6530>.
Do not use atoi()
-Use grok_atou() instead. atoi() has ill-defined behavior on overflows,
+Use grok_atoUV() instead. atoi() has ill-defined behavior on overflows,
and cannot be used for incremental parsing. It is also affected by locale,
which is bad.
@@ -649,7 +649,7 @@ which is bad.
Do not use strtol() or strtoul()
-Use grok_atou() instead. strtol() or strtoul() (or their IV/UV-friendly
+Use grok_atoUV() instead. strtol() or strtoul() (or their IV/UV-friendly
macro disguises, Strtol() and Strtoul(), or Atol() and Atoul() are
affected by locale, which is bad.
diff --git a/pod/perllocale.pod b/pod/perllocale.pod
index fdf524f956..15e91814cd 100644
--- a/pod/perllocale.pod
+++ b/pod/perllocale.pod
@@ -556,7 +556,7 @@ The two quickest fixes are either to render Perl silent about any
locale inconsistencies or to run Perl under the default locale "C".
Perl's moaning about locale problems can be silenced by setting the
-environment variable C<PERL_BADLANG> to a zero value, for example "0".
+environment variable C<PERL_BADLANG> to "0" or "".
This method really just sweeps the problem under the carpet: you tell
Perl to shut up even when Perl sees that something is wrong. Do not
be surprised if later something locale-dependent misbehaves.
@@ -1196,9 +1196,8 @@ A string that can suppress Perl's warning about failed locale settings
at startup. Failure can occur if the locale support in the operating
system is lacking (broken) in some way--or if you mistyped the name of
a locale when you set up your environment. If this environment
-variable is absent, or has a value that does not evaluate to integer
-zero--that is, "0" or ""-- Perl will complain about locale setting
-failures.
+variable is absent, or has a value other than "0" or "", Perl will
+complain about locale setting failures.
B<NOTE>: C<PERL_BADLANG> only gives you a way to hide the warning message.
The message tells about some problem in your system's locale support,
diff --git a/pp_sys.c b/pp_sys.c
index e2f8edf9de..2f70006842 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -3340,6 +3340,7 @@ PP(pp_fttty)
GV *gv;
char *name = NULL;
STRLEN namelen;
+ UV uv;
tryAMAGICftest_MG('t');
@@ -3355,8 +3356,8 @@ PP(pp_fttty)
if (GvIO(gv) && IoIFP(GvIOp(gv)))
fd = PerlIO_fileno(IoIFP(GvIOp(gv)));
- else if (name && isDIGIT(*name))
- fd = grok_atou(name, NULL);
+ else if (name && isDIGIT(*name) && grok_atoUV(name, &uv, NULL) && uv <= PERL_INT_MAX)
+ fd = (int)uv;
else
FT_RETURNUNDEF;
if (fd < 0) {
diff --git a/proto.h b/proto.h
index 54115ca387..2ceb189b1f 100644
--- a/proto.h
+++ b/proto.h
@@ -1340,10 +1340,11 @@ PERL_CALLCONV int Perl_getcwd_sv(pTHX_ SV* sv)
PERL_CALLCONV void Perl_gp_free(pTHX_ GV* gv);
PERL_CALLCONV GP* Perl_gp_ref(pTHX_ GP* gp);
-PERL_CALLCONV UV Perl_grok_atou(const char* pv, const char** endptr)
- __attribute__nonnull__(1);
-#define PERL_ARGS_ASSERT_GROK_ATOU \
- assert(pv)
+PERL_CALLCONV bool Perl_grok_atoUV(const char* pv, UV* valptr, const char** endptr)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2);
+#define PERL_ARGS_ASSERT_GROK_ATOUV \
+ assert(pv); assert(valptr)
PERL_CALLCONV UV Perl_grok_bin(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result)
__attribute__nonnull__(pTHX_1)
diff --git a/regcomp.c b/regcomp.c
index 0be6f217a2..4cbcf36d3e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10121,10 +10121,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
RExC_parse++;
is_neg = TRUE;
}
- unum = grok_atou(RExC_parse, &endptr);
- num = (unum > I32_MAX) ? I32_MAX : (I32)unum;
- if (endptr)
- RExC_parse = (char*)endptr;
+ if (grok_atoUV(RExC_parse, &unum, &endptr)
+ && unum <= I32_MAX
+ ) {
+ num = (I32)unum;
+ RExC_parse = (char*)endptr;
+ } else
+ num = I32_MAX;
if (is_neg) {
/* Some limit for num? */
num = -num;
@@ -10308,9 +10311,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
RExC_parse++;
parno = 0;
if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
- parno = grok_atou(RExC_parse, &endptr);
- if (endptr)
+ UV uv;
+ if (grok_atoUV(RExC_parse, &uv, &endptr)
+ && uv <= I32_MAX
+ ) {
+ parno = (I32)uv;
RExC_parse = (char*)endptr;
+ }
+ /* XXX else what? */
} else if (RExC_parse[0] == '&') {
SV *sv_dat;
RExC_parse++;
@@ -10327,9 +10335,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
/* (?(1)...) */
char c;
char *tmp;
- parno = grok_atou(RExC_parse, &endptr);
- if (endptr)
- RExC_parse = (char*)endptr;
+ UV uv;
+ if (grok_atoUV(RExC_parse, &uv, &endptr)
+ && uv <= I32_MAX
+ ) {
+ parno = (I32)uv;
+ RExC_parse = (char*)endptr;
+ }
+ /* XXX else what? */
ret = reganode(pRExC_state, GROUPP, parno);
insert_if_check_paren:
@@ -10815,8 +10828,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
maxpos = next;
RExC_parse++;
if (isDIGIT(*RExC_parse)) {
- uv = grok_atou(RExC_parse, &endptr);
- if (!endptr)
+ if (!grok_atoUV(RExC_parse, &uv, &endptr))
vFAIL("Invalid quantifier in {,}");
if (uv >= REG_INFTY)
vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
@@ -10829,8 +10841,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
else
maxpos = RExC_parse;
if (isDIGIT(*maxpos)) {
- uv = grok_atou(maxpos, &endptr);
- if (!endptr)
+ if (!grok_atoUV(maxpos, &uv, &endptr))
vFAIL("Invalid quantifier in {,}");
if (uv >= REG_INFTY)
vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
@@ -11531,10 +11542,10 @@ static I32
S_backref_value(char *p)
{
const char* endptr;
- UV val = grok_atou(p, &endptr);
- if (endptr == p || endptr == NULL || val > I32_MAX)
- return I32_MAX;
- return (I32)val;
+ UV val;
+ if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX)
+ return (I32)val;
+ return I32_MAX;
}
diff --git a/toke.c b/toke.c
index 0eeafd4311..9715f0e157 100644
--- a/toke.c
+++ b/toke.c
@@ -1649,6 +1649,7 @@ S_incline(pTHX_ const char *s)
const char *n;
const char *e;
line_t line_num;
+ UV uv;
PERL_ARGS_ASSERT_INCLINE;
@@ -1698,7 +1699,9 @@ S_incline(pTHX_ const char *s)
if (*e != '\n' && *e != '\0')
return; /* false alarm */
- line_num = grok_atou(n, &e) - 1;
+ if (!grok_atoUV(n, &uv, &e))
+ return;
+ line_num = ((line_t)uv) - 1;
if (t - s > 0) {
const STRLEN len = t - s;
diff --git a/utf8.c b/utf8.c
index bbc2f8d6b3..184ed314cb 100644
--- a/utf8.c
+++ b/utf8.c
@@ -3549,7 +3549,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
/* The first number is a count of the rest */
l++;
- elements = grok_atou((const char *)l, &after_atou);
+ if (!grok_atoUV((const char *)l, &elements, &after_atou)) {
+ Perl_croak(aTHX_ "panic: Expecting a valid count of elements at start of inversion list");
+ }
if (elements == 0) {
invlist = _new_invlist(0);
}
@@ -3559,7 +3561,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
/* Get the 0th element, which is needed to setup the inversion list */
while (isSPACE(*l)) l++;
- element0 = (UV) grok_atou((const char *)l, &after_atou);
+ if (!grok_atoUV((const char *)l, &element0, &after_atou)) {
+ Perl_croak(aTHX_ "panic: Expecting a valid 0th element for inversion list");
+ }
l = (U8 *) after_atou;
invlist = _setup_canned_invlist(elements, element0, &other_elements_ptr);
elements--;
@@ -3570,7 +3574,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
Perl_croak(aTHX_ "panic: Expecting %"UVuf" more elements than available", elements);
}
while (isSPACE(*l)) l++;
- *other_elements_ptr++ = (UV) grok_atou((const char *)l, &after_atou);
+ if (!grok_atoUV((const char *)l, other_elements_ptr++, &after_atou)) {
+ Perl_croak(aTHX_ "panic: Expecting a valid element in inversion list");
+ }
l = (U8 *) after_atou;
}
}
diff --git a/util.c b/util.c
index 11ed10bbfd..3cb610e613 100644
--- a/util.c
+++ b/util.c
@@ -1375,11 +1375,13 @@ Perl_mess_sv(pTHX_ SV *basemsg, bool consume)
#if defined(USE_C_BACKTRACE) && defined(USE_C_BACKTRACE_ON_ERROR)
{
char *ws;
- int wi;
+ UV wi;
/* The PERL_C_BACKTRACE_ON_WARN must be an integer of one or more. */
- if ((ws = PerlEnv_getenv("PERL_C_BACKTRACE_ON_ERROR")) &&
- (wi = grok_atou(ws, NULL)) > 0) {
- Perl_dump_c_backtrace(aTHX_ Perl_debug_log, wi, 1);
+ if ((ws = PerlEnv_getenv("PERL_C_BACKTRACE_ON_ERROR"))
+ && grok_atoUV(ws, &wi, NULL)
+ && wi <= PERL_INT_MAX
+ ) {
+ Perl_dump_c_backtrace(aTHX_ Perl_debug_log, (int)wi, 1);
}
}
#endif
@@ -4420,15 +4422,20 @@ Perl_parse_unicode_opts(pTHX_ const char **popt)
if (*p) {
if (isDIGIT(*p)) {
const char* endptr;
- opt = (U32) grok_atou(p, &endptr);
- p = endptr;
- if (*p && *p != '\n' && *p != '\r') {
- if(isSPACE(*p)) goto the_end_of_the_opts_parser;
- else
- Perl_croak(aTHX_ "Unknown Unicode option letter '%c'", *p);
- }
- }
- else {
+ UV uv;
+ if (grok_atoUV(p, &uv, &endptr)
+ && uv <= U32_MAX
+ && (p = endptr)
+ && *p && *p != '\n' && *p != '\r'
+ ) {
+ opt = (U32)uv;
+ if (isSPACE(*p))
+ goto the_end_of_the_opts_parser;
+ else
+ Perl_croak(aTHX_ "Unknown Unicode option letter '%c'", *p);
+ }
+ }
+ else {
for (; *p; p++) {
switch (*p) {
case PERL_UNICODE_STDIN:
@@ -4729,14 +4736,14 @@ Perl_free_global_struct(pTHX_ struct perl_vars *plvarsp)
#ifdef PERL_MEM_LOG
-/* -DPERL_MEM_LOG: the Perl_mem_log_..() is compiled, including the
+/* -DPERL_MEM_LOG: the Perl_mem_log_..() is compiled, including
* the default implementation, unless -DPERL_MEM_LOG_NOIMPL is also
* given, and you supply your own implementation.
*
* The default implementation reads a single env var, PERL_MEM_LOG,
* expecting one or more of the following:
*
- * \d+ - fd fd to write to : must be 1st (grok_atou)
+ * \d+ - fd fd to write to : must be 1st (grok_atoUV)
* 'm' - memlog was PERL_MEM_LOG=1
* 's' - svlog was PERL_SV_LOG=1
* 't' - timestamp was PERL_MEM_LOG_TIMESTAMP=1
@@ -4805,9 +4812,15 @@ S_mem_log_common(enum mem_log_type mlt, const UV n,
{
STRLEN len;
const char* endptr;
- int fd = grok_atou(pmlenv, &endptr); /* Ignore endptr. */
- if (!fd)
+ int fd;
+ UV uv;
+ if (grok_atoUV(pmlenv, &uv, &endptr) /* Ignore endptr. */
+ && uv && uv <= PERL_INT_MAX
+ ) {
+ fd = (int)uv;
+ } else {
fd = PERL_MEM_LOG_FD;
+ }
if (strchr(pmlenv, 't')) {
len = my_snprintf(buf, sizeof(buf),
@@ -6008,6 +6021,8 @@ static const char* atos_parse(const char* p,
const char* source_name_end;
const char* source_line_end;
const char* close_paren;
+ UV uv;
+
/* Skip trailing whitespace. */
while (p > start && isspace(*p)) p--;
/* Now we should be at the close paren. */
@@ -6034,10 +6049,14 @@ static const char* atos_parse(const char* p,
return NULL;
p++;
*source_name_size = source_name_end - p;
- *source_line = grok_atou(source_number_start, &source_line_end);
- if (source_line_end != close_paren)
- return NULL;
- return p;
+ if (grok_atoUV(source_number_start, &uv, &source_line_end)
+ && source_line_end == close_paren
+ && uv <= MAX_STRLEN
+ ) {
+ *source_line = (STRLEN)uv;
+ return p;
+ }
+ return NULL;
}
/* Given a raw frame, read a pipe from the symbolicator (that's the