diff options
author | Hugo van der Sanden <hv@crypt.org> | 2015-02-23 16:48:15 +0000 |
---|---|---|
committer | Hugo van der Sanden <hv@crypt.org> | 2015-03-09 22:15:46 +0000 |
commit | 22ff313068aa37b1a24855e760e71ee9a20a1a90 (patch) | |
tree | 09da195258e4a85e42a39dd24d7c60c849409ef1 | |
parent | 35cd12d12a5a5777098caf722f8748b39c3be45f (diff) | |
download | perl-22ff313068aa37b1a24855e760e71ee9a20a1a90.tar.gz |
[perl #123814] replace grok_atou with grok_atoUV
Some questions and loose ends:
XXX gv.c:S_gv_magicalize - why are we using SSize_t for paren?
XXX mg.c:Perl_magic_set - need appopriate error handling for $)
XXX regcomp.c:S_reg - need to check if we do the right thing if parno
was not grokked
Perl_get_debug_opts should probably return something unsigned; not sure
if that's something we can change.
-rw-r--r-- | doio.c | 8 | ||||
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | ext/DynaLoader/dlutils.c | 10 | ||||
-rw-r--r-- | ext/XS-APItest/APItest.pm | 2 | ||||
-rw-r--r-- | ext/XS-APItest/numeric.xs | 12 | ||||
-rw-r--r-- | ext/XS-APItest/t/grok.t | 126 | ||||
-rw-r--r-- | gv.c | 12 | ||||
-rw-r--r-- | locale.c | 5 | ||||
-rw-r--r-- | malloc.c | 4 | ||||
-rw-r--r-- | mg.c | 15 | ||||
-rw-r--r-- | numeric.c | 59 | ||||
-rw-r--r-- | perl.c | 34 | ||||
-rw-r--r-- | pod/perlclib.pod | 21 | ||||
-rw-r--r-- | pod/perlhacktips.pod | 4 | ||||
-rw-r--r-- | pod/perllocale.pod | 7 | ||||
-rw-r--r-- | pp_sys.c | 5 | ||||
-rw-r--r-- | proto.h | 9 | ||||
-rw-r--r-- | regcomp.c | 45 | ||||
-rw-r--r-- | toke.c | 5 | ||||
-rw-r--r-- | utf8.c | 12 | ||||
-rw-r--r-- | util.c | 61 |
22 files changed, 269 insertions, 191 deletions
@@ -377,6 +377,7 @@ Perl_do_open6(pTHX_ GV *gv, const char *oname, STRLEN len, else { PerlIO *that_fp = NULL; int wanted_fd; + UV uv; if (num_svs > 1) { /* diag_listed_as: More than one argument to '%s' open */ Perl_croak(aTHX_ "More than one argument to '%c&' open",IoTYPE(io)); @@ -390,8 +391,11 @@ Perl_do_open6(pTHX_ GV *gv, const char *oname, STRLEN len, wanted_fd = SvUV(*svp); num_svs = 0; } - else if (isDIGIT(*type)) { - wanted_fd = grok_atou(type, NULL); + else if (isDIGIT(*type) + && grok_atoUV(type, &uv, NULL) + && uv <= INT_MAX + ) { + wanted_fd = (int)uv; } else { const IO* thatio; @@ -818,7 +818,7 @@ Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep Apd |int |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV *valuep|U32 flags ApdR |bool |grok_numeric_radix|NN const char **sp|NN const char *send Apd |UV |grok_oct |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result -Apdn |UV |grok_atou |NN const char* pv|NULLOK const char** endptr +Apdn |bool |grok_atoUV |NN const char* pv|NN UV* valptr|NULLOK const char** endptr : These are all indirectly referenced by globals.c. This is somewhat annoying. p |int |magic_clearenv |NN SV* sv|NN MAGIC* mg p |int |magic_clear_all_env|NN SV* sv|NN MAGIC* mg @@ -178,7 +178,7 @@ #define getcwd_sv(a) Perl_getcwd_sv(aTHX_ a) #define gp_free(a) Perl_gp_free(aTHX_ a) #define gp_ref(a) Perl_gp_ref(aTHX_ a) -#define grok_atou Perl_grok_atou +#define grok_atoUV Perl_grok_atoUV #define grok_bin(a,b,c,d) Perl_grok_bin(aTHX_ a,b,c,d) #define grok_hex(a,b,c,d) Perl_grok_hex(aTHX_ a,b,c,d) #define grok_infnan(a,b) Perl_grok_infnan(aTHX_ a,b) diff --git a/ext/DynaLoader/dlutils.c b/ext/DynaLoader/dlutils.c index 96ea8befa5..fca8e787eb 100644 --- a/ext/DynaLoader/dlutils.c +++ b/ext/DynaLoader/dlutils.c @@ -100,6 +100,7 @@ dl_generic_private_init(pTHX) /* called by dl_*.xs dl_private_init() */ { #if defined(PERL_IN_DL_HPUX_XS) || defined(PERL_IN_DL_DLOPEN_XS) char *perl_dl_nonlazy; + UV uv; #endif MY_CXT_INIT; @@ -115,9 +116,12 @@ dl_generic_private_init(pTHX) /* called by dl_*.xs dl_private_init() */ #endif #if defined(PERL_IN_DL_HPUX_XS) || defined(PERL_IN_DL_DLOPEN_XS) - if ( (perl_dl_nonlazy = getenv("PERL_DL_NONLAZY")) != NULL ) - dl_nonlazy = grok_atou(perl_dl_nonlazy, NULL); - else + if ( (perl_dl_nonlazy = getenv("PERL_DL_NONLAZY")) != NULL + && grok_atoUV(perl_dl_nonlazy, &uv, NULL) + && uv <= INT_MAX + ) { + dl_nonlazy = (int)uv; + } else dl_nonlazy = 0; if (dl_nonlazy) DLDEBUG(1,PerlIO_printf(Perl_debug_log, "DynaLoader bind mode is 'non-lazy'\n")); diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index a759492f0a..e4b7156496 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -5,7 +5,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.70'; +our $VERSION = '0.71'; require XSLoader; diff --git a/ext/XS-APItest/numeric.xs b/ext/XS-APItest/numeric.xs index 6d1ef82340..0ce9e080ff 100644 --- a/ext/XS-APItest/numeric.xs +++ b/ext/XS-APItest/numeric.xs @@ -32,22 +32,24 @@ grok_number_flags(number, flags) PUSHs(sv_2mortal(newSVuv(value))); void -grok_atou(number, endsv) +grok_atoUV(number, endsv) SV *number SV *endsv PREINIT: STRLEN len; const char *pv = SvPV(number, len); - UV result; + UV value = 0xdeadbeef; + bool result; const char* endptr; PPCODE: EXTEND(SP,2); if (endsv == &PL_sv_undef) { - result = grok_atou(pv, NULL); + result = grok_atoUV(pv, &value, NULL); } else { - result = grok_atou(pv, &endptr); + result = grok_atoUV(pv, &value, &endptr); } - PUSHs(sv_2mortal(newSVuv(result))); + PUSHs(result ? &PL_sv_yes : &PL_sv_no); + PUSHs(sv_2mortal(newSVuv(value))); if (endsv == &PL_sv_undef) { PUSHs(sv_2mortal(newSVpvn(NULL, 0))); } else { diff --git a/ext/XS-APItest/t/grok.t b/ext/XS-APItest/t/grok.t index f66717bb7c..c3169ce7cc 100644 --- a/ext/XS-APItest/t/grok.t +++ b/ext/XS-APItest/t/grok.t @@ -137,26 +137,26 @@ my @atous = [ "12x", "x", 12, 2 ], # Leading whitespace is failure. - [ " 0", " 0", 0, 0 ], - [ " 1", " 1", 0, 0 ], - [ " 12", " 12", 0, 0 ], + [ " 0", undef, 0, 0 ], + [ " 1", undef, 0, 0 ], + [ " 12", undef, 0, 0 ], # Leading garbage is outright failure. - [ "x0", "x0", 0, 0 ], - [ "x1", "x1", 0, 0 ], - [ "x12", "x12", 0, 0 ], + [ "x0", undef, 0, 0 ], + [ "x1", undef, 0, 0 ], + [ "x12", undef, 0, 0 ], # We do not parse decimal point. - [ "12.3", ".3", 12, 2 ], + [ "12.3", ".3", 12, 2 ], # Leading pluses or minuses are no good. - [ "+12", "+12", 0, 0 ], - [ "-12", "-12", 0, 0 ], + [ "+12", undef, 0, 0 ], + [ "-12", undef, 0, 0 ], - # Extra leading zeros cause overflow. - [ "00", "00", $ATOU_MAX, 0 ], - [ "01", "01", $ATOU_MAX, 0 ], - [ "012", "012", $ATOU_MAX, 0 ], + # Extra leading zeros are no good. + [ "00", undef, $ATOU_MAX, 0 ], + [ "01", undef, $ATOU_MAX, 0 ], + [ "012", undef, $ATOU_MAX, 0 ], ); # Values near overflow point. @@ -173,83 +173,93 @@ if ($Config{uvsize} == 8) { # This is well within 64-bit. [ "9999999999", "", 9999999999, 10, ], - # Values valid up to 64-bit and beyond. + # Values valid up to 64-bit, failing beyond. [ "18446744073709551613", "", 18446744073709551613, 20, ], [ "18446744073709551614", "", 18446744073709551614, 20, ], [ "18446744073709551615", "", $ATOU_MAX, 20, ], - [ "18446744073709551616", "", $ATOU_MAX, 0, ], - [ "18446744073709551617", "", $ATOU_MAX, 0, ], + [ "18446744073709551616", undef, $ATOU_MAX, 0, ], + [ "18446744073709551617", undef, $ATOU_MAX, 0, ], ); } elsif ($Config{uvsize} == 4) { push @atous, ( - # Values valid up to 32-bit and beyond. + # Values valid up to 32-bit, failing beyond. [ "4294967293", "", 4294967293, 10, ], [ "4294967294", "", 4294967294, 10, ], [ "4294967295", "", $ATOU_MAX, 10, ], - [ "4294967296", "", $ATOU_MAX, 0, ], - [ "4294967297", "", $ATOU_MAX, 0, ], + [ "4294967296", undef, $ATOU_MAX, 0, ], + [ "4294967297", undef, $ATOU_MAX, 0, ], # Still beyond 32-bit. - [ "4999999999", "", $ATOU_MAX, 0, ], - [ "5678901234", "", $ATOU_MAX, 0, ], - [ "6789012345", "", $ATOU_MAX, 0, ], - [ "7890123456", "", $ATOU_MAX, 0, ], - [ "8901234567", "", $ATOU_MAX, 0, ], - [ "9012345678", "", $ATOU_MAX, 0, ], - [ "9999999999", "", $ATOU_MAX, 0, ], - [ "10000000000", "", $ATOU_MAX, 0, ], - [ "12345678901", "", $ATOU_MAX, 0, ], + [ "4999999999", undef, $ATOU_MAX, 0, ], + [ "5678901234", undef, $ATOU_MAX, 0, ], + [ "6789012345", undef, $ATOU_MAX, 0, ], + [ "7890123456", undef, $ATOU_MAX, 0, ], + [ "8901234567", undef, $ATOU_MAX, 0, ], + [ "9012345678", undef, $ATOU_MAX, 0, ], + [ "9999999999", undef, $ATOU_MAX, 0, ], + [ "10000000000", undef, $ATOU_MAX, 0, ], + [ "12345678901", undef, $ATOU_MAX, 0, ], # 64-bit values are way beyond. - [ "18446744073709551613", "", $ATOU_MAX, 0, ], - [ "18446744073709551614", "", $ATOU_MAX, 0, ], - [ "18446744073709551615", "", $ATOU_MAX, 0, ], - [ "18446744073709551616", "", $ATOU_MAX, 0, ], - [ "18446744073709551617", "", $ATOU_MAX, 0, ], + [ "18446744073709551613", undef, $ATOU_MAX, 0, ], + [ "18446744073709551614", undef, $ATOU_MAX, 0, ], + [ "18446744073709551615", undef, $ATOU_MAX, 0, ], + [ "18446744073709551616", undef, $ATOU_MAX, 0, ], + [ "18446744073709551617", undef, $ATOU_MAX, 0, ], ); } # These will fail to fail once 128/256-bit systems arrive. push @atous, ( - [ "23456789012345678901", "", $ATOU_MAX, 0 ], - [ "34567890123456789012", "", $ATOU_MAX, 0 ], - [ "98765432109876543210", "", $ATOU_MAX, 0 ], - [ "98765432109876543211", "", $ATOU_MAX, 0 ], - [ "99999999999999999999", "", $ATOU_MAX, 0 ], + [ "23456789012345678901", undef, $ATOU_MAX, 0 ], + [ "34567890123456789012", undef, $ATOU_MAX, 0 ], + [ "98765432109876543210", undef, $ATOU_MAX, 0 ], + [ "98765432109876543211", undef, $ATOU_MAX, 0 ], + [ "99999999999999999999", undef, $ATOU_MAX, 0 ], ); for my $grok (@atous) { my $input = $grok->[0]; my $endsv = $grok->[1]; + my $expect_ok = defined $endsv; + my $strict_ok = $expect_ok && $endsv eq ''; - my ($out_uv, $out_len); + my ($ok, $out_uv, $out_len); # First with endsv. - ($out_uv, $out_len) = grok_atou($input, $endsv); - is($out_uv, $grok->[2], - "'$input' $endsv - number success (got $out_uv cf $grok->[2])"); - ok($grok->[3] <= length $input, "'$input' $endsv - length sanity 1"); - unless (length $grok->[1]) { - is($out_len, $grok->[3], "'$input' $endsv - length sanity 2"); - } # else { ... } ? - if ($out_len) { - is($endsv, substr($input, $out_len), - "'$input' $endsv - length sanity 3"); + ($ok, $out_uv, $out_len) = grok_atoUV($input, $endsv); + is($expect_ok, $ok, sprintf "'$input' expected %s, got %s", + ($expect_ok ? 'success' : 'failure'), + ($ok ? 'success' : 'failure'), + ); + if ($expect_ok) { + is($expect_ok, $ok, "'$input' expect success"); + is($out_uv, $grok->[2], + "'$input' $endsv - number success (got $out_uv cf $grok->[2])"); + ok($grok->[3] <= length $input, "'$input' $endsv - length sanity 1"); + unless (length $grok->[1]) { + is($out_len, $grok->[3], "'$input' $endsv - length sanity 2"); + } # else { ... } ? + if ($out_len) { + is($endsv, substr($input, $out_len), + "'$input' $endsv - length sanity 3"); + } + } else { + is($expect_ok, $ok, "'$input' expect failure"); + is(0xdeadbeef, $out_uv, "'$input' on failure expect value unchanged"); } # Then without endsv (undef == NULL). - ($out_uv, $out_len) = grok_atou($input, undef); - if (length $grok->[1]) { - if ($grok->[2] == $ATOU_MAX) { - is($out_uv, $ATOU_MAX, "'$input' undef - number overflow"); - } else { - is($out_uv, 0, "'$input' undef - number zero"); - } - } else { + ($ok, $out_uv, $out_len) = grok_atoUV($input, undef); + if ($strict_ok) { + is($strict_ok, $ok, "'$input' expect strict success"); is($out_uv, $grok->[2], - "'$input' undef - number success (got $out_uv cf $grok->[2])"); + "'$input' $endsv - strict number success (got $out_uv cf $grok->[2])"); + } else { + is($strict_ok, $ok, "'$input' expect strict failure"); + is(0xdeadbeef, $out_uv, "'$input' on strict failure expect value unchanged"); } } @@ -1984,13 +1984,11 @@ S_gv_magicalize(pTHX_ GV *gv, HV *stash, const char *name, STRLEN len, { /* Ensures that we have an all-digit variable, ${"1foo"} fails this test */ - /* This snippet is taken from is_gv_magical */ - const char *end = name + len; - while (--end > name) { - if (!isDIGIT(*end)) - return addmg; - } - paren = grok_atou(name, NULL); + UV uv; + if (!grok_atoUV(name, &uv, NULL) || uv > I32_MAX) + return addmg; + /* XXX why are we using a SSize_t? */ + paren = (SSize_t)(I32)uv; goto storeparen; } } @@ -675,7 +675,10 @@ Perl_init_i18nl10n(pTHX_ int printwarn) const bool locwarn = (printwarn > 1 || (printwarn && (! bad_lang_use_once - || grok_atou(bad_lang_use_once, NULL)))); + || ( + /* disallow with "" or "0" */ + *bad_lang_use_once + && strNE("0", bad_lang_use_once))))); bool done = FALSE; #ifdef WIN32 /* In some systems you can find out the system default locale @@ -1824,7 +1824,7 @@ Perl_mfree(Malloc_t where) if (bad_free_warn == -1) { dTHX; char *pbf = PerlEnv_getenv("PERL_BADFREE"); - bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1; + bad_free_warn = (pbf) ? strNE("0", pbf) : 1; } if (!bad_free_warn) return; @@ -1922,7 +1922,7 @@ Perl_realloc(void *mp, size_t nbytes) if (bad_free_warn == -1) { dTHX; char *pbf = PerlEnv_getenv("PERL_BADFREE"); - bad_free_warn = (pbf) ? grok_atou(pbf, NULL) : 1; + bad_free_warn = (pbf) ? strNE("0", pbf) : 1; } if (!bad_free_warn) return NULL; @@ -3020,6 +3020,7 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) const char *p = SvPV_const(sv, len); Groups_t *gary = NULL; const char* endptr; + UV uv; #ifdef _SC_NGROUPS_MAX int maxgrp = sysconf(_SC_NGROUPS_MAX); @@ -3031,7 +3032,12 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) while (isSPACE(*p)) ++p; - new_egid = (Gid_t)grok_atou(p, &endptr); + if (grok_atoUV(p, &uv, &endptr)) + new_egid = (Gid_t)uv; + else { + new_egid = 0; /* XXX is this safe? */ + endptr = NULL; + } for (i = 0; i < maxgrp; ++i) { if (endptr == NULL) break; @@ -3044,7 +3050,12 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) Newx(gary, i + 1, Groups_t); else Renew(gary, i + 1, Groups_t); - gary[i] = (Groups_t)grok_atou(p, &endptr); + if (grok_atoUV(p, &uv, &endptr)) + gary[i] = (Groups_t)uv; + else { + gary[i] = 0; /* XXX is this safe? */ + endptr = NULL; + } } if (i) PERL_UNUSED_RESULT(setgroups(i, gary)); @@ -1033,11 +1033,9 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) } /* -=for apidoc grok_atou +=for apidoc grok_atoUV -grok_atou is a safer replacement for atoi and strtol. - -grok_atou parses a C-style zero-byte terminated string, looking for +grok_atoUV parses a C-style zero-byte terminated string, looking for a decimal unsigned integer. Returns the unsigned integer, if a valid value can be parsed @@ -1045,23 +1043,17 @@ from the beginning of the string. Accepts only the decimal digits '0'..'9'. -As opposed to atoi or strtol, grok_atou does NOT allow optional +As opposed to atoi or strtol, grok_atoUV does NOT allow optional leading whitespace, or negative inputs. If such features are required, the calling code needs to explicitly implement those. -If a valid value cannot be parsed, returns either zero (if non-digits -are met before any digits) or UV_MAX (if the value overflows). - -Note that extraneous leading zeros also count as an overflow -(meaning that only "0" is the zero). - -On failure, the *endptr is also set to NULL, unless endptr is NULL. +Returns true if a valid value could be parsed. In that case, valptr +is set to the parsed value, and endptr (if provided) is set to point +to the character after the last digit. -Trailing non-digit bytes are allowed if the endptr is non-NULL. -On return the *endptr will contain the pointer to the first non-digit byte. - -If the endptr is NULL, the first non-digit byte MUST be -the zero byte terminating the pv, or zero will be returned. +Returns false otherwise. This can happen if a) there is a leading zero +followed by another digit; b) the digits would overflow a UV; or c) +there are trailing non-digits AND endptr is not provided. Background: atoi has severe problems with illegal inputs, it cannot be used for incremental parsing, and therefore should be avoided @@ -1071,26 +1063,24 @@ seen as a bug (global state controlled by user environment). =cut */ -UV -Perl_grok_atou(const char *pv, const char** endptr) +bool +Perl_grok_atoUV(const char *pv, UV *valptr, const char** endptr) { const char* s = pv; const char** eptr; const char* end2; /* Used in case endptr is NULL. */ - UV val = 0; /* The return value. */ + UV val = 0; /* The parsed value. */ - PERL_ARGS_ASSERT_GROK_ATOU; + PERL_ARGS_ASSERT_GROK_ATOUV; eptr = endptr ? endptr : &end2; if (isDIGIT(*s)) { /* Single-digit inputs are quite common. */ val = *s++ - '0'; if (isDIGIT(*s)) { - /* Extra leading zeros cause overflow. */ - if (val == 0) { - *eptr = NULL; - return UV_MAX; - } + /* Fail on extra leading zeros. */ + if (val == 0) + return FALSE; while (isDIGIT(*s)) { /* This could be unrolled like in grok_number(), but * the expected uses of this are not speed-needy, and @@ -1100,21 +1090,18 @@ Perl_grok_atou(const char *pv, const char** endptr) (val == uv_max_div_10 && digit <= uv_max_mod_10)) { val = val * 10 + digit; } else { - *eptr = NULL; - return UV_MAX; + return FALSE; } } } } - if (s == pv) { - *eptr = NULL; /* If no progress, failed to parse anything. */ - return 0; - } - if (endptr == NULL && *s) { - return 0; /* If endptr is NULL, no trailing non-digits allowed. */ - } + if (s == pv) + return FALSE; + if (endptr == NULL && *s) + return FALSE; /* If endptr is NULL, no trailing non-digits allowed. */ *eptr = s; - return val; + *valptr = val; + return TRUE; } #ifndef USE_QUADMATH @@ -550,7 +550,11 @@ perl_destruct(pTHXx) if (strEQ(s, "-1")) { /* Special case: modperl folklore. */ i = -1; } else { - i = grok_atou(s, NULL); + UV uv; + if (grok_atoUV(s, &uv, NULL) && uv <= INT_MAX) + i = (int)uv; + else + i = 0; } #ifdef DEBUGGING if (destruct_level < i) destruct_level = i; @@ -1473,7 +1477,7 @@ perl_parse(pTHXx_ XSINIT_t xsinit, int argc, char **argv, char **env) { const char * const s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG"); - if (s && (grok_atou(s, NULL) == 1)) { + if (s && strEQ(s, "1")) { unsigned char *seed= PERL_HASH_SEED; unsigned char *seed_end= PERL_HASH_SEED + PERL_HASH_SEED_BYTES; PerlIO_printf(Perl_debug_log, "HASH_FUNCTION = %s HASH_SEED = 0x", PERL_HASH_FUNC); @@ -2312,7 +2316,9 @@ S_parse_body(pTHX_ char **env, XSINIT_t xsinit) #ifdef MYMALLOC { const char *s; - if ((s=PerlEnv_getenv("PERL_DEBUG_MSTATS")) && grok_atou(s, NULL) >= 2) + UV uv; + s = PerlEnv_getenv("PERL_DEBUG_MSTATS"); + if (s && grok_atoUV(s, &uv, NULL) && uv >= 2) dump_mstats("after compilation:"); } #endif @@ -3046,7 +3052,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) " L trace some locale setting information--for Perl core development\n", NULL }; - int i = 0; + UV uv = 0; PERL_ARGS_ASSERT_GET_DEBUG_OPTS; @@ -3057,7 +3063,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) for (; isWORDCHAR(**s); (*s)++) { const char * const d = strchr(debopts,**s); if (d) - i |= 1 << (d - debopts); + uv |= 1 << (d - debopts); else if (ckWARN_d(WARN_DEBUGGING)) Perl_warner(aTHX_ packWARN(WARN_DEBUGGING), "invalid option -D%c, use -D'' to see choices\n", **s); @@ -3065,8 +3071,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) } else if (isDIGIT(**s)) { const char* e; - i = grok_atou(*s, &e); - if (e) + if (grok_atoUV(*s, &uv, &e)) *s = e; for (; isWORDCHAR(**s); (*s)++) ; } @@ -3074,7 +3079,7 @@ Perl_get_debug_opts(pTHX_ const char **s, bool givehelp) const char *const *p = usage_msgd; while (*p) PerlIO_puts(PerlIO_stdout(), *p++); } - return i; + return (int)uv; /* ignore any UV->int conversion loss */ } #endif @@ -3668,14 +3673,17 @@ S_open_script(pTHX_ const char *scriptname, bool dosearch, bool *suidscript) PL_origfilename = savepvs("-e"); } else { + const char *s; + UV uv; /* if find_script() returns, it returns a malloc()-ed value */ scriptname = PL_origfilename = find_script(scriptname, dosearch, NULL, 1); - if (strnEQ(scriptname, "/dev/fd/", 8) && isDIGIT(scriptname[8]) ) { - const char *s = scriptname + 8; - const char* e; - fdscript = grok_atou(s, &e); - s = e; + if (strnEQ(scriptname, "/dev/fd/", 8) + && isDIGIT(scriptname[8]) + && grok_atoUV(scriptname + 8, &uv, &s) + && uv <= PERL_INT_MAX + ) { + fdscript = (int)uv; if (*s) { /* PSz 18 Feb 04 * Tell apart "normal" usage of fdscript, e.g. diff --git a/pod/perlclib.pod b/pod/perlclib.pod index c5fb455aa0..b366c7f87e 100644 --- a/pod/perlclib.pod +++ b/pod/perlclib.pod @@ -203,15 +203,26 @@ C<toUPPER_uni>, as described in L<perlapi/Character case changing>.) Instead Of: Use: atof(s) Atof(s) - atoi(s) grok_atou(s, &e) - atol(s) grok_atou(s, &e) + atoi(s) grok_atoUV(s, &uv, &e) + atol(s) grok_atoUV(s, &uv, &e) strtod(s, &p) Nothing. Just don't use it. - strtol(s, &p, n) grok_atou(s, &e) - strtoul(s, &p, n) grok_atou(s, &e) + strtol(s, &p, n) grok_atoUV(s, &uv, &e) + strtoul(s, &p, n) grok_atoUV(s, &uv, &e) + +Typical use is to do range checks on C<uv> before casting: + + int i; UV uv; char* end_ptr; + if (grok_atoUV(input, &uv, &end_ptr) + && uv <= INT_MAX) + i = (int)uv; + ... /* continue parsing from end_ptr */ + } else { + ... /* parse error: not a decimal integer in range 0 .. MAX_IV */ + } Notice also the C<grok_bin>, C<grok_hex>, and C<grok_oct> functions in F<numeric.c> for converting strings representing numbers in the respective -bases into C<NV>s. Note that grok_atou() doesn't handle negative inputs, +bases into C<NV>s. Note that grok_atoUV() doesn't handle negative inputs, or leading whitespace (being purposefully strict). Note that strtol() and strtoul() may be disguised as Strtol(), Strtoul(), diff --git a/pod/perlhacktips.pod b/pod/perlhacktips.pod index 943bdfbc1c..7b345167b8 100644 --- a/pod/perlhacktips.pod +++ b/pod/perlhacktips.pod @@ -641,7 +641,7 @@ L<https://sourceware.org/bugzilla/show_bug.cgi?id=6530>. Do not use atoi() -Use grok_atou() instead. atoi() has ill-defined behavior on overflows, +Use grok_atoUV() instead. atoi() has ill-defined behavior on overflows, and cannot be used for incremental parsing. It is also affected by locale, which is bad. @@ -649,7 +649,7 @@ which is bad. Do not use strtol() or strtoul() -Use grok_atou() instead. strtol() or strtoul() (or their IV/UV-friendly +Use grok_atoUV() instead. strtol() or strtoul() (or their IV/UV-friendly macro disguises, Strtol() and Strtoul(), or Atol() and Atoul() are affected by locale, which is bad. diff --git a/pod/perllocale.pod b/pod/perllocale.pod index fdf524f956..15e91814cd 100644 --- a/pod/perllocale.pod +++ b/pod/perllocale.pod @@ -556,7 +556,7 @@ The two quickest fixes are either to render Perl silent about any locale inconsistencies or to run Perl under the default locale "C". Perl's moaning about locale problems can be silenced by setting the -environment variable C<PERL_BADLANG> to a zero value, for example "0". +environment variable C<PERL_BADLANG> to "0" or "". This method really just sweeps the problem under the carpet: you tell Perl to shut up even when Perl sees that something is wrong. Do not be surprised if later something locale-dependent misbehaves. @@ -1196,9 +1196,8 @@ A string that can suppress Perl's warning about failed locale settings at startup. Failure can occur if the locale support in the operating system is lacking (broken) in some way--or if you mistyped the name of a locale when you set up your environment. If this environment -variable is absent, or has a value that does not evaluate to integer -zero--that is, "0" or ""-- Perl will complain about locale setting -failures. +variable is absent, or has a value other than "0" or "", Perl will +complain about locale setting failures. B<NOTE>: C<PERL_BADLANG> only gives you a way to hide the warning message. The message tells about some problem in your system's locale support, @@ -3340,6 +3340,7 @@ PP(pp_fttty) GV *gv; char *name = NULL; STRLEN namelen; + UV uv; tryAMAGICftest_MG('t'); @@ -3355,8 +3356,8 @@ PP(pp_fttty) if (GvIO(gv) && IoIFP(GvIOp(gv))) fd = PerlIO_fileno(IoIFP(GvIOp(gv))); - else if (name && isDIGIT(*name)) - fd = grok_atou(name, NULL); + else if (name && isDIGIT(*name) && grok_atoUV(name, &uv, NULL) && uv <= PERL_INT_MAX) + fd = (int)uv; else FT_RETURNUNDEF; if (fd < 0) { @@ -1340,10 +1340,11 @@ PERL_CALLCONV int Perl_getcwd_sv(pTHX_ SV* sv) PERL_CALLCONV void Perl_gp_free(pTHX_ GV* gv); PERL_CALLCONV GP* Perl_gp_ref(pTHX_ GP* gp); -PERL_CALLCONV UV Perl_grok_atou(const char* pv, const char** endptr) - __attribute__nonnull__(1); -#define PERL_ARGS_ASSERT_GROK_ATOU \ - assert(pv) +PERL_CALLCONV bool Perl_grok_atoUV(const char* pv, UV* valptr, const char** endptr) + __attribute__nonnull__(1) + __attribute__nonnull__(2); +#define PERL_ARGS_ASSERT_GROK_ATOUV \ + assert(pv); assert(valptr) PERL_CALLCONV UV Perl_grok_bin(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result) __attribute__nonnull__(pTHX_1) @@ -10121,10 +10121,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; is_neg = TRUE; } - unum = grok_atou(RExC_parse, &endptr); - num = (unum > I32_MAX) ? I32_MAX : (I32)unum; - if (endptr) - RExC_parse = (char*)endptr; + if (grok_atoUV(RExC_parse, &unum, &endptr) + && unum <= I32_MAX + ) { + num = (I32)unum; + RExC_parse = (char*)endptr; + } else + num = I32_MAX; if (is_neg) { /* Some limit for num? */ num = -num; @@ -10308,9 +10311,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; parno = 0; if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) { - parno = grok_atou(RExC_parse, &endptr); - if (endptr) + UV uv; + if (grok_atoUV(RExC_parse, &uv, &endptr) + && uv <= I32_MAX + ) { + parno = (I32)uv; RExC_parse = (char*)endptr; + } + /* XXX else what? */ } else if (RExC_parse[0] == '&') { SV *sv_dat; RExC_parse++; @@ -10327,9 +10335,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) /* (?(1)...) */ char c; char *tmp; - parno = grok_atou(RExC_parse, &endptr); - if (endptr) - RExC_parse = (char*)endptr; + UV uv; + if (grok_atoUV(RExC_parse, &uv, &endptr) + && uv <= I32_MAX + ) { + parno = (I32)uv; + RExC_parse = (char*)endptr; + } + /* XXX else what? */ ret = reganode(pRExC_state, GROUPP, parno); insert_if_check_paren: @@ -10815,8 +10828,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) maxpos = next; RExC_parse++; if (isDIGIT(*RExC_parse)) { - uv = grok_atou(RExC_parse, &endptr); - if (!endptr) + if (!grok_atoUV(RExC_parse, &uv, &endptr)) vFAIL("Invalid quantifier in {,}"); if (uv >= REG_INFTY) vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1); @@ -10829,8 +10841,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) else maxpos = RExC_parse; if (isDIGIT(*maxpos)) { - uv = grok_atou(maxpos, &endptr); - if (!endptr) + if (!grok_atoUV(maxpos, &uv, &endptr)) vFAIL("Invalid quantifier in {,}"); if (uv >= REG_INFTY) vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1); @@ -11531,10 +11542,10 @@ static I32 S_backref_value(char *p) { const char* endptr; - UV val = grok_atou(p, &endptr); - if (endptr == p || endptr == NULL || val > I32_MAX) - return I32_MAX; - return (I32)val; + UV val; + if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX) + return (I32)val; + return I32_MAX; } @@ -1649,6 +1649,7 @@ S_incline(pTHX_ const char *s) const char *n; const char *e; line_t line_num; + UV uv; PERL_ARGS_ASSERT_INCLINE; @@ -1698,7 +1699,9 @@ S_incline(pTHX_ const char *s) if (*e != '\n' && *e != '\0') return; /* false alarm */ - line_num = grok_atou(n, &e) - 1; + if (!grok_atoUV(n, &uv, &e)) + return; + line_num = ((line_t)uv) - 1; if (t - s > 0) { const STRLEN len = t - s; @@ -3549,7 +3549,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) /* The first number is a count of the rest */ l++; - elements = grok_atou((const char *)l, &after_atou); + if (!grok_atoUV((const char *)l, &elements, &after_atou)) { + Perl_croak(aTHX_ "panic: Expecting a valid count of elements at start of inversion list"); + } if (elements == 0) { invlist = _new_invlist(0); } @@ -3559,7 +3561,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) /* Get the 0th element, which is needed to setup the inversion list */ while (isSPACE(*l)) l++; - element0 = (UV) grok_atou((const char *)l, &after_atou); + if (!grok_atoUV((const char *)l, &element0, &after_atou)) { + Perl_croak(aTHX_ "panic: Expecting a valid 0th element for inversion list"); + } l = (U8 *) after_atou; invlist = _setup_canned_invlist(elements, element0, &other_elements_ptr); elements--; @@ -3570,7 +3574,9 @@ Perl__swash_to_invlist(pTHX_ SV* const swash) Perl_croak(aTHX_ "panic: Expecting %"UVuf" more elements than available", elements); } while (isSPACE(*l)) l++; - *other_elements_ptr++ = (UV) grok_atou((const char *)l, &after_atou); + if (!grok_atoUV((const char *)l, other_elements_ptr++, &after_atou)) { + Perl_croak(aTHX_ "panic: Expecting a valid element in inversion list"); + } l = (U8 *) after_atou; } } @@ -1375,11 +1375,13 @@ Perl_mess_sv(pTHX_ SV *basemsg, bool consume) #if defined(USE_C_BACKTRACE) && defined(USE_C_BACKTRACE_ON_ERROR) { char *ws; - int wi; + UV wi; /* The PERL_C_BACKTRACE_ON_WARN must be an integer of one or more. */ - if ((ws = PerlEnv_getenv("PERL_C_BACKTRACE_ON_ERROR")) && - (wi = grok_atou(ws, NULL)) > 0) { - Perl_dump_c_backtrace(aTHX_ Perl_debug_log, wi, 1); + if ((ws = PerlEnv_getenv("PERL_C_BACKTRACE_ON_ERROR")) + && grok_atoUV(ws, &wi, NULL) + && wi <= PERL_INT_MAX + ) { + Perl_dump_c_backtrace(aTHX_ Perl_debug_log, (int)wi, 1); } } #endif @@ -4420,15 +4422,20 @@ Perl_parse_unicode_opts(pTHX_ const char **popt) if (*p) { if (isDIGIT(*p)) { const char* endptr; - opt = (U32) grok_atou(p, &endptr); - p = endptr; - if (*p && *p != '\n' && *p != '\r') { - if(isSPACE(*p)) goto the_end_of_the_opts_parser; - else - Perl_croak(aTHX_ "Unknown Unicode option letter '%c'", *p); - } - } - else { + UV uv; + if (grok_atoUV(p, &uv, &endptr) + && uv <= U32_MAX + && (p = endptr) + && *p && *p != '\n' && *p != '\r' + ) { + opt = (U32)uv; + if (isSPACE(*p)) + goto the_end_of_the_opts_parser; + else + Perl_croak(aTHX_ "Unknown Unicode option letter '%c'", *p); + } + } + else { for (; *p; p++) { switch (*p) { case PERL_UNICODE_STDIN: @@ -4729,14 +4736,14 @@ Perl_free_global_struct(pTHX_ struct perl_vars *plvarsp) #ifdef PERL_MEM_LOG -/* -DPERL_MEM_LOG: the Perl_mem_log_..() is compiled, including the +/* -DPERL_MEM_LOG: the Perl_mem_log_..() is compiled, including * the default implementation, unless -DPERL_MEM_LOG_NOIMPL is also * given, and you supply your own implementation. * * The default implementation reads a single env var, PERL_MEM_LOG, * expecting one or more of the following: * - * \d+ - fd fd to write to : must be 1st (grok_atou) + * \d+ - fd fd to write to : must be 1st (grok_atoUV) * 'm' - memlog was PERL_MEM_LOG=1 * 's' - svlog was PERL_SV_LOG=1 * 't' - timestamp was PERL_MEM_LOG_TIMESTAMP=1 @@ -4805,9 +4812,15 @@ S_mem_log_common(enum mem_log_type mlt, const UV n, { STRLEN len; const char* endptr; - int fd = grok_atou(pmlenv, &endptr); /* Ignore endptr. */ - if (!fd) + int fd; + UV uv; + if (grok_atoUV(pmlenv, &uv, &endptr) /* Ignore endptr. */ + && uv && uv <= PERL_INT_MAX + ) { + fd = (int)uv; + } else { fd = PERL_MEM_LOG_FD; + } if (strchr(pmlenv, 't')) { len = my_snprintf(buf, sizeof(buf), @@ -6008,6 +6021,8 @@ static const char* atos_parse(const char* p, const char* source_name_end; const char* source_line_end; const char* close_paren; + UV uv; + /* Skip trailing whitespace. */ while (p > start && isspace(*p)) p--; /* Now we should be at the close paren. */ @@ -6034,10 +6049,14 @@ static const char* atos_parse(const char* p, return NULL; p++; *source_name_size = source_name_end - p; - *source_line = grok_atou(source_number_start, &source_line_end); - if (source_line_end != close_paren) - return NULL; - return p; + if (grok_atoUV(source_number_start, &uv, &source_line_end) + && source_line_end == close_paren + && uv <= MAX_STRLEN + ) { + *source_line = (STRLEN)uv; + return p; + } + return NULL; } /* Given a raw frame, read a pipe from the symbolicator (that's the |