diff options
-rw-r--r-- | embed.h | 16 | ||||
-rwxr-xr-x | embed.pl | 4 | ||||
-rw-r--r-- | global.sym | 2 | ||||
-rw-r--r-- | objXSUB.h | 8 | ||||
-rw-r--r-- | perl.h | 10 | ||||
-rw-r--r-- | perlapi.c | 16 | ||||
-rw-r--r-- | pod/perlapi.pod | 36 | ||||
-rw-r--r-- | proto.h | 4 | ||||
-rw-r--r-- | sv.c | 189 | ||||
-rw-r--r-- | util.c | 211 |
10 files changed, 278 insertions, 218 deletions
@@ -343,6 +343,8 @@ #define vload_module Perl_vload_module #define localize Perl_localize #define looks_like_number Perl_looks_like_number +#define grok_number Perl_grok_number +#define grok_numeric_radix Perl_grok_numeric_radix #define magic_clearenv Perl_magic_clearenv #define magic_clear_all_env Perl_magic_clear_all_env #define magic_clearpack Perl_magic_clearpack @@ -1114,8 +1116,6 @@ # if defined(USE_ITHREADS) #define gv_share S_gv_share # endif -#define grok_number S_grok_number -#define grok_numeric_radix S_grok_numeric_radix #endif #if defined(PERL_IN_TOKE_C) || defined(PERL_DECL_PROT) #define check_uni S_check_uni @@ -1849,6 +1849,8 @@ #define vload_module(a,b,c,d) Perl_vload_module(aTHX_ a,b,c,d) #define localize(a,b) Perl_localize(aTHX_ a,b) #define looks_like_number(a) Perl_looks_like_number(aTHX_ a) +#define grok_number(a,b,c) Perl_grok_number(aTHX_ a,b,c) +#define grok_numeric_radix(a,b) Perl_grok_numeric_radix(aTHX_ a,b) #define magic_clearenv(a,b) Perl_magic_clearenv(aTHX_ a,b) #define magic_clear_all_env(a,b) Perl_magic_clear_all_env(aTHX_ a,b) #define magic_clearpack(a,b) Perl_magic_clearpack(aTHX_ a,b) @@ -2609,8 +2611,6 @@ # if defined(USE_ITHREADS) #define gv_share(a) S_gv_share(aTHX_ a) # endif -#define grok_number(a,b,c) S_grok_number(aTHX_ a,b,c) -#define grok_numeric_radix(a,b) S_grok_numeric_radix(aTHX_ a,b) #endif #if defined(PERL_IN_TOKE_C) || defined(PERL_DECL_PROT) #define check_uni() S_check_uni(aTHX) @@ -3630,6 +3630,10 @@ #define localize Perl_localize #define Perl_looks_like_number CPerlObj::Perl_looks_like_number #define looks_like_number Perl_looks_like_number +#define Perl_grok_number CPerlObj::Perl_grok_number +#define grok_number Perl_grok_number +#define Perl_grok_numeric_radix CPerlObj::Perl_grok_numeric_radix +#define grok_numeric_radix Perl_grok_numeric_radix #define Perl_magic_clearenv CPerlObj::Perl_magic_clearenv #define magic_clearenv Perl_magic_clearenv #define Perl_magic_clear_all_env CPerlObj::Perl_magic_clear_all_env @@ -5083,10 +5087,6 @@ #define S_gv_share CPerlObj::S_gv_share #define gv_share S_gv_share # endif -#define S_grok_number CPerlObj::S_grok_number -#define grok_number S_grok_number -#define S_grok_numeric_radix CPerlObj::S_grok_numeric_radix -#define grok_numeric_radix S_grok_numeric_radix #endif #if defined(PERL_IN_TOKE_C) || defined(PERL_DECL_PROT) #define S_check_uni CPerlObj::S_check_uni @@ -1682,6 +1682,8 @@ Apd |void |load_module|U32 flags|SV* name|SV* ver|... Ap |void |vload_module|U32 flags|SV* name|SV* ver|va_list* args p |OP* |localize |OP* arg|I32 lexical Apd |I32 |looks_like_number|SV* sv +Apd |int |grok_number |const char *pv|STRLEN len|UV *valuep +Apd |bool |grok_numeric_radix|const char **sp|const char *send p |int |magic_clearenv |SV* sv|MAGIC* mg p |int |magic_clear_all_env|SV* sv|MAGIC* mg p |int |magic_clearpack|SV* sv|MAGIC* mg @@ -2521,8 +2523,6 @@ s |I32 |expect_number |char** pattern # if defined(USE_ITHREADS) s |SV* |gv_share |SV *sv # endif -s |int |grok_number |const char *pv|STRLEN len|UV *valuep -s |bool |grok_numeric_radix|const char **sp|const char *send #endif #if defined(PERL_IN_TOKE_C) || defined(PERL_DECL_PROT) diff --git a/global.sym b/global.sym index 14d9ef0376..ef481fa28b 100644 --- a/global.sym +++ b/global.sym @@ -211,6 +211,8 @@ Perl_op_null Perl_load_module Perl_vload_module Perl_looks_like_number +Perl_grok_number +Perl_grok_numeric_radix Perl_markstack_grow Perl_mess Perl_vmess @@ -811,6 +811,14 @@ #define Perl_looks_like_number pPerl->Perl_looks_like_number #undef looks_like_number #define looks_like_number Perl_looks_like_number +#undef Perl_grok_number +#define Perl_grok_number pPerl->Perl_grok_number +#undef grok_number +#define grok_number Perl_grok_number +#undef Perl_grok_numeric_radix +#define Perl_grok_numeric_radix pPerl->Perl_grok_numeric_radix +#undef grok_numeric_radix +#define grok_numeric_radix Perl_grok_numeric_radix #if defined(USE_THREADS) #endif #if defined(USE_LOCALE_COLLATE) @@ -3726,6 +3726,16 @@ int flock(int fd, int op); #define EXEC_ARGV_CAST(x) x #endif +#define IS_NUMBER_IN_UV 0x01 /* number within UV range (maybe not + int). value returned in pointed- + to UV */ +#define IS_NUMBER_GREATER_THAN_UV_MAX 0x02 /* pointed to UV undefined */ +#define IS_NUMBER_NOT_INT 0x04 /* saw . or E notation */ +#define IS_NUMBER_NEG 0x08 /* leading minus sign */ +#define IS_NUMBER_INFINITY 0x10 /* this is big */ + +#define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) + /* and finally... */ #define PERL_PATCHLEVEL_H_IMPLICIT #include "patchlevel.h" @@ -1505,6 +1505,20 @@ Perl_looks_like_number(pTHXo_ SV* sv) { return ((CPerlObj*)pPerl)->Perl_looks_like_number(sv); } + +#undef Perl_grok_number +int +Perl_grok_number(pTHXo_ const char *pv, STRLEN len, UV *valuep) +{ + return ((CPerlObj*)pPerl)->Perl_grok_number(pv, len, valuep); +} + +#undef Perl_grok_numeric_radix +bool +Perl_grok_numeric_radix(pTHXo_ const char **sp, const char *send) +{ + return ((CPerlObj*)pPerl)->Perl_grok_numeric_radix(sp, send); +} #if defined(USE_THREADS) #endif #if defined(USE_LOCALE_COLLATE) @@ -1596,7 +1610,7 @@ Perl_mg_set(pTHXo_ SV* sv) } #undef Perl_mg_size -I32 +IV Perl_mg_size(pTHXo_ SV* sv) { return ((CPerlObj*)pPerl)->Perl_mg_size(sv); diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 5f0a584e58..84e2dc721f 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -519,6 +519,28 @@ respectively. =for hackers Found in file op.h +=item grok_number + +Recognise (or not) a number. The type of the number is returned +(0 if unrecognised), otherwise it is a bit-ORed combination of +IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT, +IS_NUMBER_NEG, IS_NUMBER_INFINITY (defined in perl.h). If the value +of the number can fit an in UV, it is returned in the *valuep. + + int grok_number(const char *pv, STRLEN len, UV *valuep) + +=for hackers +Found in file util.c + +=item grok_numeric_radix + +Scan and skip for a numeric decimal separator (radix). + + bool grok_numeric_radix(const char **sp, const char *send) + +=for hackers +Found in file util.c + =item GvSV Return the SV from the GV. @@ -2408,19 +2430,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B<sv.h> -in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. +Returns the type of the SV. See C<svtype>. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C<svtype>. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B<sv.h> +in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. =for hackers Found in file sv.h @@ -405,6 +405,8 @@ PERL_CALLCONV void Perl_load_module(pTHX_ U32 flags, SV* name, SV* ver, ...); PERL_CALLCONV void Perl_vload_module(pTHX_ U32 flags, SV* name, SV* ver, va_list* args); PERL_CALLCONV OP* Perl_localize(pTHX_ OP* arg, I32 lexical); PERL_CALLCONV I32 Perl_looks_like_number(pTHX_ SV* sv); +PERL_CALLCONV int Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep); +PERL_CALLCONV bool Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send); PERL_CALLCONV int Perl_magic_clearenv(pTHX_ SV* sv, MAGIC* mg); PERL_CALLCONV int Perl_magic_clear_all_env(pTHX_ SV* sv, MAGIC* mg); PERL_CALLCONV int Perl_magic_clearpack(pTHX_ SV* sv, MAGIC* mg); @@ -1245,8 +1247,6 @@ STATIC I32 S_expect_number(pTHX_ char** pattern); # if defined(USE_ITHREADS) STATIC SV* S_gv_share(pTHX_ SV *sv); # endif -STATIC int S_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep); -STATIC int S_grok_numeric_radix(pTHX_ const char **sp, const char *send); #endif #if defined(PERL_IN_TOKE_C) || defined(PERL_DECL_PROT) @@ -1486,195 +1486,6 @@ S_not_a_number(pTHX_ SV *sv) "Argument \"%s\" isn't numeric", tmpbuf); } -#define IS_NUMBER_IN_UV 0x01 /* number within UV range (maybe not - int). value returned in pointed- - to UV */ -#define IS_NUMBER_GREATER_THAN_UV_MAX 0x02 /* pointed to UV undefined */ -#define IS_NUMBER_NOT_INT 0x04 /* saw . or E notation */ -#define IS_NUMBER_NEG 0x08 /* leading minus sign */ -#define IS_NUMBER_INFINITY 0x10 /* this is big */ - -static bool -S_grok_numeric_radix(pTHX_ const char **sp, const char *send) -{ -#ifdef USE_LOCALE_NUMERIC - if (PL_numeric_radix_sv && IN_LOCALE) { - STRLEN len; - char* radix = SvPV(PL_numeric_radix_sv, len); - if (*sp + len <= send && memEQ(*sp, radix, len)) { - *sp += len; - return TRUE; - } - } - /* always try "." if numeric radix didn't match because - * we may have data from different locales mixed */ -#endif - if (*sp < send && **sp == '.') { - ++*sp; - return TRUE; - } - return FALSE; -} - -#define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) - -static int -S_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) -{ - const char *s = pv; - const char *send = pv + len; - const UV max_div_10 = UV_MAX / 10; - const char max_mod_10 = UV_MAX % 10 + '0'; - int numtype = 0; - int sawinf = 0; - - while (isSPACE(*s)) - s++; - if (*s == '-') { - s++; - numtype = IS_NUMBER_NEG; - } - else if (*s == '+') - s++; - - /* next must be digit or the radix separator or beginning of infinity */ - if (isDIGIT(*s)) { - /* UVs are at least 32 bits, so the first 9 decimal digits cannot - overflow. */ - UV value = *s - '0'; - /* This construction seems to be more optimiser friendly. - (without it gcc does the isDIGIT test and the *s - '0' separately) - With it gcc on arm is managing 6 instructions (6 cycles) per digit. - In theory the optimiser could deduce how far to unroll the loop - before checking for overflow. */ - int digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - digit = *++s - '0'; - if (digit >= 0 && digit <= 9) { - value = value * 10 + digit; - /* Now got 9 digits, so need to check - each time for overflow. */ - digit = *++s - '0'; - while (digit >= 0 && digit <= 9 - && (value < max_div_10 - || (value == max_div_10 - && *s <= max_mod_10))) { - value = value * 10 + digit; - digit = *++s - '0'; - } - if (digit >= 0 && digit <= 9) { - /* value overflowed. - skip the remaining digits, don't - worry about setting *valuep. */ - do { - s++; - } while (isDIGIT(*s)); - numtype |= - IS_NUMBER_GREATER_THAN_UV_MAX; - goto skip_value; - } - } - } - } - } - } - } - } - } - numtype |= IS_NUMBER_IN_UV; - if (valuep) - *valuep = value; - - skip_value: - if (GROK_NUMERIC_RADIX(&s, send)) { - numtype |= IS_NUMBER_NOT_INT; - while (isDIGIT(*s)) /* optional digits after the radix */ - s++; - } - } - else if (GROK_NUMERIC_RADIX(&s, send)) { - numtype |= IS_NUMBER_NOT_INT; - /* no digits before the radix means we need digits after it */ - if (isDIGIT(*s)) { - do { - s++; - } while (isDIGIT(*s)); - numtype |= IS_NUMBER_IN_UV; - if (valuep) { - /* integer approximation is valid - it's 0. */ - *valuep = 0; - } - } - else - return 0; - } - else if (*s == 'I' || *s == 'i') { - s++; if (*s != 'N' && *s != 'n') return 0; - s++; if (*s != 'F' && *s != 'f') return 0; - s++; if (*s == 'I' || *s == 'i') { - s++; if (*s != 'N' && *s != 'n') return 0; - s++; if (*s != 'I' && *s != 'i') return 0; - s++; if (*s != 'T' && *s != 't') return 0; - s++; if (*s != 'Y' && *s != 'y') return 0; - s++; - } - sawinf = 1; - } - else /* Add test for NaN here. */ - return 0; - - if (sawinf) { - numtype &= IS_NUMBER_NEG; /* Keep track of sign */ - numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; - } else { - /* we can have an optional exponent part */ - if (*s == 'e' || *s == 'E') { - /* The only flag we keep is sign. Blow away any "it's UV" */ - numtype &= IS_NUMBER_NEG; - numtype |= IS_NUMBER_NOT_INT; - s++; - if (*s == '-' || *s == '+') - s++; - if (isDIGIT(*s)) { - do { - s++; - } while (isDIGIT(*s)); - } - else - return 0; - } - } - while (isSPACE(*s)) - s++; - if (s >= send) - return numtype; - if (len == 10 && memEQ(pv, "0 but true", 10)) { - if (valuep) - *valuep = 0; - return IS_NUMBER_IN_UV; - } - return 0; -} - /* =for apidoc looks_like_number @@ -4018,6 +4018,8 @@ Perl_my_atof(pTHX_ const char* s) if (PL_numeric_local && IN_LOCALE) { NV y; + /* Scan the number twice; once using locale and once without; + * choose the larger result (in absolute value). */ Perl_atof2(aTHX_ s, &x); SET_NUMERIC_STANDARD(); Perl_atof2(aTHX_ s, &y); @@ -4057,14 +4059,210 @@ S_mulexp10(NV value, I32 exponent) return negative ? value / result : value * result; } +/* +=for apidoc grok_numeric_radix + +Scan and skip for a numeric decimal separator (radix). + +=cut + */ +bool +Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send) +{ +#ifdef USE_LOCALE_NUMERIC + if (PL_numeric_radix_sv && IN_LOCALE) { + STRLEN len; + char* radix = SvPV(PL_numeric_radix_sv, len); + if (*sp + len <= send && memEQ(*sp, radix, len)) { + *sp += len; + return TRUE; + } + } + /* always try "." if numeric radix didn't match because + * we may have data from different locales mixed */ +#endif + if (*sp < send && **sp == '.') { + ++*sp; + return TRUE; + } + return FALSE; +} + +/* +=for apidoc grok_number + +Recognise (or not) a number. The type of the number is returned +(0 if unrecognised), otherwise it is a bit-ORed combination of +IS_NUMBER_IN_UV, IS_NUMBER_GREATER_THAN_UV_MAX, IS_NUMBER_NOT_INT, +IS_NUMBER_NEG, IS_NUMBER_INFINITY (defined in perl.h). If the value +of the number can fit an in UV, it is returned in the *valuep. + +=cut + */ +int +Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) +{ + const char *s = pv; + const char *send = pv + len; + const UV max_div_10 = UV_MAX / 10; + const char max_mod_10 = UV_MAX % 10 + '0'; + int numtype = 0; + int sawinf = 0; + + while (isSPACE(*s)) + s++; + if (*s == '-') { + s++; + numtype = IS_NUMBER_NEG; + } + else if (*s == '+') + s++; + + /* next must be digit or the radix separator or beginning of infinity */ + if (isDIGIT(*s)) { + /* UVs are at least 32 bits, so the first 9 decimal digits cannot + overflow. */ + UV value = *s - '0'; + /* This construction seems to be more optimiser friendly. + (without it gcc does the isDIGIT test and the *s - '0' separately) + With it gcc on arm is managing 6 instructions (6 cycles) per digit. + In theory the optimiser could deduce how far to unroll the loop + before checking for overflow. */ + int digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + digit = *++s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + /* Now got 9 digits, so need to check + each time for overflow. */ + digit = *++s - '0'; + while (digit >= 0 && digit <= 9 + && (value < max_div_10 + || (value == max_div_10 + && *s <= max_mod_10))) { + value = value * 10 + digit; + digit = *++s - '0'; + } + if (digit >= 0 && digit <= 9) { + /* value overflowed. + skip the remaining digits, don't + worry about setting *valuep. */ + do { + s++; + } while (isDIGIT(*s)); + numtype |= + IS_NUMBER_GREATER_THAN_UV_MAX; + goto skip_value; + } + } + } + } + } + } + } + } + } + numtype |= IS_NUMBER_IN_UV; + if (valuep) + *valuep = value; + + skip_value: + if (GROK_NUMERIC_RADIX(&s, send)) { + numtype |= IS_NUMBER_NOT_INT; + while (isDIGIT(*s)) /* optional digits after the radix */ + s++; + } + } + else if (GROK_NUMERIC_RADIX(&s, send)) { + numtype |= IS_NUMBER_NOT_INT; + /* no digits before the radix means we need digits after it */ + if (isDIGIT(*s)) { + do { + s++; + } while (isDIGIT(*s)); + numtype |= IS_NUMBER_IN_UV; + if (valuep) { + /* integer approximation is valid - it's 0. */ + *valuep = 0; + } + } + else + return 0; + } + else if (*s == 'I' || *s == 'i') { + s++; if (*s != 'N' && *s != 'n') return 0; + s++; if (*s != 'F' && *s != 'f') return 0; + s++; if (*s == 'I' || *s == 'i') { + s++; if (*s != 'N' && *s != 'n') return 0; + s++; if (*s != 'I' && *s != 'i') return 0; + s++; if (*s != 'T' && *s != 't') return 0; + s++; if (*s != 'Y' && *s != 'y') return 0; + s++; + } + sawinf = 1; + } + else /* Add test for NaN here. */ + return 0; + + if (sawinf) { + numtype &= IS_NUMBER_NEG; /* Keep track of sign */ + numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; + } else { + /* we can have an optional exponent part */ + if (*s == 'e' || *s == 'E') { + /* The only flag we keep is sign. Blow away any "it's UV" */ + numtype &= IS_NUMBER_NEG; + numtype |= IS_NUMBER_NOT_INT; + s++; + if (*s == '-' || *s == '+') + s++; + if (isDIGIT(*s)) { + do { + s++; + } while (isDIGIT(*s)); + } + else + return 0; + } + } + while (isSPACE(*s)) + s++; + if (s >= send) + return numtype; + if (len == 10 && memEQ(pv, "0 but true", 10)) { + if (valuep) + *valuep = 0; + return IS_NUMBER_IN_UV; + } + return 0; +} + char* Perl_my_atof2(pTHX_ const char* orig, NV* value) { NV result = 0.0; bool negative = 0; char* s = (char*)orig; - char* point = "."; /* locale-dependent decimal point equivalent */ - STRLEN pointlen = 1; + char* send = s + strlen(orig) - 1; bool seendigit = 0; I32 expextra = 0; I32 exponent = 0; @@ -4082,11 +4280,6 @@ Perl_my_atof2(pTHX_ const char* orig, NV* value) I32 ipart = 0; /* index into part[] */ I32 offcount; /* number of digits in least significant part */ -#ifdef USE_LOCALE_NUMERIC - if (PL_numeric_radix_sv && IN_LOCALE) - point = SvPV(PL_numeric_radix_sv, pointlen); -#endif - /* sign */ switch (*s) { case '-': @@ -4130,8 +4323,7 @@ Perl_my_atof2(pTHX_ const char* orig, NV* value) } /* decimal point */ - if (memEQ(s, point, pointlen)) { - s += pointlen; + if (GROK_NUMERIC_RADIX((const char **)&s, send)) { if (isDIGIT(*s)) seendigit = 1; /* get this over with */ @@ -4893,3 +5085,4 @@ Perl_sv_realpath(pTHX_ SV *sv, char *path, STRLEN len) return FALSE; #endif } + |