diff options
author | Nicholas Clark <nick@ccl4.org> | 2009-09-26 18:36:33 +0100 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2009-09-26 18:36:33 +0100 |
commit | b286197009ac308fc656630edad2ba5390660499 (patch) | |
tree | 6758c3b08a84ffb260bcab5473698652f73049ec /cpan/Devel-PPPort/parts/inc/grok | |
parent | bd051ffa533cd11a03353dc0bf0c294bc68598f6 (diff) | |
download | perl-b286197009ac308fc656630edad2ba5390660499.tar.gz |
Move Devel::PPPort from ext/ to cpan/
Diffstat (limited to 'cpan/Devel-PPPort/parts/inc/grok')
-rw-r--r-- | cpan/Devel-PPPort/parts/inc/grok | 677 |
1 files changed, 677 insertions, 0 deletions
diff --git a/cpan/Devel-PPPort/parts/inc/grok b/cpan/Devel-PPPort/parts/inc/grok new file mode 100644 index 0000000000..5e32f8e990 --- /dev/null +++ b/cpan/Devel-PPPort/parts/inc/grok @@ -0,0 +1,677 @@ +################################################################################ +## +## $Revision: 16 $ +## $Author: mhx $ +## $Date: 2009/01/18 14:10:55 +0100 $ +## +################################################################################ +## +## Version 3.x, Copyright (C) 2004-2009, Marcus Holland-Moritz. +## Version 2.x, Copyright (C) 2001, Paul Marquess. +## Version 1.x, Copyright (C) 1999, Kenneth Albanowski. +## +## This program is free software; you can redistribute it and/or +## modify it under the same terms as Perl itself. +## +################################################################################ + +=provides + +grok_hex +grok_oct +grok_bin +grok_numeric_radix +grok_number +__UNDEFINED__ + +=implementation + +__UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling) +__UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE) +__UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE) +__UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME) + +__UNDEFINED__ IS_NUMBER_IN_UV 0x01 +__UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02 +__UNDEFINED__ IS_NUMBER_NOT_INT 0x04 +__UNDEFINED__ IS_NUMBER_NEG 0x08 +__UNDEFINED__ IS_NUMBER_INFINITY 0x10 +__UNDEFINED__ IS_NUMBER_NAN 0x20 + +__UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send) + +__UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02 +__UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04 +__UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01 +__UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02 + +#ifndef grok_numeric_radix +#if { NEED grok_numeric_radix } +bool +grok_numeric_radix(pTHX_ const char **sp, const char *send) +{ +#ifdef USE_LOCALE_NUMERIC +#ifdef PL_numeric_radix_sv + if (PL_numeric_radix_sv && IN_LOCALE) { + STRLEN len; + char* radix = SvPV(PL_numeric_radix_sv, len); + if (*sp + len <= send && memEQ(*sp, radix, len)) { + *sp += len; + return TRUE; + } + } +#else + /* older perls don't have PL_numeric_radix_sv so the radix + * must manually be requested from locale.h + */ +#include <locale.h> + dTHR; /* needed for older threaded perls */ + struct lconv *lc = localeconv(); + char *radix = lc->decimal_point; + if (radix && IN_LOCALE) { + STRLEN len = strlen(radix); + if (*sp + len <= send && memEQ(*sp, radix, len)) { + *sp += len; + return TRUE; + } + } +#endif +#endif /* USE_LOCALE_NUMERIC */ + /* always try "." if numeric radix didn't match because + * we may have data from different locales mixed */ + if (*sp < send && **sp == '.') { + ++*sp; + return TRUE; + } + return FALSE; +} +#endif +#endif + +#ifndef grok_number +#if { NEED grok_number } +int +grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) +{ + const char *s = pv; + const char *send = pv + len; + const UV max_div_10 = UV_MAX / 10; + const char max_mod_10 = UV_MAX % 10; + int numtype = 0; + int sawinf = 0; + int sawnan = 0; + + while (s < send && isSPACE(*s)) + s++; + if (s == send) { + return 0; + } else if (*s == '-') { + s++; + numtype = IS_NUMBER_NEG; + } + else if (*s == '+') + s++; + + if (s == send) + return 0; + + /* next must be digit or the radix separator or beginning of infinity */ + if (isDIGIT(*s)) { + /* UVs are at least 32 bits, so the first 9 decimal digits cannot + overflow. */ + UV value = *s - '0'; + /* This construction seems to be more optimiser friendly. + (without it gcc does the isDIGIT test and the *s - '0' separately) + With it gcc on arm is managing 6 instructions (6 cycles) per digit. + In theory the optimiser could deduce how far to unroll the loop + before checking for overflow. */ + if (++s < send) { + int digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + digit = *s - '0'; + if (digit >= 0 && digit <= 9) { + value = value * 10 + digit; + if (++s < send) { + /* Now got 9 digits, so need to check + each time for overflow. */ + digit = *s - '0'; + while (digit >= 0 && digit <= 9 + && (value < max_div_10 + || (value == max_div_10 + && digit <= max_mod_10))) { + value = value * 10 + digit; + if (++s < send) + digit = *s - '0'; + else + break; + } + if (digit >= 0 && digit <= 9 + && (s < send)) { + /* value overflowed. + skip the remaining digits, don't + worry about setting *valuep. */ + do { + s++; + } while (s < send && isDIGIT(*s)); + numtype |= + IS_NUMBER_GREATER_THAN_UV_MAX; + goto skip_value; + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + numtype |= IS_NUMBER_IN_UV; + if (valuep) + *valuep = value; + + skip_value: + if (GROK_NUMERIC_RADIX(&s, send)) { + numtype |= IS_NUMBER_NOT_INT; + while (s < send && isDIGIT(*s)) /* optional digits after the radix */ + s++; + } + } + else if (GROK_NUMERIC_RADIX(&s, send)) { + numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ + /* no digits before the radix means we need digits after it */ + if (s < send && isDIGIT(*s)) { + do { + s++; + } while (s < send && isDIGIT(*s)); + if (valuep) { + /* integer approximation is valid - it's 0. */ + *valuep = 0; + } + } + else + return 0; + } else if (*s == 'I' || *s == 'i') { + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; + s++; if (s < send && (*s == 'I' || *s == 'i')) { + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; + s++; if (s == send || (*s != 'T' && *s != 't')) return 0; + s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; + s++; + } + sawinf = 1; + } else if (*s == 'N' || *s == 'n') { + /* XXX TODO: There are signaling NaNs and quiet NaNs. */ + s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; + sawnan = 1; + } else + return 0; + + if (sawinf) { + numtype &= IS_NUMBER_NEG; /* Keep track of sign */ + numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; + } else if (sawnan) { + numtype &= IS_NUMBER_NEG; /* Keep track of sign */ + numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; + } else if (s < send) { + /* we can have an optional exponent part */ + if (*s == 'e' || *s == 'E') { + /* The only flag we keep is sign. Blow away any "it's UV" */ + numtype &= IS_NUMBER_NEG; + numtype |= IS_NUMBER_NOT_INT; + s++; + if (s < send && (*s == '-' || *s == '+')) + s++; + if (s < send && isDIGIT(*s)) { + do { + s++; + } while (s < send && isDIGIT(*s)); + } + else + return 0; + } + } + while (s < send && isSPACE(*s)) + s++; + if (s >= send) + return numtype; + if (len == 10 && memEQ(pv, "0 but true", 10)) { + if (valuep) + *valuep = 0; + return IS_NUMBER_IN_UV; + } + return 0; +} +#endif +#endif + +/* + * The grok_* routines have been modified to use warn() instead of + * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit, + * which is why the stack variable has been renamed to 'xdigit'. + */ + +#ifndef grok_bin +#if { NEED grok_bin } +UV +grok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) +{ + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_2 = UV_MAX / 2; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + + if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { + /* strip off leading b or 0b. + for compatibility silently suffer "b" and "0b" as valid binary + numbers. */ + if (len >= 1) { + if (s[0] == 'b') { + s++; + len--; + } + else if (len >= 2 && s[0] == '0' && s[1] == 'b') { + s+=2; + len-=2; + } + } + } + + for (; len-- && *s; s++) { + char bit = *s; + if (bit == '0' || bit == '1') { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + With gcc seems to be much straighter code than old scan_bin. */ + redo: + if (!overflowed) { + if (value <= max_div_2) { + value = (value << 1) | (bit - '0'); + continue; + } + /* Bah. We're just overflowed. */ + warn("Integer overflow in binary number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 2.0; + /* If an NV has not enough bits in its mantissa to + * represent a UV this summing of small low-order numbers + * is a waste of time (because the NV cannot preserve + * the low-order bits anyway): we could just remember when + * did we overflow and in the end just multiply value_nv by the + * right amount. */ + value_nv += (NV)(bit - '0'); + continue; + } + if (bit == '_' && len && allow_underscores && (bit = s[1]) + && (bit == '0' || bit == '1')) + { + --len; + ++s; + goto redo; + } + if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) + warn("Illegal binary digit '%c' ignored", *s); + break; + } + + if ( ( overflowed && value_nv > 4294967295.0) +#if UVSIZE > 4 + || (!overflowed && value > 0xffffffff ) +#endif + ) { + warn("Binary number > 0b11111111111111111111111111111111 non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} +#endif +#endif + +#ifndef grok_hex +#if { NEED grok_hex } +UV +grok_hex(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) +{ + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_16 = UV_MAX / 16; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + const char *xdigit; + + if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { + /* strip off leading x or 0x. + for compatibility silently suffer "x" and "0x" as valid hex numbers. + */ + if (len >= 1) { + if (s[0] == 'x') { + s++; + len--; + } + else if (len >= 2 && s[0] == '0' && s[1] == 'x') { + s+=2; + len-=2; + } + } + } + + for (; len-- && *s; s++) { + xdigit = strchr((char *) PL_hexdigit, *s); + if (xdigit) { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + With gcc seems to be much straighter code than old scan_hex. */ + redo: + if (!overflowed) { + if (value <= max_div_16) { + value = (value << 4) | ((xdigit - PL_hexdigit) & 15); + continue; + } + warn("Integer overflow in hexadecimal number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 16.0; + /* If an NV has not enough bits in its mantissa to + * represent a UV this summing of small low-order numbers + * is a waste of time (because the NV cannot preserve + * the low-order bits anyway): we could just remember when + * did we overflow and in the end just multiply value_nv by the + * right amount of 16-tuples. */ + value_nv += (NV)((xdigit - PL_hexdigit) & 15); + continue; + } + if (*s == '_' && len && allow_underscores && s[1] + && (xdigit = strchr((char *) PL_hexdigit, s[1]))) + { + --len; + ++s; + goto redo; + } + if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) + warn("Illegal hexadecimal digit '%c' ignored", *s); + break; + } + + if ( ( overflowed && value_nv > 4294967295.0) +#if UVSIZE > 4 + || (!overflowed && value > 0xffffffff ) +#endif + ) { + warn("Hexadecimal number > 0xffffffff non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} +#endif +#endif + +#ifndef grok_oct +#if { NEED grok_oct } +UV +grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result) +{ + const char *s = start; + STRLEN len = *len_p; + UV value = 0; + NV value_nv = 0; + + const UV max_div_8 = UV_MAX / 8; + bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; + bool overflowed = FALSE; + + for (; len-- && *s; s++) { + /* gcc 2.95 optimiser not smart enough to figure that this subtraction + out front allows slicker code. */ + int digit = *s - '0'; + if (digit >= 0 && digit <= 7) { + /* Write it in this wonky order with a goto to attempt to get the + compiler to make the common case integer-only loop pretty tight. + */ + redo: + if (!overflowed) { + if (value <= max_div_8) { + value = (value << 3) | digit; + continue; + } + /* Bah. We're just overflowed. */ + warn("Integer overflow in octal number"); + overflowed = TRUE; + value_nv = (NV) value; + } + value_nv *= 8.0; + /* If an NV has not enough bits in its mantissa to + * represent a UV this summing of small low-order numbers + * is a waste of time (because the NV cannot preserve + * the low-order bits anyway): we could just remember when + * did we overflow and in the end just multiply value_nv by the + * right amount of 8-tuples. */ + value_nv += (NV)digit; + continue; + } + if (digit == ('_' - '0') && len && allow_underscores + && (digit = s[1] - '0') && (digit >= 0 && digit <= 7)) + { + --len; + ++s; + goto redo; + } + /* Allow \octal to work the DWIM way (that is, stop scanning + * as soon as non-octal characters are seen, complain only iff + * someone seems to want to use the digits eight and nine). */ + if (digit == 8 || digit == 9) { + if (!(*flags & PERL_SCAN_SILENT_ILLDIGIT)) + warn("Illegal octal digit '%c' ignored", *s); + } + break; + } + + if ( ( overflowed && value_nv > 4294967295.0) +#if UVSIZE > 4 + || (!overflowed && value > 0xffffffff ) +#endif + ) { + warn("Octal number > 037777777777 non-portable"); + } + *len_p = s - start; + if (!overflowed) { + *flags = 0; + return value; + } + *flags = PERL_SCAN_GREATER_THAN_UV_MAX; + if (result) + *result = value_nv; + return UV_MAX; +} +#endif +#endif + +=xsinit + +#define NEED_grok_number +#define NEED_grok_numeric_radix +#define NEED_grok_bin +#define NEED_grok_hex +#define NEED_grok_oct + +=xsubs + +UV +grok_number(string) + SV *string + PREINIT: + const char *pv; + STRLEN len; + CODE: + pv = SvPV(string, len); + if (!grok_number(pv, len, &RETVAL)) + XSRETURN_UNDEF; + OUTPUT: + RETVAL + +UV +grok_bin(string) + SV *string + PREINIT: + char *pv; + I32 flags; + STRLEN len; + CODE: + pv = SvPV(string, len); + RETVAL = grok_bin(pv, &len, &flags, NULL); + OUTPUT: + RETVAL + +UV +grok_hex(string) + SV *string + PREINIT: + char *pv; + I32 flags; + STRLEN len; + CODE: + pv = SvPV(string, len); + RETVAL = grok_hex(pv, &len, &flags, NULL); + OUTPUT: + RETVAL + +UV +grok_oct(string) + SV *string + PREINIT: + char *pv; + I32 flags; + STRLEN len; + CODE: + pv = SvPV(string, len); + RETVAL = grok_oct(pv, &len, &flags, NULL); + OUTPUT: + RETVAL + +UV +Perl_grok_number(string) + SV *string + PREINIT: + const char *pv; + STRLEN len; + CODE: + pv = SvPV(string, len); + if (!Perl_grok_number(aTHX_ pv, len, &RETVAL)) + XSRETURN_UNDEF; + OUTPUT: + RETVAL + +UV +Perl_grok_bin(string) + SV *string + PREINIT: + char *pv; + I32 flags; + STRLEN len; + CODE: + pv = SvPV(string, len); + RETVAL = Perl_grok_bin(aTHX_ pv, &len, &flags, NULL); + OUTPUT: + RETVAL + +UV +Perl_grok_hex(string) + SV *string + PREINIT: + char *pv; + I32 flags; + STRLEN len; + CODE: + pv = SvPV(string, len); + RETVAL = Perl_grok_hex(aTHX_ pv, &len, &flags, NULL); + OUTPUT: + RETVAL + +UV +Perl_grok_oct(string) + SV *string + PREINIT: + char *pv; + I32 flags; + STRLEN len; + CODE: + pv = SvPV(string, len); + RETVAL = Perl_grok_oct(aTHX_ pv, &len, &flags, NULL); + OUTPUT: + RETVAL + +=tests plan => 10 + +ok(&Devel::PPPort::grok_number("42"), 42); +ok(!defined(&Devel::PPPort::grok_number("A"))); +ok(&Devel::PPPort::grok_bin("10000001"), 129); +ok(&Devel::PPPort::grok_hex("deadbeef"), 0xdeadbeef); +ok(&Devel::PPPort::grok_oct("377"), 255); + +ok(&Devel::PPPort::Perl_grok_number("42"), 42); +ok(!defined(&Devel::PPPort::Perl_grok_number("A"))); +ok(&Devel::PPPort::Perl_grok_bin("10000001"), 129); +ok(&Devel::PPPort::Perl_grok_hex("deadbeef"), 0xdeadbeef); +ok(&Devel::PPPort::Perl_grok_oct("377"), 255); + |