summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2020-01-10 11:45:39 -0700
committerKarl Williamson <khw@cpan.org>2020-01-13 20:58:56 -0700
commitc969ff22d59d2aa1fa495b790f009037dc500787 (patch)
tree9dff45a770839d057665f4ef17facd4000bdf0a1
parent4b24f70353412ec749279fbcdb75b84ffd574c1c (diff)
downloadperl-c969ff22d59d2aa1fa495b790f009037dc500787.tar.gz
Improve performance of grok_bin_oct_hex()
This commit uses a variety of techniques for speeding this up. It is now faster than blead, and has less maintenance cost than before. Most of the checks that the current character isn't NUL are unnecssary. The logic works on that character, even if, for some reason, you can't trust the input length. A special test is added to not output the illegal character message if that character is a NUL. This is simply for backcompat. And a switch statement is used to unroll the loop for the leading digits in the number. This should handle most common cases. Beyond these, and one has to start worrying about overflow. So this version has removed that worrying from the common cases. Extra conditionals are avoided for large numbers by extracting the portability warning message code into a separate static function called from two different places. Simplifying this logic led me to see that if it overflowed, it must be non-portable, so another conditional could be removed. Other conditionals were removed at the expense of adding parameters to the function. This function isn't public, but is called from the grok_hex, et. al. macros. grok_hex knows, for example, that it is looking for an 'x' prefix and not a 'b'. Previously the code had a conditional to determine that. Similarly in pp.c, we look for the prefix. Having found it we can start the parse after the prefix, and tell this function not to look for it. Previously, this work was duplicated. The previous changes had left this function slower than blead. That is in part due to the fact that the loop doesn't go through that many iterations per function call, and the gcc compiler managed to optimize away the conditionals in XDIGIT_VALUE in the call of it from the loop. (The other call in this function did have the conditionals.) Thanks to Sergey Aleynikov for his help on this
-rw-r--r--embed.fnc7
-rw-r--r--embed.h5
-rw-r--r--numeric.c171
-rw-r--r--perl.h10
-rw-r--r--pp.c7
-rw-r--r--proto.h6
6 files changed, 154 insertions, 52 deletions
diff --git a/embed.fnc b/embed.fnc
index e383fffc8f..88c99939ca 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1168,7 +1168,12 @@ Cpd |UV |grok_bin_oct_hex|NN const char* start \
|NN STRLEN* len_p \
|NN I32* flags \
|NULLOK NV *result \
- |const unsigned shift
+ |const unsigned shift \
+ |const U8 lookup_bit \
+ |const char prefix
+#ifdef PERL_IN_NUMERIC_C
+S |void |output_non_portable|const U8 shift
+#endif
EXpdT |bool |grok_atoUV |NN const char* pv|NN UV* valptr|NULLOK const char** endptr
: These are all indirectly referenced by globals.c. This is somewhat annoying.
p |int |magic_clearenv |NN SV* sv|NN MAGIC* mg
diff --git a/embed.h b/embed.h
index 0c0a75b893..a2c0dc1d97 100644
--- a/embed.h
+++ b/embed.h
@@ -188,7 +188,7 @@
#define getcwd_sv(a) Perl_getcwd_sv(aTHX_ a)
#define gp_free(a) Perl_gp_free(aTHX_ a)
#define gp_ref(a) Perl_gp_ref(aTHX_ a)
-#define grok_bin_oct_hex(a,b,c,d,e) Perl_grok_bin_oct_hex(aTHX_ a,b,c,d,e)
+#define grok_bin_oct_hex(a,b,c,d,e,f,g) Perl_grok_bin_oct_hex(aTHX_ a,b,c,d,e,f,g)
#define grok_infnan(a,b) Perl_grok_infnan(aTHX_ a,b)
#define grok_number(a,b,c) Perl_grok_number(aTHX_ a,b,c)
#define grok_number_flags(a,b,c,d) Perl_grok_number_flags(aTHX_ a,b,c,d)
@@ -1688,6 +1688,9 @@
#define mro_gather_and_rename(a,b,c,d,e) S_mro_gather_and_rename(aTHX_ a,b,c,d,e)
#define mro_get_linear_isa_dfs(a,b) S_mro_get_linear_isa_dfs(aTHX_ a,b)
# endif
+# if defined(PERL_IN_NUMERIC_C)
+#define output_non_portable(a) S_output_non_portable(aTHX_ a)
+# endif
# if defined(PERL_IN_OP_C)
#define apply_attrs(a,b,c) S_apply_attrs(aTHX_ a,b,c)
#define apply_attrs_my(a,b,c,d) S_apply_attrs_my(aTHX_ a,b,c,d)
diff --git a/numeric.c b/numeric.c
index a9ccd7545f..0c3c48e174 100644
--- a/numeric.c
+++ b/numeric.c
@@ -331,33 +331,71 @@ Perl_grok_oct(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result)
return grok_oct(start, len_p, flags, result);
}
+STATIC void
+S_output_non_portable(pTHX_ const U8 base)
+{
+ /* Display the proper message for a number in the given input base not
+ * fitting in 32 bits */
+ const char * which = (base == 2)
+ ? "Binary number > 0b11111111111111111111111111111111"
+ : (base == 8)
+ ? "Octal number > 037777777777"
+ : "Hexadecimal number > 0xffffffff";
+
+ PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE;
+
+ /* Also there are listings for the other two. That's because, since they
+ * are the first word, it would be hard for a user to find them there
+ * starting with a %s */
+ /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
+ Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
+}
+
UV
Perl_grok_bin_oct_hex(pTHX_ const char *start,
STRLEN *len_p,
I32 *flags,
NV *result,
- const unsigned shift) /* 1 for binary; 3 for octal;
+ const unsigned shift, /* 1 for binary; 3 for octal;
4 for hex */
+ const U8 class_bit,
+ const char prefix
+ )
+
{
- const char *s = start;
+ const char *s0 = start;
+ const char *s;
STRLEN len = *len_p;
+ STRLEN bytes_so_far; /* How many real digits have been processed */
UV value = 0;
NV value_nv = 0;
- const PERL_UINT_FAST8_T base = 1 << shift;
- const UV max_div= UV_MAX / base;
- const PERL_UINT_FAST8_T class_bit = (base == 2)
- ? _CC_BINDIGIT
- : (base == 8)
- ? _CC_OCTDIGIT
- : _CC_XDIGIT;
+ const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */
+ const UV max_div= UV_MAX / base; /* Value above which, the next digit
+ processed would overflow */
const I32 input_flags = *flags;
const bool allow_underscores =
cBOOL(input_flags & PERL_SCAN_ALLOW_UNDERSCORES);
- bool already_output_overflow_warning = FALSE;
+ bool overflowed = FALSE;
/* In overflows, this keeps track of how much to multiply the overflowed NV
* by as we continue to parse the remaining digits */
- UV factor = 1;
+ UV factor;
+
+ /* This function unifies the core of grok_bin, grok_oct, and grok_hex. It
+ * is optimized for hex conversion. For example, it uses XDIGIT_VALUE to
+ * find the numeric value of a digit. That requires more instructions than
+ * OCTAL_VALUE would, but gives the same result for the narrowed range of
+ * octal digits; same for binary. If it were ever critical to squeeze more
+ * performance from this, the function could become grok_hex, and a regen
+ * perl script could scan it and write out two edited copies for the other
+ * two functions. That would improve the performance of all three
+ * somewhat. Besides eliminating XDIGIT_VALUE for the other two, extra
+ * parameters are now passed to this to avoid conditionals. Those could
+ * become declared consts, like:
+ * const U8 base = 16;
+ * const U8 base = 8;
+ * ...
+ */
PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX;
@@ -366,25 +404,78 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
/* Clear output flags; unlikely to find a problem that sets them */
*flags = 0;
- if (base != 8 && !(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
- const char prefix = base == 2 ? 'b' : 'x';
+ if (!(input_flags & PERL_SCAN_DISALLOW_PREFIX)) {
/* strip off leading b or 0b; x or 0x.
for compatibility silently suffer "b" and "0b" as valid binary; "x"
and "0x" as valid hex numbers. */
if (len >= 1) {
- if (isALPHA_FOLD_EQ(s[0], prefix)) {
- s++;
+ if (isALPHA_FOLD_EQ(s0[0], prefix)) {
+ s0++;
len--;
}
- else if (len >= 2 && s[0] == '0' && (isALPHA_FOLD_EQ(s[1], prefix))) {
- s+=2;
+ else if (len >= 2 && s0[0] == '0' && (isALPHA_FOLD_EQ(s0[1], prefix))) {
+ s0+=2;
len-=2;
}
}
}
- for (; len-- && *s; s++) {
+ s = s0; /* s0 potentially advanced from 'start' */
+
+ /* Unroll the loop so that the first 7 digits are branchless except for the
+ * switch. An eighth one could overflow a 32 bit word. This should
+ * completely handle the common case without needing extra checks */
+ switch (len) {
+ case 0:
+ return 0;
+ default:
+ if (! _generic_isCC(*s, class_bit)) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 6:
+ if (! _generic_isCC(*s, class_bit)) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 5:
+ if (! _generic_isCC(*s, class_bit)) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 4:
+ if (! _generic_isCC(*s, class_bit)) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 3:
+ if (! _generic_isCC(*s, class_bit)) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 2:
+ if (! _generic_isCC(*s, class_bit)) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+ s++;
+ /* FALLTHROUGH */
+ case 1:
+ if (! _generic_isCC(*s, class_bit)) break;
+ value = (value << shift) | XDIGIT_VALUE(*s);
+
+ if (LIKELY(len <= 7)) {
+ return value;
+ }
+
+ s++;
+ break;
+ }
+
+ bytes_so_far = s - s0;
+ factor = shift << bytes_so_far;
+ len -= bytes_so_far;
+
+ for (; len--; s++) {
if (_generic_isCC(*s, class_bit)) {
/* Write it in this wonky order with a goto to attempt to get the
compiler to make the common case integer-only loop pretty tight.
@@ -416,8 +507,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
value = XDIGIT_VALUE(*s);
factor = 1 << shift;
- if (! already_output_overflow_warning) {
- already_output_overflow_warning = TRUE;
+ if (! overflowed) {
+ overflowed = TRUE;
Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
"Integer overflow in %s number",
(base == 16) ? "hexadecimal"
@@ -438,14 +529,15 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
goto redo;
}
- if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
+ if ( *s
+ && ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
&& ckWARN(WARN_DIGIT))
{
if (base != 8) {
Perl_warner(aTHX_ packWARN(WARN_DIGIT),
"Illegal %s digit '%c' ignored",
- ((base == 2)
- ? "binary"
+ ((base == 2)
+ ? "binary"
: "hexadecimal"),
*s);
}
@@ -464,35 +556,24 @@ Perl_grok_bin_oct_hex(pTHX_ const char *start,
break;
}
- /* Calculate the final overflow approximation */
- if (value_nv != 0.0) {
- value_nv *= (NV) factor;
- value_nv += (NV) value;
- }
+ *len_p = s - start;
- if ( ( value_nv > 4294967295.0)
+ if (LIKELY(! overflowed)) {
#if UVSIZE > 4
- || ( value_nv == 0.0 && value > 0xffffffff
+ if ( UNLIKELY(value > 0xffffffff)
&& ! (input_flags & PERL_SCAN_SILENT_NON_PORTABLE))
+ {
+ output_non_portable(base);
+ }
#endif
- ) {
- const char * which = (base == 2)
- ? "Binary number > 0b11111111111111111111111111111111"
- : (base == 8)
- ? "Octal number > 037777777777"
- : "Hexadecimal number > 0xffffffff";
- /* Also there are listings for the other two. Since they are the first
- * word, it would be hard for a user to find them there starting with a
- * %s. */
- /* diag_listed_as: Hexadecimal number > 0xffffffff non-portable */
- Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), "%s non-portable", which);
+ return value;
}
- *len_p = s - start;
+ /* Overflowed: Calculate the final overflow approximation */
+ value_nv *= (NV) factor;
+ value_nv += (NV) value;
- if (value_nv == 0.0) { /* No overflow */
- return value;
- }
+ output_non_portable(base);
*flags = PERL_SCAN_GREATER_THAN_UV_MAX;
if (result)
diff --git a/perl.h b/perl.h
index 969dfc92f4..6b33b9d457 100644
--- a/perl.h
+++ b/perl.h
@@ -6890,9 +6890,13 @@ C<strtoul>.
# define Atoul(s) Strtoul(s, NULL, 10)
#endif
-#define grok_bin(s,lp,f,r) grok_bin_oct_hex(s, lp, f, r, 1)
-#define grok_oct(s,lp,f,r) grok_bin_oct_hex(s, lp, f, r, 3)
-#define grok_hex(s,lp,f,r) grok_bin_oct_hex(s, lp, f, r, 4)
+#define grok_bin(s,lp,fp,rp) \
+ grok_bin_oct_hex(s, lp, fp, rp, 1, _CC_BINDIGIT, 'b')
+#define grok_oct(s,lp,fp,rp) \
+ (*(fp) |= PERL_SCAN_DISALLOW_PREFIX, \
+ grok_bin_oct_hex(s, lp, fp, rp, 3, _CC_OCTDIGIT, '\0'))
+#define grok_hex(s,lp,fp,rp) \
+ grok_bin_oct_hex(s, lp, fp, rp, 4, _CC_XDIGIT, 'x')
#ifndef PERL_SCRIPT_MODE
#define PERL_SCRIPT_MODE "r"
diff --git a/pp.c b/pp.c
index 5cd32e1c8a..b86593eedb 100644
--- a/pp.c
+++ b/pp.c
@@ -3084,11 +3084,16 @@ PP(pp_oct)
if (*tmps == '0')
tmps++, len--;
if (isALPHA_FOLD_EQ(*tmps, 'x')) {
+ tmps++, len--;
+ flags |= PERL_SCAN_DISALLOW_PREFIX;
hex:
result_uv = grok_hex (tmps, &len, &flags, &result_nv);
}
- else if (isALPHA_FOLD_EQ(*tmps, 'b'))
+ else if (isALPHA_FOLD_EQ(*tmps, 'b')) {
+ tmps++, len--;
+ flags |= PERL_SCAN_DISALLOW_PREFIX;
result_uv = grok_bin (tmps, &len, &flags, &result_nv);
+ }
else
result_uv = grok_oct (tmps, &len, &flags, &result_nv);
diff --git a/proto.h b/proto.h
index 7c719b5e42..deed243a19 100644
--- a/proto.h
+++ b/proto.h
@@ -1148,7 +1148,7 @@ PERL_CALLCONV bool Perl_grok_atoUV(const char* pv, UV* valptr, const char** endp
PERL_CALLCONV UV Perl_grok_bin(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result);
#define PERL_ARGS_ASSERT_GROK_BIN \
assert(start); assert(len_p); assert(flags)
-PERL_CALLCONV UV Perl_grok_bin_oct_hex(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result, const unsigned shift);
+PERL_CALLCONV UV Perl_grok_bin_oct_hex(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result, const unsigned shift, const U8 lookup_bit, const char prefix);
#define PERL_ARGS_ASSERT_GROK_BIN_OCT_HEX \
assert(start); assert(len_p); assert(flags)
PERL_CALLCONV UV Perl_grok_hex(pTHX_ const char* start, STRLEN* len_p, I32* flags, NV *result);
@@ -5098,6 +5098,10 @@ STATIC AV* S_mro_get_linear_isa_dfs(pTHX_ HV* stash, U32 level);
#define PERL_ARGS_ASSERT_MRO_GET_LINEAR_ISA_DFS \
assert(stash)
#endif
+#if defined(PERL_IN_NUMERIC_C)
+STATIC void S_output_non_portable(pTHX_ const U8 shift);
+#define PERL_ARGS_ASSERT_OUTPUT_NON_PORTABLE
+#endif
#if defined(PERL_IN_OP_C)
STATIC void S_apply_attrs(pTHX_ HV *stash, SV *target, OP *attrs);
#define PERL_ARGS_ASSERT_APPLY_ATTRS \