summaryrefslogtreecommitdiff
path: root/inline.h
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2021-07-20 12:14:35 -0600
committer: Karl Williamson <khw@cpan.org> 2021-07-30 05:41:28 -0600
commit: 2e0bc9cea3bb80cb7c96411cf8e20977e0d9f8c5 (patch)
tree: 88e889f5bd32a827cb54fab45ddfbdd4221706d4 /inline.h
parent: 250e5324a59b505b79ee7b1dc924b0e8e5b51f95 (diff)
download: perl-2e0bc9cea3bb80cb7c96411cf8e20977e0d9f8c5.tar.gz
Always use any fast available msb/lsb method
Some platforms have a fast way to get the msb but not the lsb; others, more rarely, have the reverse. But a few shifts and similar instructions allow us to express either operation in terms of the other. This commit causes any available fast method to be used, by converting the unavailable case into the available one.
Diffstat (limited to 'inline.h')
-rw-r--r--inline.h49
1 files changed, 49 insertions, 0 deletions
diff --git a/inline.h b/inline.h
index eddb3710c3..11288dad78 100644
--- a/inline.h
+++ b/inline.h
@@ -755,10 +755,12 @@ Perl_lsbit_pos64(U64 word)
* this info, use that */
# if defined(PERL_CTZ_64)
+# define PERL_HAS_FAST_GET_LSB_POS64
return (unsigned) PERL_CTZ_64(word);
# elif U64SIZE == 8 && defined(_MSC_VER) && _MSC_VER >= 1400
+# define PERL_HAS_FAST_GET_LSB_POS64
{
unsigned long index;
@@ -807,10 +809,12 @@ Perl_lsbit_pos32(U32 word)
ASSUME(word != 0);
#if defined(PERL_CTZ_32)
+# define PERL_HAS_FAST_GET_LSB_POS32
return (unsigned) PERL_CTZ_32(word);
#elif U32SIZE == 4 && defined(_MSC_VER) && _MSC_VER >= 1400
+# define PERL_HAS_FAST_GET_LSB_POS32
{
unsigned long index;
@@ -844,10 +848,12 @@ Perl_msbit_pos64(U64 word)
* this, use that */
# if defined(PERL_CLZ_64)
+# define PERL_HAS_FAST_GET_MSB_POS64
return (unsigned) LZC_TO_MSBIT_POS_(U64, PERL_CLZ_64(word));
# elif U64SIZE == 8 && defined(_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1400
+# define PERL_HAS_FAST_GET_MSB_POS64
{
unsigned long index;
@@ -899,10 +905,12 @@ Perl_msbit_pos32(U32 word)
ASSUME(word != 0);
#if defined(PERL_CLZ_32)
+# define PERL_HAS_FAST_GET_MSB_POS32
return (unsigned) LZC_TO_MSBIT_POS_(U32, PERL_CLZ_32(word));
#elif U32SIZE == 4 && defined(_MSC_VER) && _MSC_VER >= 1400
+# define PERL_HAS_FAST_GET_MSB_POS32
{
unsigned long index;
@@ -938,11 +946,29 @@ Perl_single_1bit_pos64(U64 word)
ASSUME(word && (word & (word-1)) == 0);
# endif
+ /* The only set bit is both the most and least significant bit. If we have
+ * a fast way of finding either one, use that.
+ *
+ * It may appear at first glance that those functions call this one, but
+ * they don't if the corresponding #define is set */
+
+# ifdef PERL_HAS_FAST_GET_MSB_POS64
+
+ return msbit_pos64(word);
+
+# elif defined(PERL_HAS_FAST_GET_LSB_POS64)
+
+ return lsbit_pos64(word);
+
+# else
+
/* The position of the only set bit in a word can be quickly calculated
* using deBruijn sequences. See for example
* https://en.wikipedia.org/wiki/De_Bruijn_sequence */
return PL_deBruijn_bitpos_tab64[(word * PERL_deBruijnMagic64_)
>> PERL_deBruijnShift64_];
+# endif
+
}
#endif
@@ -958,9 +984,32 @@ Perl_single_1bit_pos32(U32 word)
#else
ASSUME(word && (word & (word-1)) == 0);
#endif
+#ifdef PERL_HAS_FAST_GET_MSB_POS32
+
+ return msbit_pos32(word);
+
+#elif defined(PERL_HAS_FAST_GET_LSB_POS32)
+
+ return lsbit_pos32(word);
+
+/* Unlikely, but possible for the platform to have a wider fast operation but
+ * not a narrower one. But easy enough to handle the case by widening the
+ * parameter size. (Going the other way, emulating 64 bit by two 32 bit ops
+ * would be slower than the deBruijn method.) */
+#elif defined(PERL_HAS_FAST_GET_MSB_POS64)
+
+ return msbit_pos64(word);
+
+#elif defined(PERL_HAS_FAST_GET_LSB_POS64)
+
+ return lsbit_pos64(word);
+
+#else
return PL_deBruijn_bitpos_tab32[(word * PERL_deBruijnMagic32_)
>> PERL_deBruijnShift32_];
+#endif
+
}
#ifndef EBCDIC