diff options
author | Ganesh Kathiresan <ganesh3597@gmail.com> | 2021-11-02 03:35:27 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-01 17:05:27 -0500 |
commit | fae6fa47a3cf9b9c64af2f5bd11a3b644b1763d2 (patch) | |
tree | 835df556a0beaa6822692428c9ca4e9d6ccb76e1 /numpy/core/src | |
parent | 987d25bebb3564eca4a86712ccdeba93016758ed (diff) | |
download | numpy-fae6fa47a3cf9b9c64af2f5bd11a3b644b1763d2.tar.gz |
ENH: Adding `scalar.bit_count()` (popcount) (#19355)
Adding bitcount method to scalars, e.g.:
a = np.int32(1023).bit_count()
* ENH: Implementation of bit_count (popcount)
* ENH: Add bit_count to integer scalar type
* ENH: Annotations for bit_count
* ENH, WIP: Documentation for bit_count
* DOC: Added `bit_count` (#19355)
* BUG: Fixed windows 32 bit issue with no `__popcnt64`
* DOC: Refined docstring for bit_count
* TST: Tests for bit_count
* ENH, MAINT: Changed return type to uint_8 | Removed extra braces and fixed typo
* BUG: Fixed syntax of bit_count
* DOC, BUG: Fixed bit_count example
* DOC, BUG: (#19355) Removed bit_count from routines.math.rst | Improved release notes
* BUG: Added type suffix to magic constants
* ENH: Handle 32 bit windows popcount | Refactored popcount implementation to new function
* MAINT: Refactor type_methods, separate integer definitions
* DOC: Added double-ticks
Diffstat (limited to 'numpy/core/src')
-rw-r--r-- | numpy/core/src/multiarray/scalartypes.c.src | 52 | ||||
-rw-r--r-- | numpy/core/src/npymath/npy_math_internal.h.src | 86 |
2 files changed, 136 insertions, 2 deletions
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 0d52211a8..bbbc5bfa2 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -219,6 +219,27 @@ gentype_multiply(PyObject *m1, PyObject *m2) } /**begin repeat + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #c = hh, uhh, h, uh,, u, l, ul, ll, ull# + * #Name = Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong# + * #convert = Long*8, LongLong*2# + */ +static PyObject * +@type@_bit_count(PyObject *self) +{ + @type@ scalar = PyArrayScalar_VAL(self, @Name@); + uint8_t count = npy_popcount@c@(scalar); + PyObject *result = PyLong_From@convert@(count); + + return result; +} +/**end repeat**/ + +/**begin repeat * * #name = positive, negative, absolute, invert, int, float# */ @@ -2316,8 +2337,7 @@ static PyMethodDef @name@type_methods[] = { /**end repeat**/ /**begin repeat - * #name = byte, short, int, long, longlong, ubyte, ushort, - * uint, ulong, ulonglong, timedelta, cdouble# + * #name = timedelta, cdouble# */ static PyMethodDef @name@type_methods[] = { /* for typing; requires python >= 3.9 */ @@ -2328,6 +2348,23 @@ static PyMethodDef @name@type_methods[] = { }; /**end repeat**/ +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong# + */ +static PyMethodDef @name@type_methods[] = { + /* for typing; requires python >= 3.9 */ + {"__class_getitem__", + (PyCFunction)numbertype_class_getitem, + METH_CLASS | METH_O, NULL}, + {"bit_count", + (PyCFunction)npy_@name@_bit_count, + METH_NOARGS, NULL}, + {NULL, NULL, 0, NULL} /* sentinel */ +}; +/**end repeat**/ + + /************* As_mapping functions for void array scalar ************/ static Py_ssize_t @@ -4105,6 +4142,17 @@ initialize_numeric_types(void) /**end repeat**/ /**begin repeat + * #name = byte, short, int, long, longlong, + * ubyte, ushort, uint, ulong, ulonglong# + * #Name = Byte, Short, Int, Long, LongLong, + * UByte, UShort, UInt, ULong, ULongLong# + */ + + Py@Name@ArrType_Type.tp_methods = @name@type_methods; + + /**end repeat**/ + + /**begin repeat * #name = half, float, double, longdouble# * #Name = Half, Float, Double, LongDouble# */ diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index cae84befe..dd2424db8 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -55,6 +55,29 @@ */ #include "npy_math_private.h" +/* Magic binary numbers used by bit_count + * For type T, the magic numbers are computed as follows: + * Magic[0]: 01 01 01 01 01 01... = (T)~(T)0/3 + * Magic[1]: 0011 0011 0011... = (T)~(T)0/15 * 3 + * Magic[2]: 00001111 00001111... = (T)~(T)0/255 * 15 + * Magic[3]: 00000001 00000001... = (T)~(T)0/255 + * + * Counting bits set, in parallel + * Based on: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + * + * Generic Algorithm for type T: + * a = a - ((a >> 1) & (T)~(T)0/3); + * a = (a & (T)~(T)0/15*3) + ((a >> 2) & (T)~(T)0/15*3); + * a = (a + (a >> 4)) & (T)~(T)0/255*15; + * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT; +*/ + +static const npy_uint8 MAGIC8[] = {0x55u, 0x33u, 0x0Fu, 0x01u}; +static const npy_uint16 MAGIC16[] = {0x5555u, 0x3333u, 0x0F0Fu, 0x0101u}; +static const npy_uint32 MAGIC32[] = {0x55555555ul, 0x33333333ul, 0x0F0F0F0Ful, 0x01010101ul}; +static const npy_uint64 MAGIC64[] = {0x5555555555555555ull, 0x3333333333333333ull, 0x0F0F0F0F0F0F0F0Full, 0x0101010101010101ull}; + + /* ***************************************************************************** ** BASIC MATH FUNCTIONS ** @@ -814,3 +837,66 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) } /**end repeat1**/ /**end repeat**/ + + +#define __popcnt32 __popcnt +/**begin repeat + * + * #type = ubyte, ushort, uint, ulong, ulonglong# + * #STYPE = BYTE, SHORT, INT, LONG, LONGLONG# + * #c = hh, h, , l, ll# + */ +#undef TO_BITS_LEN +#if 0 +/**begin repeat1 + * #len = 8, 16, 32, 64# + */ +#elif NPY_BITSOF_@STYPE@ == @len@ + #define TO_BITS_LEN(X) X##@len@ +/**end repeat1**/ +#endif + + +NPY_INPLACE uint8_t +npy_popcount_parallel@c@(npy_@type@ a) +{ + a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]); + a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]); + a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2]; + return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT); +} + +NPY_INPLACE uint8_t +npy_popcountu@c@(npy_@type@ a) +{ +/* use built-in popcount if present, else use our implementation */ +#if (defined(__clang__) || defined(__GNUC__)) && NPY_BITSOF_@STYPE@ >= 32 + return __builtin_popcount@c@(a); +#elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16 + /* no builtin __popcnt64 for 32 bits */ + #if defined(_WIN64) || (defined(_WIN32) && NPY_BITSOF_@STYPE@ != 64) + return TO_BITS_LEN(__popcnt)(a); + /* split 64 bit number into two 32 bit ints and return sum of counts */ + #elif (defined(_WIN32) && NPY_BITSOF_@STYPE@ == 64) + npy_uint32 left = (npy_uint32) (a>>32); + npy_uint32 right = (npy_uint32) a; + return __popcnt32(left) + __popcnt32(right); + #endif +#else + return npy_popcount_parallel@c@(a); +#endif +} +/**end repeat**/ + +/**begin repeat + * + * #type = byte, short, int, long, longlong# + * #c = hh, h, , l, ll# + */ +NPY_INPLACE uint8_t +npy_popcount@c@(npy_@type@ a) +{ + /* Return popcount of abs(a) */ + return npy_popcountu@c@(a < 0 ? -a : a); +} +/**end repeat**/ |