ENH: Adding `scalar.bit_count()` (popcount) (#19355)

Adding bitcount method to scalars, e.g.: a = np.int32(1023).bit_count() * ENH: Implementation of bit_count (popcount) * ENH: Add bit_count to integer scalar type * ENH: Annotations for bit_count * ENH, WIP: Documentation for bit_count * DOC: Added `bit_count` (#19355) * BUG: Fixed windows 32 bit issue with no `__popcnt64` * DOC: Refined docstring for bit_count * TST: Tests for bit_count * ENH, MAINT: Changed return type to uint_8 | Removed extra braces and fixed typo * BUG: Fixed syntax of bit_count * DOC, BUG: Fixed bit_count example * DOC, BUG: (#19355) Removed bit_count from routines.math.rst | Improved release notes * BUG: Added type suffix to magic constants * ENH: Handle 32 bit windows popcount | Refactored popcount implementation to new function * MAINT: Refactor type_methods, separate integer definitions * DOC: Added double-ticks
author: Ganesh Kathiresan <ganesh3597@gmail.com> 2021-11-02 03:35:27 +0530
committer: GitHub <noreply@github.com> 2021-11-01 17:05:27 -0500
commit: fae6fa47a3cf9b9c64af2f5bd11a3b644b1763d2 (patch)
tree: 835df556a0beaa6822692428c9ca4e9d6ccb76e1 /numpy/core/src
parent: 987d25bebb3564eca4a86712ccdeba93016758ed (diff)
download: numpy-fae6fa47a3cf9b9c64af2f5bd11a3b644b1763d2.tar.gz
2 files changed, 136 insertions, 2 deletions
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 0d52211a8..bbbc5bfa2 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -219,6 +219,27 @@ gentype_multiply(PyObject *m1, PyObject *m2)
 }
 
 /**begin repeat
+ * #TYPE    = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ *            LONG, ULONG, LONGLONG, ULONGLONG#
+ * #type    = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
+ *            npy_long, npy_ulong, npy_longlong, npy_ulonglong#
+ * #c       = hh, uhh, h, uh,, u, l, ul, ll, ull#
+ * #Name    = Byte, UByte, Short, UShort, Int, UInt,
+ *            Long, ULong, LongLong, ULongLong#
+ * #convert = Long*8, LongLong*2#
+ */
+static PyObject *
+@type@_bit_count(PyObject *self)
+{
+    @type@ scalar = PyArrayScalar_VAL(self, @Name@);
+    uint8_t count = npy_popcount@c@(scalar);
+    PyObject *result = PyLong_From@convert@(count);
+
+    return result;
+}
+/**end repeat**/
+
+/**begin repeat
  *
  * #name = positive, negative, absolute, invert, int, float#
  */
@@ -2316,8 +2337,7 @@ static PyMethodDef @name@type_methods[] = {
 /**end repeat**/
 
 /**begin repeat
- * #name = byte, short, int, long, longlong, ubyte, ushort,
- *         uint, ulong, ulonglong, timedelta, cdouble#
+ * #name = timedelta, cdouble#
  */
 static PyMethodDef @name@type_methods[] = {
     /* for typing; requires python >= 3.9 */
@@ -2328,6 +2348,23 @@ static PyMethodDef @name@type_methods[] = {
 };
 /**end repeat**/
 
+/**begin repeat
+ * #name = byte, ubyte, short, ushort, int, uint,
+ *         long, ulong, longlong, ulonglong#
+ */
+static PyMethodDef @name@type_methods[] = {
+    /* for typing; requires python >= 3.9 */
+    {"__class_getitem__",
+        (PyCFunction)numbertype_class_getitem,
+        METH_CLASS | METH_O, NULL},
+    {"bit_count",
+        (PyCFunction)npy_@name@_bit_count,
+        METH_NOARGS, NULL},
+    {NULL, NULL, 0, NULL} /* sentinel */
+};
+/**end repeat**/
+
+
 /************* As_mapping functions for void array scalar ************/
 
 static Py_ssize_t
@@ -4105,6 +4142,17 @@ initialize_numeric_types(void)
     /**end repeat**/
 
     /**begin repeat
+     * #name = byte, short, int, long, longlong,
+     *         ubyte, ushort, uint, ulong, ulonglong#
+     * #Name = Byte, Short, Int, Long, LongLong,
+     *         UByte, UShort, UInt, ULong, ULongLong#
+     */
+
+    Py@Name@ArrType_Type.tp_methods = @name@type_methods;
+
+    /**end repeat**/
+
+    /**begin repeat
      * #name = half, float, double, longdouble#
      * #Name = Half, Float, Double, LongDouble#
      */
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index cae84befe..dd2424db8 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -55,6 +55,29 @@
  */
 #include "npy_math_private.h"
 
+/* Magic binary numbers used by bit_count
+ * For type T, the magic numbers are computed as follows:
+ * Magic[0]: 01 01 01 01 01 01... = (T)~(T)0/3
+ * Magic[1]: 0011 0011 0011...    = (T)~(T)0/15  * 3
+ * Magic[2]: 00001111 00001111... = (T)~(T)0/255 * 15
+ * Magic[3]: 00000001 00000001... = (T)~(T)0/255
+ *
+ * Counting bits set, in parallel
+ * Based on: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ *
+ * Generic Algorithm for type T:
+ * a = a - ((a >> 1) & (T)~(T)0/3);
+ * a = (a & (T)~(T)0/15*3) + ((a >> 2) & (T)~(T)0/15*3);
+ * a = (a + (a >> 4)) & (T)~(T)0/255*15;
+ * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT;
+*/
+
+static const npy_uint8  MAGIC8[]  = {0x55u,                 0x33u,                 0x0Fu,                 0x01u};
+static const npy_uint16 MAGIC16[] = {0x5555u,               0x3333u,               0x0F0Fu,               0x0101u};
+static const npy_uint32 MAGIC32[] = {0x55555555ul,          0x33333333ul,          0x0F0F0F0Ful,          0x01010101ul};
+static const npy_uint64 MAGIC64[] = {0x5555555555555555ull, 0x3333333333333333ull, 0x0F0F0F0F0F0F0F0Full, 0x0101010101010101ull};
+
+
 /*
  *****************************************************************************
  **                     BASIC MATH FUNCTIONS                                **
@@ -814,3 +837,66 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
 }
 /**end repeat1**/
 /**end repeat**/
+
+
+#define __popcnt32 __popcnt
+/**begin repeat
+ *
+ * #type  = ubyte, ushort, uint, ulong, ulonglong#
+ * #STYPE = BYTE,  SHORT,  INT,  LONG,  LONGLONG#
+ * #c     = hh,    h,      ,     l,     ll#
+ */
+#undef TO_BITS_LEN
+#if 0
+/**begin repeat1
+ * #len = 8, 16, 32, 64#
+ */
+#elif NPY_BITSOF_@STYPE@ == @len@
+    #define TO_BITS_LEN(X) X##@len@
+/**end repeat1**/
+#endif
+
+
+NPY_INPLACE uint8_t
+npy_popcount_parallel@c@(npy_@type@ a)
+{
+    a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]);
+    a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]);
+    a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2];
+    return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT);
+}
+
+NPY_INPLACE uint8_t
+npy_popcountu@c@(npy_@type@ a)
+{
+/* use built-in popcount if present, else use our implementation */
+#if (defined(__clang__) || defined(__GNUC__)) && NPY_BITSOF_@STYPE@ >= 32
+    return __builtin_popcount@c@(a);
+#elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16
+    /* no builtin __popcnt64 for 32 bits */
+    #if defined(_WIN64) || (defined(_WIN32) && NPY_BITSOF_@STYPE@ != 64)
+        return TO_BITS_LEN(__popcnt)(a);
+    /* split 64 bit number into two 32 bit ints and return sum of counts */
+    #elif (defined(_WIN32) && NPY_BITSOF_@STYPE@ == 64)
+        npy_uint32 left  = (npy_uint32) (a>>32);
+        npy_uint32 right = (npy_uint32) a;
+        return __popcnt32(left) + __popcnt32(right);
+    #endif
+#else
+    return npy_popcount_parallel@c@(a);
+#endif
+}
+/**end repeat**/
+
+/**begin repeat
+ *
+ * #type = byte, short, int, long, longlong#
+ * #c    = hh,   h,     ,    l,    ll#
+ */
+NPY_INPLACE uint8_t
+npy_popcount@c@(npy_@type@ a)
+{
+    /* Return popcount of abs(a) */
+    return npy_popcountu@c@(a < 0 ? -a : a);
+}
+/**end repeat**/
author	Ganesh Kathiresan <ganesh3597@gmail.com>	2021-11-02 03:35:27 +0530
committer	GitHub <noreply@github.com>	2021-11-01 17:05:27 -0500
commit	fae6fa47a3cf9b9c64af2f5bd11a3b644b1763d2 (patch)
tree	835df556a0beaa6822692428c9ca4e9d6ccb76e1 /numpy/core/src
parent	987d25bebb3564eca4a86712ccdeba93016758ed (diff)
download	numpy-fae6fa47a3cf9b9c64af2f5bd11a3b644b1763d2.tar.gz