diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2010-11-09 16:02:27 -0800 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2010-12-01 20:02:15 -0700 |
commit | 15c68128fea5618902c62c62436e2bad1eb865b7 (patch) | |
tree | 180eb6412b4d7dc54d89ba260d56fc4b4ebdd3e2 /numpy/core/src | |
parent | af84876fac13ac2e4e44ac0cae599fe9d6e68643 (diff) | |
download | numpy-15c68128fea5618902c62c62436e2bad1eb865b7.tar.gz |
ENH: core: Create half/float16 data type
Diffstat (limited to 'numpy/core/src')
-rw-r--r-- | numpy/core/src/multiarray/arraytypes.c.src | 395 | ||||
-rw-r--r-- | numpy/core/src/multiarray/conversion_utils.c | 3 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/scalarapi.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/scalartypes.c.src | 85 | ||||
-rw-r--r-- | numpy/core/src/npymath/halffloat.c | 461 | ||||
-rw-r--r-- | numpy/core/src/scalarmathmodule.c.src | 248 |
7 files changed, 997 insertions, 198 deletions
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index fb48704ba..c9d517c50 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -12,6 +12,7 @@ #include "numpy/npy_3kcompat.h" #include "numpy/npy_math.h" +#include "numpy/halffloat.h" #include "common.h" #include "ctors.h" @@ -21,6 +22,7 @@ #include "numpyos.h" + /* ***************************************************************************** ** PYTHON TYPES TO C TYPES ** @@ -45,6 +47,18 @@ MyPyFloat_AsDouble(PyObject *obj) return ret; } +static npy_half +MyPyFloat_AsHalf(PyObject *obj) +{ + return npy_double_to_half(MyPyFloat_AsDouble(obj)); +} + +static PyObject * +MyPyFloat_FromHalf(npy_half h) +{ + return PyFloat_FromDouble(npy_half_to_double(h)); +} + /**begin repeat * #type = long, longlong# @@ -97,18 +111,18 @@ static char * _SEQUENCE_MESSAGE = "error setting an array element with a sequenc /**begin repeat * * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, LONG, UINT, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE# + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE# * #func1 = PyBool_FromLong, PyInt_FromLong*6, PyLong_FromUnsignedLong*2, * PyLong_FromLongLong, PyLong_FromUnsignedLongLong, - * PyFloat_FromDouble*2# + * MyPyFloat_FromHalf, PyFloat_FromDouble*2# * #func2 = PyObject_IsTrue, MyPyLong_AsLong*6, MyPyLong_AsUnsignedLong*2, * MyPyLong_AsLongLong, MyPyLong_AsUnsignedLongLong, - * MyPyFloat_AsDouble*2# + * MyPyFloat_AsHalf, MyPyFloat_AsDouble*2# * #type = Bool, byte, ubyte, short, ushort, int, long, uint, ulong, - * longlong, ulonglong, float, double# - * #type1 = long*7, ulong*2, longlong, ulonglong, float, double# + * longlong, ulonglong, npy_half, float, double# + * #type1 = long*7, ulong*2, longlong, ulonglong, npy_half, float, double# * #kind = Bool, Byte, UByte, Short, UShort, Int, Long, UInt, ULong, - * LongLong, ULongLong, Float, Double# + * LongLong, ULongLong, Half, Float, Double# */ static 
PyObject * @TYPE@_getitem(char *ip, PyArrayObject *ap) { @@ -1236,7 +1250,7 @@ TIMEDELTA_setitem(PyObject *op, char *ov, PyArrayObject *ap) { * #totype = byte, ubyte, short, ushort, int, uint, long, ulong, * longlong, ulonglong, float, double, longdouble, datetime, * timedelta# -*/ + */ /**begin repeat1 * @@ -1278,6 +1292,97 @@ static void /**begin repeat * + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, + * LONGLONG, ULONGLONG, LONGDOUBLE, DATETIME, + * TIMEDELTA# + * #type = byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, longdouble, datetime, + * timedelta# + */ + +static void +@TYPE@_to_HALF(@type@ *ip, npy_half *op, intp n, + PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop)) +{ + while (n--) { + *op++ = npy_float_to_half((float)(*ip++)); + } +} + +static void +HALF_to_@TYPE@(npy_half *ip, @type@ *op, intp n, + PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop)) +{ + while (n--) { + *op++ = (@type@)npy_half_to_float(*ip++); + } +} + +/**end repeat**/ +#if SIZEOF_SHORT == 2 +#define HALF_to_HALF SHORT_to_SHORT +#elif SIZEOF_INT == 2 +#define HALF_to_HALF INT_to_INT +#endif + +/**begin repeat + * + * #TYPE = FLOAT, DOUBLE, CFLOAT, CDOUBLE# + * #type = float, double, float, double# + * #itype = npy_uint32, npy_uint64, npy_uint32, npy_uint64# + * #iscomplex = 0, 0, 1, 1# + */ + +static void +@TYPE@_to_HALF(@itype@ *ip, npy_half *op, intp n, + PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop)) +{ + while (n--) { + *op++ = npy_@type@bits_to_halfbits(*ip); +#if @iscomplex@ + ip += 2; +#else + ip++; +#endif + } +} + +static void +HALF_to_@TYPE@(npy_half *ip, @itype@ *op, intp n, + PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop)) +{ + while (n--) { + *op++ = npy_halfbits_to_@type@bits(*ip++); +#if @iscomplex@ + *op++ = 0; +#endif + } +} + +/**end repeat**/ + +static void +CLONGDOUBLE_to_HALF(longdouble *ip, npy_half *op, intp n, + PyArrayObject *NPY_UNUSED(aip), 
PyArrayObject *NPY_UNUSED(aop)) +{ + while (n--) { + *op++ = npy_double_to_half((double) (*ip)); /* real part only; the += 2 below steps over the (real, imag) pair */ + ip += 2; + } +} + +static void +HALF_to_CLONGDOUBLE(npy_half *ip, longdouble *op, intp n, + PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop)) +{ + while (n--) { + *op++ = npy_half_to_double(*ip++); + *op++ = 0; + } +} + +/**begin repeat + * + * #FROMTYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, DATETIME, * TIMEDELTA# @@ -1295,6 +1400,15 @@ static void } /**end repeat**/ +static void +HALF_to_BOOL(npy_half *ip, Bool *op, intp n, + PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop)) +{ + while (n--) { + *op++ = (Bool)(!npy_half_iszero(*ip++)); + } +} + /**begin repeat * * #FROMTYPE = CFLOAT, CDOUBLE, CLONGDOUBLE# @@ -1314,18 +1428,20 @@ static void /**begin repeat * #TOTYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, DATETIME, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, DATETIME, * TIMEDELTA# * #totype = byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, datetime, + * longlong, ulonglong, npy_half, float, double, longdouble, datetime, * timedelta# -*/ + * #one = 1*10, NPY_HALF_ONE, 1*5# + * #zero = 0*10, NPY_HALF_ZERO, 0*5# + */ static void BOOL_to_@TOTYPE@(Bool *ip, @totype@ *op, intp n, PyArrayObject *NPY_UNUSED(aip), PyArrayObject *NPY_UNUSED(aop)) { while (n--) { - *op++ = (@totype@)(*ip++ != FALSE); + *op++ = (@totype@)((*ip++ != FALSE) ? 
@one@ : @zero@); } } /**end repeat**/ @@ -1383,14 +1499,14 @@ static void /**begin repeat * * #FROMTYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, STRING, UNICODE, VOID, OBJECT, * DATETIME, TIMEDELTA# * #fromtype = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * cfloat, cdouble, clongdouble, char, char, char, PyObject *, * datetime, timedelta# - * #skip = 1*17, aip->descr->elsize*3, 1*3# + * #skip = 1*18, aip->descr->elsize*3, 1*3# */ static void @FROMTYPE@_to_OBJECT(@fromtype@ *ip, PyObject **op, intp n, PyArrayObject *aip, @@ -1431,14 +1547,14 @@ static void /**begin repeat * * #TOTYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, STRING, UNICODE, VOID, DATETIME, * TIMEDELTA# * #totype = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * cfloat, cdouble, clongdouble, char, char, char, datetime, * timedelta# - * #skip = 1*17, aop->descr->elsize*3, 1*2# + * #skip = 1*18, aop->descr->elsize*3, 1*2# */ static void OBJECT_to_@TOTYPE@(PyObject **ip, @totype@ *op, intp n, @@ -1461,13 +1577,13 @@ OBJECT_to_@TOTYPE@(PyObject **ip, @totype@ *op, intp n, /**begin repeat * - * #from = STRING*22, UNICODE*22, VOID*22# - * #fromtyp = char*66# - * #to = (BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE, STRING, UNICODE, VOID, DATETIME, TIMEDELTA)*3# - * #totyp = (Bool, byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong, 
float, double, longdouble, cfloat, cdouble, clongdouble, char, char, char, datetime, timedelta)*3# - * #oskip = (1*17,aop->descr->elsize*3,1*2)*3# - * #convert = 1*17, 0*3, 1*2, 1*17, 0*3, 1*2, 0*22# - * #convstr = (Int*9, Long*2, Float*3, Complex*3, Tuple*3, Long*2)*3# + * #from = STRING*23, UNICODE*23, VOID*23# + * #fromtyp = char*69# + * #to = (BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE, STRING, UNICODE, VOID, DATETIME, TIMEDELTA)*3# + * #totyp = (Bool, byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong, npy_half, float, double, longdouble, cfloat, cdouble, clongdouble, char, char, char, datetime, timedelta)*3# + * #oskip = (1*18,aop->descr->elsize*3,1*2)*3# + * #convert = 1*18, 0*3, 1*2, 1*18, 0*3, 1*2, 0*23# + * #convstr = (Int*9, Long*2, Float*4, Complex*3, Tuple*3, Long*2)*3# */ static void @from@_to_@to@(@fromtyp@ *ip, @totyp@ *op, intp n, PyArrayObject *aip, @@ -1484,23 +1600,23 @@ static void return; } /* convert from Python object to needed one */ - if (@convert@) { - PyObject *new, *args; - /* call out to the Python builtin given by convstr */ - args = Py_BuildValue("(N)", temp); +#if @convert@ + PyObject *new, *args; + /* call out to the Python builtin given by convstr */ + args = Py_BuildValue("(N)", temp); #if defined(NPY_PY3K) #define PyInt_Type PyLong_Type #endif - new = Py@convstr@_Type.tp_new(&Py@convstr@_Type, args, NULL); + new = Py@convstr@_Type.tp_new(&Py@convstr@_Type, args, NULL); #if defined(NPY_PY3K) #undef PyInt_Type #endif - Py_DECREF(args); - temp = new; - if (temp == NULL) { - return; - } + Py_DECREF(args); + temp = new; + if (temp == NULL) { + return; } +#endif /* @convert@ */ if (@to@_setitem(temp,(char *)op, aop)) { Py_DECREF(temp); return; @@ -1513,13 +1629,13 @@ static void /**begin repeat * - * #to = STRING*19, UNICODE*19, VOID*19# - * #totyp = char*19, char*19, char*19# + * #to = STRING*20, UNICODE*20, VOID*20# 
+ * #totyp = char*20, char*20, char*20# * #from = (BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA)*3# * #fromtyp = (Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * cfloat, cdouble, clongdouble, datetime, timedelta)*3# */ static void @@ -1587,6 +1703,17 @@ static int } /**end repeat**/ +static int +HALF_scan(FILE *fp, npy_half *ip, void *NPY_UNUSED(ignore), PyArray_Descr *NPY_UNUSED(ignored)) +{ + double result; + int ret; + + ret = NumPyOS_ascii_ftolf(fp, &result); + *ip = npy_double_to_half(result); + return ret; +} + /**begin repeat * #fname = BYTE, UBYTE# * #type = byte, ubyte# @@ -1666,6 +1793,15 @@ static int } /**end repeat**/ +static int +HALF_fromstr(char *str, npy_half *ip, char **endptr, PyArray_Descr *NPY_UNUSED(ignore)) +{ + double result; + + result = NumPyOS_ascii_strtod(str, endptr); + *ip = npy_double_to_half(result); + return 0; +} /**begin repeat @@ -1684,11 +1820,11 @@ static int /**begin repeat * - * #fname = SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, FLOAT, + * #fname = SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, HALF, FLOAT, * DOUBLE, LONGDOUBLE, DATETIME, TIMEDELTA# - * #fsize = SHORT, SHORT, INT, INT, LONG, LONG, LONGLONG, LONGLONG, FLOAT, + * #fsize = SHORT, SHORT, INT, INT, LONG, LONG, LONGLONG, LONGLONG, HALF, FLOAT, * DOUBLE, LONGDOUBLE, DATETIME, TIMEDELTA# - * #type = short, ushort, int, uint, long, ulong, longlong, ulonglong, float, + * #type = short, ushort, int, uint, long, ulong, longlong, ulonglong, npy_half, float, * double, longdouble, datetime, timedelta# */ static void @@ -2223,22 +2359,25 @@ UNICODE_copyswap (char *dst, char *src, int swap, PyArrayObject *arr) 
***************************************************************************** */ +#define _NONZERO(a) ((a) != 0) + /**begin repeat * * #fname = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * DATETIME, TIMEDELTA# * #type = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * datetime, timedelta# - * #isfloat = 0*11, 1*3, 0*2# + * #isfloat = 0*11, 1*4, 0*2# + * #nonzero = _NONZERO*11, !npy_half_iszero, _NONZERO*5# */ static Bool @fname@_nonzero (char *ip, PyArrayObject *ap) { if (ap == NULL || PyArray_ISBEHAVED_RO(ap)) { @type@ *ptmp = (@type@ *)ip; - return (Bool) (*ptmp != 0); + return (Bool) @nonzero@(*ptmp); } else { /* @@ -2252,7 +2391,7 @@ static Bool #else memcpy(&tmp, ip, sizeof(@type@)); #endif - return (Bool) (tmp != 0); + return (Bool) @nonzero@(tmp); } } /**end repeat**/ @@ -2562,6 +2701,31 @@ C@TYPE@_compare(@type@ *pa, @type@ *pb) /**end repeat**/ +static int +HALF_compare (npy_half *pa, npy_half *pb, PyArrayObject *NPY_UNUSED(ap)) +{ + npy_half a = *pa, b = *pb; + Bool a_isnan, b_isnan; + int ret; + + a_isnan = npy_half_isnan(a); + b_isnan = npy_half_isnan(b); + + if (a_isnan) { + ret = b_isnan ? 
0 : -1; + } else if (b_isnan) { + ret = 1; + } else if(npy_half_lt_nonan(a, b)) { + ret = -1; + } else if(npy_half_lt_nonan(b, a)) { + ret = 1; + } else { + ret = 0; + } + + return ret; +} + /* object type */ @@ -2728,18 +2892,21 @@ finish: ***************************************************************************** */ +#define _LESS_THAN_OR_EQUAL(a,b) ((a) <= (b)) /**begin repeat * * #fname = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# * #type = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * float, double, longdouble, datetime, timedelta# - * #isfloat = 0*11, 1*6, 0*2# - * #iscomplex = 0*14, 1*3, 0*2# - * #incr = ip++*14, ip+=2*3, ip++*2# + * #isfloat = 0*11, 1*7, 0*2# + * #isnan = nop*11, npy_half_isnan, npy_isnan*6, nop*2# + * #le = _LESS_THAN_OR_EQUAL*11, npy_half_le, _LESS_THAN_OR_EQUAL*8# + * #iscomplex = 0*15, 1*3, 0*2# + * #incr = ip++*15, ip+=2*3, ip++*2# */ static int @fname@_argmax(@type@ *ip, intp n, intp *max_ind, PyArrayObject *NPY_UNUSED(aip)) @@ -2753,13 +2920,13 @@ static int *max_ind = 0; #if @isfloat@ - if (npy_isnan(mp)) { + if (@isnan@(mp)) { /* nan encountered; it's maximal */ return 0; } #endif #if @iscomplex@ - if (npy_isnan(mp_im)) { + if (@isnan@(mp_im)) { /* nan encountered; it's maximal */ return 0; } @@ -2773,21 +2940,21 @@ static int #if @iscomplex@ /* Lexical order for complex numbers */ if ((ip[0] > mp) || ((ip[0] == mp) && (ip[1] > mp_im)) - || npy_isnan(ip[0]) || npy_isnan(ip[1])) { + || @isnan@(ip[0]) || @isnan@(ip[1])) { mp = ip[0]; mp_im = ip[1]; *max_ind = i; - if (npy_isnan(mp) || npy_isnan(mp_im)) { + if (@isnan@(mp) || @isnan@(mp_im)) { /* nan encountered, it's maximal */ break; } } #else - if (!(*ip <= mp)) { /* negated, for 
correct nan handling */ + if (!@le@(*ip, mp)) { /* negated, for correct nan handling */ mp = *ip; *max_ind = i; #if @isfloat@ - if (npy_isnan(mp)) { + if (@isnan@(mp)) { /* nan encountered, it's maximal */ break; } @@ -2800,6 +2967,8 @@ static int /**end repeat**/ +#undef _LESS_THAN_OR_EQUAL + static int OBJECT_argmax(PyObject **ip, intp n, intp *max_ind, PyArrayObject *NPY_UNUSED(aip)) { @@ -2910,6 +3079,19 @@ static void } /**end repeat**/ +static void +HALF_dot(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp n, + void *NPY_UNUSED(ignore)) +{ + float tmp = 0.0f; + npy_intp i; + + for (i = 0; i < n; i++, ip1 += is1, ip2 += is2) { + tmp += npy_half_to_float(*((npy_half *)ip1)) * + npy_half_to_float(*((npy_half *)ip2)); + } + *((npy_half *)op) = npy_float_to_half(tmp); +} /**begin repeat * @@ -3041,6 +3223,19 @@ static void } /**end repeat**/ +static void +HALF_fill(npy_half *buffer, npy_intp length, void *NPY_UNUSED(ignored)) +{ + npy_intp i; + float start = npy_half_to_float(buffer[0]); + float delta = npy_half_to_float(buffer[1]); + + delta -= start; + for (i = 2; i < length; ++i) { + buffer[i] = npy_float_to_half(start + i*delta); + } +} + /**begin repeat * * #NAME = CFLOAT, CDOUBLE, CLONGDOUBLE# @@ -3095,10 +3290,10 @@ static void /**begin repeat * * #NAME = SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, ULONGLONG, - * FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE, + * HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE, * DATETIME, TIMEDELTA# * #typ = short, ushort, int, uint, long, ulong, longlong, ulonglong, - * float, double, longdouble, cfloat, cdouble, clongdouble, + * npy_half, float, double, longdouble, cfloat, cdouble, clongdouble, * datetime, timedelta# */ static void @@ -3120,15 +3315,27 @@ static void ***************************************************************************** */ +#define _LESS_THAN(a, b) ((a) < (b)) +#define _GREATER_THAN(a, b) ((a) > (b)) +/* + * In fastclip, 'b' was already checked for 
NaN, so the half comparison + * only needs to check 'a' for NaN. + */ +#define _HALF_LESS_THAN(a, b) (!npy_half_isnan(a) && npy_half_lt_nonan(a, b)) +#define _HALF_GREATER_THAN(a, b) (!npy_half_isnan(a) && npy_half_lt_nonan(b, a)) /**begin repeat * * #name = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * DATETIME, TIMEDELTA# * #type = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * datetime, timedelta# + * #isfloat = 0*11, 1*4, 0*2# + * #isnan = nop*11, npy_half_isnan, npy_isnan*3, nop*2# + * #lt = _LESS_THAN*11, _HALF_LESS_THAN, _LESS_THAN*5# + * #gt = _GREATER_THAN*11, _HALF_GREATER_THAN, _GREATER_THAN*5# */ static void @name@_fastclip(@type@ *in, intp ni, @type@ *min, @type@ *max, @type@ *out) @@ -3138,30 +3345,47 @@ static void if (max != NULL) { max_val = *max; +#if @isfloat@ + /* NaNs result in no clipping, so optimize the case away */ + if (@isnan@(max_val)) { + if (min == NULL) { + return; + } + max = NULL; + } +#endif } if (min != NULL) { min_val = *min; +#if @isfloat@ + if (@isnan@(min_val)) { + if (max == NULL) { + return; + } + min = NULL; + } +#endif } if (max == NULL) { for (i = 0; i < ni; i++) { - if (in[i] < min_val) { + if (@lt@(in[i], min_val)) { out[i] = min_val; } } } else if (min == NULL) { for (i = 0; i < ni; i++) { - if (in[i] > max_val) { + if (@gt@(in[i], max_val)) { out[i] = max_val; } } } else { for (i = 0; i < ni; i++) { - if (in[i] < min_val) { + if (@lt@(in[i], min_val)) { out[i] = min_val; } - else if (in[i] > max_val) { + else if (@gt@(in[i], max_val)) { out[i] = max_val; } } @@ -3169,6 +3393,11 @@ static void } /**end repeat**/ +#undef _LESS_THAN +#undef _GREATER_THAN +#undef _HALF_LESS_THAN +#undef _HALF_GREATER_THAN + /**begin repeat * * #name = CFLOAT, CDOUBLE, CLONGDOUBLE# @@ -3229,10 
+3458,10 @@ static void /**begin repeat * * #name = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# * #type = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * cfloat, cdouble, clongdouble, datetime, timedelta# */ static void @@ -3273,10 +3502,10 @@ static void /**begin repeat * * #name = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA# * #type = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, double, longdouble, * cfloat, cdouble, clongdouble, datetime, timedelta# */ static int @@ -3431,6 +3660,7 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { (PyArray_VectorUnaryFunc*)@from@_to_ULONG, (PyArray_VectorUnaryFunc*)@from@_to_LONGLONG, (PyArray_VectorUnaryFunc*)@from@_to_ULONGLONG, + (PyArray_VectorUnaryFunc*)@from@_to_HALF, (PyArray_VectorUnaryFunc*)@from@_to_FLOAT, (PyArray_VectorUnaryFunc*)@from@_to_DOUBLE, (PyArray_VectorUnaryFunc*)@from@_to_LONGDOUBLE, @@ -3469,24 +3699,23 @@ static PyArray_Descr @from@_Descr = { /**end repeat**/ - /**begin repeat * * #from = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, OBJECT, DATETIME, TIMEDELTA# - * #num = 1*14, 2*3, 1*3# + * #num = 1*15, 2*3, 1*3# * #fromtyp = Bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, longdouble, + * longlong, ulonglong, npy_half, float, 
double, longdouble, * float, double, longdouble, PyObject *, datetime, timedelta# * #NAME = Bool, Byte, UByte, Short, UShort, Int, UInt, Long, ULong, - * LongLong, ULongLong, Float, Double, LongDouble, + * LongLong, ULongLong, Half, Float, Double, LongDouble, * CFloat, CDouble, CLongDouble, Object, Datetime, Timedelta# * #kind = GENBOOL, SIGNED, UNSIGNED, SIGNED, UNSIGNED, SIGNED, UNSIGNED, SIGNED, UNSIGNED, - * SIGNED, UNSIGNED, FLOATING, FLOATING, FLOATING, + * SIGNED, UNSIGNED, FLOATING, FLOATING, FLOATING, FLOATING, * COMPLEX, COMPLEX, COMPLEX, OBJECT, DATETIME, TIMEDELTA# - * #endian = |*3, =*14, |, =*2# - * #isobject= 0*17,NPY_OBJECT_DTYPE_FLAGS,0*2# + * #endian = |*3, =*15, |, =*2# + * #isobject= 0*18,NPY_OBJECT_DTYPE_FLAGS,0*2# */ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { (PyArray_GetItemFunc*)@from@_getitem, @@ -3527,6 +3756,7 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = { (PyArray_VectorUnaryFunc*)@from@_to_ULONG, (PyArray_VectorUnaryFunc*)@from@_to_LONGLONG, (PyArray_VectorUnaryFunc*)@from@_to_ULONGLONG, + (PyArray_VectorUnaryFunc*)@from@_to_HALF, (PyArray_VectorUnaryFunc*)@from@_to_FLOAT, (PyArray_VectorUnaryFunc*)@from@_to_DOUBLE, (PyArray_VectorUnaryFunc*)@from@_to_LONGDOUBLE, @@ -3603,6 +3833,7 @@ static PyArray_Descr *_builtin_descrs[] = { &ULONG_Descr, &LONGLONG_Descr, &ULONGLONG_Descr, + &HALF_Descr, &FLOAT_Descr, &DOUBLE_Descr, &LONGDOUBLE_Descr, @@ -3699,7 +3930,7 @@ set_typeinfo(PyObject *dict) /**begin repeat * * #name = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, INTP, UINTP, - * LONG, ULONG, LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONG, ULONG, LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, OBJECT, STRING, UNICODE, VOID, * DATETIME,TIMEDELTA# */ @@ -3709,7 +3940,7 @@ set_typeinfo(PyObject *dict) /**begin repeat * #name = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, - * LONG, ULONG, LONGLONG, ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, + * LONG, ULONG, LONGLONG, ULONGLONG, HALF, FLOAT, 
DOUBLE, LONGDOUBLE, * CFLOAT, CDOUBLE, CLONGDOUBLE, OBJECT, STRING, UNICODE, VOID, * DATETIME, TIMEDELTA# */ @@ -3766,9 +3997,9 @@ set_typeinfo(PyObject *dict) /**begin repeat * - * #type = float, double, longdouble, cfloat, cdouble, clongdouble# - * #name = FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE# - * #Name = Float, Double, LongDouble, CFloat, CDouble, CLongDouble# + * #type = npy_half, float, double, longdouble, cfloat, cdouble, clongdouble# + * #name = HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE# + * #Name = Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# */ PyDict_SetItemString(infodict, "@name@", #if defined(NPY_PY3K) diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c index abc254058..4ad2e9f51 100644 --- a/numpy/core/src/multiarray/conversion_utils.c +++ b/numpy/core/src/multiarray/conversion_utils.c @@ -687,6 +687,9 @@ PyArray_TypestrConvert(int itemsize, int gentype) } else if (gentype == PyArray_FLOATINGLTR) { switch(itemsize) { + case 2: + newtype = PyArray_FLOAT16; + break; case 4: newtype = PyArray_FLOAT32; break; diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index f35bfd662..d4dba719c 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -2946,6 +2946,7 @@ setup_scalartypes(PyObject *NPY_UNUSED(dict)) SINGLE_INHERIT(ULong, UnsignedInteger); SINGLE_INHERIT(ULongLong, UnsignedInteger); + SINGLE_INHERIT(Half, Floating); SINGLE_INHERIT(Float, Floating); DUAL_INHERIT(Double, Float, Floating); SINGLE_INHERIT(LongDouble, Floating); diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index f549ad35c..87e140c4e 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -57,6 +57,7 @@ scalar_value(PyObject *scalar, PyArray_Descr *descr) CASE(ULONG, ULong); 
CASE(LONGLONG, LongLong); CASE(ULONGLONG, ULongLong); + CASE(HALF, Half); CASE(FLOAT, Float); CASE(DOUBLE, Double); CASE(LONGDOUBLE, LongDouble); @@ -110,6 +111,7 @@ scalar_value(PyObject *scalar, PyArray_Descr *descr) else { /* Inexact */ if _CHK(Floating) { + _IFCASE(Half); _IFCASE(Float); _IFCASE(Double); _IFCASE(LongDouble); diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index b2e8690c7..0ac374fd5 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -9,6 +9,7 @@ #define NPY_NO_PREFIX #include "numpy/arrayobject.h" #include "numpy/npy_math.h" +#include "numpy/halffloat.h" #include "numpy/arrayscalars.h" #include "numpy/npy_3kcompat.h" @@ -478,6 +479,12 @@ format_c@name@(char *buf, size_t buflen, c@name@ val, unsigned int prec) /**end repeat**/ +NPY_NO_EXPORT void +format_half(char *buf, size_t buflen, npy_half val, unsigned int prec) +{ + format_float(buf, buflen, npy_half_to_float(val), prec); +} + /* * over-ride repr and str of array-scalar strings and unicode to * remove NULL bytes and then call the corresponding functions @@ -516,7 +523,9 @@ static PyObject * } /**end repeat**/ -/* These values are finfo.precision + 2 */ +/* The REPR values are finfo.precision + 2 */ +#define HALFPREC_REPR 5 +#define HALFPREC_STR 5 #define FLOATPREC_REPR 8 #define FLOATPREC_STR 6 #define DOUBLEPREC_REPR 17 @@ -536,9 +545,10 @@ static PyObject * */ /**begin repeat - * #name = float, double, longdouble# - * #Name = Float, Double, LongDouble# - * #NAME = FLOAT, DOUBLE, LONGDOUBLE# + * #name = half, float, double, longdouble# + * #Name = Half, Float, Double, LongDouble# + * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE# + * #hascomplex = 0, 1, 1, 1# */ /**begin repeat1 * #kind = str, repr# @@ -551,12 +561,13 @@ static PyObject * @name@type_@kind@(PyObject *self) { char buf[100]; - @name@ val = ((Py@Name@ScalarObject *)self)->obval; + npy_@name@ val = ((Py@Name@ScalarObject 
*)self)->obval; format_@name@(buf, sizeof(buf), val, PREC); return PyUString_FromString(buf); } +#if @hascomplex@ static PyObject * c@name@type_@kind@(PyObject *self) { @@ -566,6 +577,7 @@ c@name@type_@kind@(PyObject *self) format_c@name@(buf, sizeof(buf), val, PREC); return PyUString_FromString(buf); } +#endif #undef PREC @@ -576,16 +588,17 @@ c@name@type_@kind@(PyObject *self) * float type print (control print a, where a is a float type instance) */ /**begin repeat - * #name = float, double, longdouble# - * #Name = Float, Double, LongDouble# - * #NAME = FLOAT, DOUBLE, LONGDOUBLE# + * #name = half, float, double, longdouble# + * #Name = Half, Float, Double, LongDouble# + * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE# + * #hascomplex = 0, 1, 1, 1# */ static int @name@type_print(PyObject *v, FILE *fp, int flags) { char buf[100]; - @name@ val = ((Py@Name@ScalarObject *)v)->obval; + npy_@name@ val = ((Py@Name@ScalarObject *)v)->obval; format_@name@(buf, sizeof(buf), val, (flags & Py_PRINT_RAW) ? 
@NAME@PREC_STR : @NAME@PREC_REPR); @@ -595,6 +608,7 @@ static int return 0; } +#if @hascomplex@ static int c@name@type_print(PyObject *v, FILE *fp, int flags) { @@ -609,6 +623,7 @@ c@name@type_print(PyObject *v, FILE *fp, int flags) Py_END_ALLOW_THREADS return 0; } +#endif /**end repeat**/ @@ -2117,13 +2132,13 @@ object_arrtype_dealloc(PyObject *v) /**begin repeat * #name = byte, short, int, long, longlong, ubyte, ushort, uint, ulong, - * ulonglong, float, double, longdouble, cfloat, cdouble, clongdouble, - * string, unicode, object, datetime, timedelta# + * ulonglong, half, float, double, longdouble, cfloat, cdouble, + * clongdouble, string, unicode, object, datetime, timedelta# * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG, UBYTE, USHORT, UINT, ULONG, - * ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE, - * STRING, UNICODE, OBJECT, DATETIME, TIMEDELTA# - * #work = 0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,z,z,0,0,0# - * #default = 0*16,1*2,2,0*2# + * ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, + * CLONGDOUBLE, STRING, UNICODE, OBJECT, DATETIME, TIMEDELTA# + * #work = 0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,z,z,0,0,0# + * #default = 0*17,1*2,2,0*2# */ #define _NPY_UNUSED2_1 @@ -2161,9 +2176,9 @@ static PyObject * */ if (obj == NULL) { #if @default@ == 0 - char *mem = malloc(sizeof(@name@)); + char *mem = malloc(sizeof(npy_@name@)); - memset(mem, 0, sizeof(@name@)); + memset(mem, 0, sizeof(npy_@name@)); robj = PyArray_Scalar(mem, typecode, NULL); free(mem); #elif @default@ == 1 @@ -2625,6 +2640,12 @@ c@lname@_arrtype_hash(PyObject *obj) /**end repeat**/ static long +half_arrtype_hash(PyObject *obj) +{ + return _Py_HashDouble(npy_half_to_double(((PyHalfScalarObject *)obj)->obval)); +} + +static long object_arrtype_hash(PyObject *obj) { return PyObject_Hash(((PyObjectScalarObject *)obj)->obval); @@ -3058,10 +3079,10 @@ NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = { /**begin repeat * #NAME = Byte, Short, Int, Long, LongLong, UByte, UShort, UInt, 
ULong, - * ULongLong, Float, Double, LongDouble, Datetime, Timedelta# - * #name = int*5, uint*5, float*3, datetime, timedelta# - * #CNAME = (CHAR, SHORT, INT, LONG, LONGLONG)*2, FLOAT, DOUBLE, LONGDOUBLE, - * DATETIME, TIMEDELTA# + * ULongLong, Half, Float, Double, LongDouble, Datetime, Timedelta# + * #name = int*5, uint*5, float*4, datetime, timedelta# + * #CNAME = (CHAR, SHORT, INT, LONG, LONGLONG)*2, HALF, FLOAT, DOUBLE, + * LONGDOUBLE, DATETIME, TIMEDELTA# */ #if BITSOF_@CNAME@ == 8 #define _THIS_SIZE "8" @@ -3499,11 +3520,13 @@ initialize_numeric_types(void) /**begin repeat * #name = bool, byte, short, int, long, longlong, ubyte, ushort, uint, - * ulong, ulonglong, float, double, longdouble, cfloat, cdouble, - * clongdouble, string, unicode, void, object, datetime, timedelta# + * ulong, ulonglong, half, float, double, longdouble, cfloat, + * cdouble, clongdouble, string, unicode, void, object, datetime, + * timedelta# * #NAME = Bool, Byte, Short, Int, Long, LongLong, UByte, UShort, UInt, - * ULong, ULongLong, Float, Double, LongDouble, CFloat, CDouble, - * CLongDouble, String, Unicode, Void, Object, Datetime, Timedelta# + * ULong, ULongLong, Half, Float, Double, LongDouble, CFloat, + * CDouble, CLongDouble, String, Unicode, Void, Object, Datetime, + * Timedelta# */ Py@NAME@ArrType_Type.tp_flags = BASEFLAGS; Py@NAME@ArrType_Type.tp_new = @name@_arrtype_new; @@ -3512,11 +3535,11 @@ initialize_numeric_types(void) /**begin repeat * #name = bool, byte, short, ubyte, ushort, uint, ulong, ulonglong, - * float, longdouble, cfloat, clongdouble, void, object, datetime, - * timedelta# + * half, float, longdouble, cfloat, clongdouble, void, object, + * datetime, timedelta# * #NAME = Bool, Byte, Short, UByte, UShort, UInt, ULong, ULongLong, - * Float, LongDouble, CFloat, CLongDouble, Void, Object, Datetime, - * Timedelta# + * Half, Float, LongDouble, CFloat, CLongDouble, Void, Object, + * Datetime, Timedelta# */ Py@NAME@ArrType_Type.tp_hash = @name@_arrtype_hash; /**end 
repeat**/ @@ -3546,6 +3569,8 @@ initialize_numeric_types(void) /**begin repeat * #name = repr, str# */ + PyHalfArrType_Type.tp_@name@ = halftype_@name@; + PyFloatArrType_Type.tp_@name@ = floattype_@name@; PyCFloatArrType_Type.tp_@name@ = cfloattype_@name@; @@ -3553,6 +3578,7 @@ initialize_numeric_types(void) PyCDoubleArrType_Type.tp_@name@ = cdoubletype_@name@; /**end repeat**/ + PyHalfArrType_Type.tp_print = halftype_print; PyFloatArrType_Type.tp_print = floattype_print; PyDoubleArrType_Type.tp_print = doubletype_print; PyLongDoubleArrType_Type.tp_print = longdoubletype_print; @@ -3608,6 +3634,7 @@ static PyTypeObject *typeobjects[] = { &PyULongArrType_Type, &PyLongLongArrType_Type, &PyULongLongArrType_Type, + &PyHalfArrType_Type, &PyFloatArrType_Type, &PyDoubleArrType_Type, &PyLongDoubleArrType_Type, diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c new file mode 100644 index 000000000..f5ed80b6d --- /dev/null +++ b/numpy/core/src/npymath/halffloat.c @@ -0,0 +1,461 @@ +#include "numpy/halffloat.h" +#include "numpy/ufuncobject.h" + +/*TODO + * Should the conversion routines query the CPU float rounding flags? + * The routine currently does 'round to nearest', and the following + * define chooses between 'ties to even' and 'ties away from zero'. 
+ */ +#define NPY_HALF_ROUND_TIES_TO_EVEN 1 + +/* + ******************************************************************** + * HALF-PRECISION ROUTINES * + ******************************************************************** + */ + +float npy_half_to_float(npy_half h) +{ + union { float ret; npy_uint32 retbits; } conv; + conv.retbits = npy_halfbits_to_floatbits(h); + return conv.ret; +} + +double npy_half_to_double(npy_half h) +{ + union { double ret; npy_uint64 retbits; } conv; + conv.retbits = npy_halfbits_to_doublebits(h); + return conv.ret; +} + +npy_half npy_float_to_half(float f) +{ + union { float f; npy_uint32 fbits; } conv; + conv.f = f; + return npy_floatbits_to_halfbits(conv.fbits); +} + +npy_half npy_double_to_half(double d) +{ + union { double d; npy_uint64 dbits; } conv; + conv.d = d; + return npy_doublebits_to_halfbits(conv.dbits); +} + +int npy_half_iszero(npy_half h) +{ + return (h&0x7fff) == 0; +} + +int npy_half_isnan(npy_half h) +{ + return ((h&0x7c00u) == 0x7c00u) && ((h&0x03ffu) != 0x0000u); +} + +int npy_half_isinf(npy_half h) +{ + return ((h&0x7fffu) == 0x7c00u); +} + +int npy_half_isfinite(npy_half h) +{ + return ((h&0x7c00u) != 0x7c00u); +} + +int npy_half_signbit(npy_half h) +{ + return (h&0x8000u) != 0; +} + +npy_half npy_half_spacing(npy_half h) +{ + npy_half ret; + npy_uint16 h_exp = h&0x7c00u; + npy_uint16 h_sig = h&0x03ffu; + if (h_exp == 0x7c00u || h == 0x7bffu) { + ret = NPY_HALF_NAN; + } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */ + if (h_exp > 0x2c00u) { /* If result is normalized */ + ret = h_exp - 0x2c00u; + } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */ + ret = 1 << ((h_exp >> 10) - 2); + } else { + ret = 0x0001u; /* Smallest subnormal half */ + } + } else if (h_exp > 0x2800u) { /* If result is still normalized */ + ret = h_exp - 0x2800u; + } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */ + ret = 1 << ((h_exp >> 10) - 1); + } else { + 
ret = 0x0001u; + } + + return ret; +} + +npy_half npy_half_copysign(npy_half x, npy_half y) +{ + return (x&0x7fffu) | (y&0x8000u); +} + +npy_half npy_half_nextafter(npy_half x, npy_half y) +{ + npy_half ret; + + if (!npy_half_isfinite(x) || npy_half_isnan(y)) { + ret = NPY_HALF_NAN; + } else if (npy_half_eq_nonan(x, y)) { + ret = x; + } else if (npy_half_iszero(x)) { + ret = (y&0x8000u) + 1; /* Smallest subnormal half */ + } else if (!(x&0x8000u)) { /* x > 0 */ + if ((npy_int16)x > (npy_int16)y) { /* x > y */ + ret = x-1; + } else { + ret = x+1; + } + } else { + if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */ + ret = x-1; + } else { + ret = x+1; + } + } + + return ret; +} + +int npy_half_eq_nonan(npy_half h1, npy_half h2) +{ + return (h1 == h2 || ((h1 | h2) & 0x7fff) == 0); +} + +int npy_half_eq(npy_half h1, npy_half h2) +{ + /* + * The equality cases are as follows: + * - If either value is NaN, never equal. + * - If the values are equal, equal. + * - If the values are both signed zeros, equal. 
+ */ + return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && + (h1 == h2 || ((h1 | h2) & 0x7fff) == 0); +} + +int npy_half_ne(npy_half h1, npy_half h2) +{ + return !npy_half_eq(h1, h2); +} + +int npy_half_lt_nonan(npy_half h1, npy_half h2) +{ + if (h1&0x8000u) { + if (h2&0x8000u) { + return (h1&0x7fffu) > (h2&0x7fffu); + } else { + /* Signed zeros are equal, have to check for it */ + return (h1 != 0x8000u) || (h2 != 0x0000u); + } + } else { + if (h2&0x8000u) { + return 0; + } else { + return (h1&0x7fffu) < (h2&0x7fffu); + } + } +} + +int npy_half_lt(npy_half h1, npy_half h2) +{ + return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_lt_nonan(h1, h2); +} + +int npy_half_gt(npy_half h1, npy_half h2) +{ + return npy_half_lt(h2, h1); +} + +int npy_half_le_nonan(npy_half h1, npy_half h2) +{ + if (h1&0x8000u) { + if (h2&0x8000u) { + return (h1&0x7fffu) >= (h2&0x7fffu); + } else { + return 1; + } + } else { + if (h2&0x8000u) { + /* Signed zeros are equal, have to check for it */ + return (h1 == 0x0000u) && (h2 == 0x8000u); + } else { + return (h1&0x7fffu) <= (h2&0x7fffu); + } + } +} + +int npy_half_le(npy_half h1, npy_half h2) +{ + return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_le_nonan(h1, h2); +} + +int npy_half_ge(npy_half h1, npy_half h2) +{ + return npy_half_le(h2, h1); +} + + + +/* + ******************************************************************** + * BIT-LEVEL CONVERSIONS * + ******************************************************************** + */ + +npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f) +{ + npy_uint32 f_exp, f_sig; + npy_uint16 h_sgn, h_exp, h_sig; + + h_sgn = (npy_uint16) ((f&0x80000000u) >> 16); + f_exp = (f&0x7f800000u); + + /* Exponent overflow/NaN converts to signed inf/NaN */ + if (f_exp >= 0x47800000u) { + if (f_exp == 0x7f800000u) { + f_sig = (f&0x007fffffu); + if (f_sig != 0) { + /* NaN - propagate the flag in the significand... 
*/ + npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13)); + /* ...but make sure it stays a NaN */ + if (ret == 0x7c00u) { + ret++; + } + return h_sgn + ret; + } else { + /* signed inf */ + return (npy_uint16) (h_sgn + 0x7c00u); + } + } else { + /* signed inf */ + return (npy_uint16) (h_sgn + 0x7c00u); + } + } + + /* Exponent underflow converts to a subnormal half or signed zero */ + if (f_exp <= 0x38000000u) { + /* + * Signed zeros, subnormal floats, and floats with small + * exponents all convert to signed zero halfs. + */ + if (f_exp < 0x33000000u) { + return h_sgn; + } + /* Make the subnormal significand */ + f_exp >>= 23; + f_sig = (0x00800000u + (f&0x007fffffu)); + f_sig >>= (113 - f_exp); + /* Handle rounding by adding 1 to the bit beyond half precision */ +#if NPY_HALF_ROUND_TIES_TO_EVEN + /* + * If the last bit in the half significand is 0 (already even), and + * the remaining bit pattern is 1000...0, then we do not add one + * to the bit after the half significand. In all other cases, we do. + */ + if ((f_sig&0x00003fffu) != 0x00001000u) { + f_sig += 0x00001000u; + } +#else + f_sig += 0x00001000u; +#endif + h_sig = (npy_uint16) (f_sig >> 13); + /* + * If the rounding causes a bit to spill into h_exp, it will + * increment h_exp from zero to one and h_sig will be zero. + * This is the correct result. + */ + return (npy_uint16) (h_sgn + h_sig); + } + + /* Regular case with no overflow or underflow */ + h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13); + /* Handle rounding by adding 1 to the bit beyond half precision */ + f_sig = (f&0x007fffffu); +#if NPY_HALF_ROUND_TIES_TO_EVEN + /* + * If the last bit in the half significand is 0 (already even), and + * the remaining bit pattern is 1000...0, then we do not add one + * to the bit after the half significand. In all other cases, we do. 
+ */ + if ((f_sig&0x00003fffu) != 0x00001000u) { + f_sig += 0x00001000u; + } +#else + f_sig += 0x00001000u; +#endif + h_sig = (npy_uint16) (f_sig >> 13); + /* + * If the rounding causes a bit to spill into h_exp, it will + * increment h_exp by one and h_sig will be zero. This is the + * correct result. h_exp may increment to 15, at greatest, in + * which case the result overflows to a signed inf. + */ + return h_sgn + h_exp + h_sig; +} + +npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d) +{ + npy_uint64 d_exp, d_sig; + npy_uint16 h_sgn, h_exp, h_sig; + + h_sgn = (d&0x8000000000000000u) >> 48; + d_exp = (d&0x7ff0000000000000u); + + /* Exponent overflow/NaN converts to signed inf/NaN */ + if (d_exp >= 0x40f0000000000000u) { + if (d_exp == 0x7ff0000000000000u) { + d_sig = (d&0x000fffffffffffffu); + if (d_sig != 0) { + /* NaN - propagate the flag in the significand... */ + npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42)); + /* ...but make sure it stays a NaN */ + if (ret == 0x7c00u) { + ret++; + } + return h_sgn + ret; + } else { + /* signed inf */ + return (npy_uint16) (h_sgn + 0x7c00u); + } + } else { + /* signed inf */ + return h_sgn + 0x7c00u; + } + } + + /* Exponent underflow converts to subnormal half or signed zero */ + if (d_exp <= 0x3f00000000000000u) { + /* + * Signed zeros, subnormal floats, and floats with small + * exponents all convert to signed zero halfs. + */ + if (d_exp < 0x3e60000000000000u) { + return h_sgn; + } + /* Make the subnormal significand */ + d_exp >>= 52; + d_sig = (0x0010000000000000u + (d&0x000fffffffffffffu)); + d_sig >>= (1009 - d_exp); + /* Handle rounding by adding 1 to the bit beyond half precision */ +#if NPY_HALF_ROUND_TIES_TO_EVEN + /* + * If the last bit in the half significand is 0 (already even), and + * the remaining bit pattern is 1000...0, then we do not add one + * to the bit after the half significand. In all other cases, we do. 
+ */ + if ((d_sig&0x000007ffffffffffu) != 0x0000020000000000u) { + d_sig += 0x0000020000000000u; + } +#else + d_sig += 0x0000020000000000u; +#endif + h_sig = (npy_uint16) (d_sig >> 42); + /* + * If the rounding causes a bit to spill into h_exp, it will + * increment h_exp from zero to one and h_sig will be zero. + * This is the correct result. + */ + return (npy_uint16) (h_sgn + h_sig); + } + + /* Regular case with no overflow or underflow */ + h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000u) >> 42); + /* Handle rounding by adding 1 to the bit beyond half precision */ + d_sig = (d&0x000fffffffffffffu); +#if NPY_HALF_ROUND_TIES_TO_EVEN + /* + * If the last bit in the half significand is 0 (already even), and + * the remaining bit pattern is 1000...0, then we do not add one + * to the bit after the half significand. In all other cases, we do. + */ + if ((d_sig&0x000007ffffffffffu) != 0x0000020000000000u) { + d_sig += 0x0000020000000000u; + } +#else + d_sig += 0x0000020000000000u; +#endif + h_sig = (npy_uint16) (d_sig >> 42); + + /* + * If the rounding causes a bit to spill into h_exp, it will + * increment h_exp by one and h_sig will be zero. This is the + * correct result. h_exp may increment to 15, at greatest, in + * which case the result overflows to a signed inf. 
+ */ + return h_sgn + h_exp + h_sig; +} + +npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h) +{ + npy_uint16 h_exp, h_sig; + npy_uint32 f_sgn, f_exp, f_sig; + + h_exp = (h&0x7c00u); + f_sgn = ((npy_uint32)h&0x8000u) << 16; + switch (h_exp) { + case 0x0000u: /* 0 or subnormal */ + h_sig = (h&0x03ffu); + /* Signed zero */ + if (h_sig == 0) { + return f_sgn; + } + /* Subnormal */ + h_sig <<= 1; + while ((h_sig&0x0400u) == 0) { + h_sig <<= 1; + h_exp++; + } + f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23; + f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13; + return f_sgn + f_exp + f_sig; + case 0x7c00u: /* inf or NaN */ + /* All-ones exponent and a copy of the significand */ + return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13); + default: /* normalized */ + /* Just need to adjust the exponent and shift */ + return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13); + } +} + +npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h) +{ + npy_uint16 h_exp, h_sig; + npy_uint64 d_sgn, d_exp, d_sig; + + h_exp = (h&0x7c00u); + d_sgn = ((npy_uint64)h&0x8000u) << 48; + switch (h_exp) { + case 0x0000u: /* 0 or subnormal */ + h_sig = (h&0x03ffu); + /* Signed zero */ + if (h_sig == 0) { + return d_sgn; + } + /* Subnormal */ + h_sig <<= 1; + while ((h_sig&0x0400u) == 0) { + h_sig <<= 1; + h_exp++; + } + d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52; + d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42; + return d_sgn + d_exp + d_sig; + case 0x7c00u: /* inf or NaN */ + /* All-ones exponent and a copy of the significand */ + return d_sgn + 0x7ff0000000000000u + + (((npy_uint64)(h&0x03ffu)) << 42); + default: /* normalized */ + /* Just need to adjust the exponent and shift */ + return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42); + } +} + diff --git a/numpy/core/src/scalarmathmodule.c.src b/numpy/core/src/scalarmathmodule.c.src index c4e6263ba..712932958 100644 --- a/numpy/core/src/scalarmathmodule.c.src +++ b/numpy/core/src/scalarmathmodule.c.src @@ -13,6 +13,8 @@ #include 
"numpy/npy_3kcompat.h" +#include "numpy/halffloat.h" + /** numarray adapted routines.... **/ #if SIZEOF_LONGLONG == 64 || SIZEOF_LONGLONG == 128 @@ -352,6 +354,17 @@ static @name@ (*_basic_@name@_fmod)(@name@, @name@); *(outp) = _basic_@name@_floor((a) / (b)) /**end repeat**/ +static npy_half (*_basic_half_floor)(npy_half); +static npy_half (*_basic_half_sqrt)(npy_half); +static npy_half (*_basic_half_fmod)(npy_half, npy_half); +#define half_ctype_add(a, b, outp) *(outp) = npy_float_to_half(npy_half_to_float(a) + npy_half_to_float(b)) +#define half_ctype_subtract(a, b, outp) *(outp) = npy_float_to_half(npy_half_to_float(a) - npy_half_to_float(b)) +#define half_ctype_multiply(a, b, outp) *(outp) = npy_float_to_half(npy_half_to_float(a) * npy_half_to_float(b)) +#define half_ctype_divide(a, b, outp) *(outp) = npy_float_to_half(npy_half_to_float(a) / npy_half_to_float(b)) +#define half_ctype_true_divide half_ctype_divide +#define half_ctype_floor_divide(a, b, outp) \ + *(outp) = npy_float_to_half(_basic_float_floor(npy_half_to_float(a) / npy_half_to_float(b))) + /**begin repeat * #name = cfloat, cdouble, clongdouble# * #rtype = float, double, longdouble# @@ -397,11 +410,20 @@ static void } /**end repeat**/ +static void +half_ctype_remainder(npy_half a, npy_half b, npy_half *out) { + float mod, fa = npy_half_to_float(a), fb = npy_half_to_float(b); + mod = _basic_float_fmod(fa, fb); + if (mod && (((fb < 0) != (mod < 0)))) { + mod += fb; + } + *out = npy_float_to_half(mod); +} /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong, - * ulonglong, float, double, longdouble, cfloat, cdouble, clongdouble# + * ulonglong, half, float, double, longdouble, cfloat, cdouble, clongdouble# */ #define @name@_ctype_divmod(a, b, out, out2) { \ @name@_ctype_floor_divide(a, b, out); \ @@ -410,11 +432,11 @@ static void /**end repeat**/ /**begin repeat - * #name = float, double, longdouble# + * #name = half, float, double, longdouble# */ -static @name@ 
(*_basic_@name@_pow)(@name@ a, @name@ b); +static npy_@name@ (*_basic_@name@_pow)(npy_@name@ a, npy_@name@ b); static void -@name@_ctype_power(@name@ a, @name@ b, @name@ *out) { +@name@_ctype_power(npy_@name@ a, npy_@name@ b, npy_@name@ *out) { *out = _basic_@name@_pow(a, b); } /**end repeat**/ @@ -425,7 +447,7 @@ static void * #uns = (0,1)*5,0*3# */ static void -@name@_ctype_negative(@name@ a, @name@ *out) +@name@_ctype_negative(npy_@name@ a, npy_@name@ *out) { #if @uns@ generate_overflow_error(); @@ -434,6 +456,12 @@ static void } /**end repeat**/ +static void +half_ctype_negative(npy_half a, npy_half *out) +{ + *out = a^0x8000u; +} + /**begin repeat * #name = cfloat, cdouble, clongdouble# @@ -448,10 +476,10 @@ static void /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong, - * ulonglong, float, double, longdouble# + * ulonglong, half, float, double, longdouble# */ static void -@name@_ctype_positive(@name@ a, @name@ *out) +@name@_ctype_positive(npy_@name@ a, npy_@name@ *out) { *out = a; } @@ -497,6 +525,12 @@ static void } /**end repeat**/ +static void +half_ctype_absolute(npy_half a, npy_half *out) +{ + *out = a&0x7fffu; +} + /**begin repeat * #name = cfloat, cdouble, clongdouble# * #rname = float, double, longdouble# @@ -534,15 +568,15 @@ static void /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong, - * ulonglong, float, double, longdouble, cfloat, cdouble, clongdouble# + * ulonglong, half, float, double, longdouble, cfloat, cdouble, clongdouble# * #Name = Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, - * ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# + * ULongLong, Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, LONGLONG, - * ULONGLONG, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE# + * ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE# */ 
static int -_@name@_convert_to_ctype(PyObject *a, @name@ *arg1) +_@name@_convert_to_ctype(PyObject *a, npy_@name@ *arg1) { PyObject *temp; @@ -585,11 +619,11 @@ _@name@_convert_to_ctype(PyObject *a, @name@ *arg1) /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong, float, double, cfloat, cdouble# + * longlong, ulonglong, half, float, double, cfloat, cdouble# */ static int -_@name@_convert2_to_ctypes(PyObject *a, @name@ *arg1, - PyObject *b, @name@ *arg2) +_@name@_convert2_to_ctypes(PyObject *a, npy_@name@ *arg1, + PyObject *b, npy_@name@ *arg2) { int ret; ret = _@name@_convert_to_ctype(a, arg1); @@ -635,14 +669,32 @@ _@name@_convert2_to_ctypes(PyObject *a, @name@ *arg1, #endif /**begin repeat - #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong)*13, (float, double, longdouble, cfloat, cdouble, clongdouble)*6, (float, double, longdouble)*2# - #Name=(Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong)*13, (Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*6, (Float, Double, LongDouble)*2# - #oper=add*10, subtract*10, multiply*10, divide*10, remainder*10, divmod*10, floor_divide*10, lshift*10, rshift*10, and*10, or*10, xor*10, true_divide*10, add*6, subtract*6, multiply*6, divide*6, floor_divide*6, true_divide*6, divmod*3, remainder*3# - #fperr=1*70,0*50,1*52# - #twoout=0*50,1*10,0*106,1*3,0*3# - #otyp=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong)*12, float*4, double*6, (float, double, longdouble, cfloat, cdouble, clongdouble)*6, (float, double, longdouble)*2# - #OName=(Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong)*12, Float*4, Double*6, (Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*6, (Float, Double, LongDouble)*2# -**/ + * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong)*13, + * (half, float, double, longdouble, cfloat, cdouble, clongdouble)*6, + * (half, float, double, longdouble)*2# + * 
#Name=(Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong)*13, + * (Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*6, + * (Half, Float, Double, LongDouble)*2# + * #oper=add*10, subtract*10, multiply*10, divide*10, remainder*10, + * divmod*10, floor_divide*10, lshift*10, rshift*10, and*10, + * or*10, xor*10, true_divide*10, + * add*7, subtract*7, multiply*7, divide*7, floor_divide*7, true_divide*7, + * divmod*4, remainder*4# + * #fperr=1*70,0*50,1*10, + * 1*42, + * 1*8# + * #twoout=0*50,1*10,0*70, + * 0*42, + * 1*4,0*4# + * #otyp=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong)*12, + * float*4, double*6, + * (half, float, double, longdouble, cfloat, cdouble, clongdouble)*6, + * (half, float, double, longdouble)*2# + * #OName=(Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong)*12, + * Float*4, Double*6, + * (Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*6, + * (Half, Float, Double, LongDouble)*2# + */ #if !defined(CODEGEN_SKIP_@oper@_FLAG) @@ -650,16 +702,16 @@ static PyObject * @name@_@oper@(PyObject *a, PyObject *b) { PyObject *ret; - @name@ arg1, arg2; + npy_@name@ arg1, arg2; /* * NOTE: In gcc >= 4.1, the compiler will reorder floating point operations and * floating point error state checks. In particular, the arithmetic operations * were being reordered so that the errors weren't caught. Declaring this output * variable volatile was the minimal fix for the issue. (Ticket #1671) */ - volatile @otyp@ out; + volatile npy_@otyp@ out; #if @twoout@ - @otyp@ out2; + npy_@otyp@ out2; PyObject *obj; #endif @@ -698,9 +750,9 @@ static PyObject * * as a function call. 
*/ #if @twoout@ - @name@_ctype_@oper@(arg1, arg2, (@otyp@ *)&out, &out2); + @name@_ctype_@oper@(arg1, arg2, (npy_@otyp@ *)&out, &out2); #else - @name@_ctype_@oper@(arg1, arg2, (@otyp@ *)&out); + @name@_ctype_@oper@(arg1, arg2, (npy_@otyp@ *)&out); #endif #if @fperr@ @@ -758,30 +810,36 @@ static PyObject * #undef CODEGEN_SKIP_divide_FLAG +#define _IS_ZERO(x) (x ==0) /**begin repeat - #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float, double, longdouble, cfloat, cdouble, clongdouble# - #Name=Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# - #otyp=float*4, double*6, float, double, longdouble, cfloat, cdouble, clongdouble# - #OName=Float*4, Double*6, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# - #isint=(1,0)*5,0*6# - #cmplx=0*13,1*3# -**/ + * #name=byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong, + * half, float, double, longdouble, cfloat, cdouble, clongdouble# + * #Name=Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, + * Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# + * #otyp=float*4, double*6, half, float, double, longdouble, cfloat, cdouble, clongdouble# + * #OName=Float*4, Double*6, Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# + * #isint=(1,0)*5,0*7# + * #cmplx=0*14,1*3# + * #iszero=_IS_ZERO*10, npy_half_iszero, _IS_ZERO*6# + * #zero=0*10, NPY_HALF_ZERO, 0*6# + * #one=1*10, NPY_HALF_ONE, 1*6# + */ static PyObject * @name@_power(PyObject *a, PyObject *b, PyObject *NPY_UNUSED(c)) { PyObject *ret; - @name@ arg1, arg2; + npy_@name@ arg1, arg2; int retstatus; int first; #if @cmplx@ - @name@ out = {0,0}; - @otyp@ out1; - out1.real = out.imag = 0; + npy_@name@ out = {@zero@,@zero@}; + npy_@otyp@ out1; + out1.real = out.imag = @zero@; #else - @name@ out = 0; - @otyp@ out1=0; + npy_@name@ out = @zero@; + npy_@otyp@ out1 = @zero@; #endif switch(_@name@_convert2_to_ctypes(a, 
&arg1, b, &arg2)) { @@ -812,13 +870,13 @@ static PyObject * * as a function call. */ #if @cmplx@ - if (arg2.real == 0 && arg2.imag == 0) { - out1.real = out.real = 1; - out1.imag = out.imag = 0; + if (@iszero@(arg2.real) && @iszero@(arg2.imag)) { + out1.real = out.real = @one@; + out1.imag = out.imag = @zero@; } #else - if (arg2 == 0) { - out1 = out = 1; + if (@iszero@(arg2)) { + out1 = out = @one@; } #endif #if @isint@ @@ -875,6 +933,7 @@ static PyObject * return ret; } /**end repeat**/ +#undef _IS_ZERO /**begin repeat @@ -885,24 +944,29 @@ static PyObject * /**end repeat**/ /**begin repeat - * #name = (float,double,longdouble,cfloat,cdouble,clongdouble)*5# - * #oper = lshift*6, rshift*6, and*6, or*6, xor*6# + * #name = (half,float,double,longdouble,cfloat,cdouble,clongdouble)*5# + * #oper = lshift*7, rshift*7, and*7, or*7, xor*7# */ #define @name@_@oper@ NULL /**end repeat**/ /**begin repeat - * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*3, byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong# - * #otyp=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*2,byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,float,double,longdouble,byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong# - * #OName=(Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*2, Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, Float, Double, LongDouble, Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong# - * #oper=negative*16, positive*16, absolute*16, invert*10# + * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,cfloat,cdouble,clongdouble)*3, + * 
byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong# + * #otyp=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,cfloat,cdouble,clongdouble)*2, + * byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,float,double,longdouble, + * byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong# + * #OName=(Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble)*2, + Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Half, Float, Double, LongDouble, Float, Double, LongDouble, + Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong# + * #oper=negative*17, positive*17, absolute*17, invert*10# */ static PyObject * @name@_@oper@(PyObject *a) { - @name@ arg1; - @otyp@ out; + npy_@name@ arg1; + npy_@otyp@ out; PyObject *ret; switch(_@name@_convert_to_ctype(a, &arg1)) { @@ -935,7 +999,7 @@ static PyObject * /**end repeat**/ /**begin repeat - * #name = float, double, longdouble, cfloat, cdouble, clongdouble# + * #name = half, float, double, longdouble, cfloat, cdouble, clongdouble# */ #define @name@_invert NULL /**end repeat**/ @@ -946,16 +1010,18 @@ static PyObject * #define NONZERO_NAME(prefix, suffix) prefix##nonzero##suffix #endif +#define _IS_NONZERO(x) (x != 0) /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, long, ulong, longlong, - * ulonglong, float, double, longdouble, cfloat, cdouble, clongdouble# - * #simp=1*13,0*3# + * ulonglong, half, float, double, longdouble, cfloat, cdouble, clongdouble# + * #simp=1*14,0*3# + * #nonzero=_IS_NONZERO*10, !npy_half_iszero, _IS_NONZERO*6# */ static int NONZERO_NAME(@name@_,)(PyObject *a) { int ret; - @name@ arg1; + npy_@name@ arg1; if (_@name@_convert_to_ctype(a, &arg1) < 0) { if (PyErr_Occurred()) { @@ -970,21 +1036,21 @@ NONZERO_NAME(@name@_,)(PyObject *a) */ #if @simp@ - ret = (arg1 != 0); + ret = 
@nonzero@(arg1); #else - ret = ((arg1.real != 0) || (arg1.imag != 0)); + ret = (@nonzero@(arg1.real) || @nonzero@(arg1.imag)); #endif return ret; } /**end repeat**/ +#undef _IS_NONZERO static int emit_complexwarning() { static PyObject *cls = NULL; - int ret; if (cls == NULL) { PyObject *mod; mod = PyImport_ImportModule("numpy.core"); @@ -1006,23 +1072,24 @@ emit_complexwarning() /**begin repeat * - * #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble# - * #Name=Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong,Float,Double,LongDouble,CFloat,CDouble,CLongDouble# - * #cmplx=0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1# - * #sign=(signed,unsigned)*5,,,,,,# - * #unsigntyp=0,1,0,1,0,1,0,1,0,1,0*6# - * #ctype=long*8,PY_LONG_LONG*2,double*6# - * #realtyp=0*10,1*6# - * #func=(PyLong_FromLong,PyLong_FromUnsignedLong)*4,PyLong_FromLongLong,PyLong_FromUnsignedLongLong,PyLong_FromDouble*6# + * #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,cfloat,cdouble,clongdouble# + * #Name=Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong,Half,Float,Double,LongDouble,CFloat,CDouble,CLongDouble# + * #cmplx=0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1# + * #sign=(signed,unsigned)*5,,,,,,,# + * #unsigntyp=0,1,0,1,0,1,0,1,0,1,0*7# + * #ctype=long*8,PY_LONG_LONG*2,double*7# + * #to_ctype=,,,,,,,,,,npy_half_to_double,,,,,,# + * #realtyp=0*10,1*7# + * #func=(PyLong_FromLong,PyLong_FromUnsignedLong)*4,PyLong_FromLongLong,PyLong_FromUnsignedLongLong,PyLong_FromDouble*7# */ static PyObject * @name@_int(PyObject *obj) { #if @cmplx@ - @sign@ @ctype@ x= PyArrayScalar_VAL(obj, @Name@).real; + @sign@ @ctype@ x= @to_ctype@(PyArrayScalar_VAL(obj, @Name@).real); int ret; #else - @sign@ @ctype@ x= PyArrayScalar_VAL(obj, @Name@); + @sign@ @ctype@ x= @to_ctype@(PyArrayScalar_VAL(obj, @Name@)); #endif #if @realtyp@ double ix; @@ -1049,11 +1116,12 @@ static PyObject * /**begin repeat * - * 
#name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*2# - * #Name=(Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong,Float,Double,LongDouble,CFloat,CDouble,CLongDouble)*2# - * #cmplx=(0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1)*2# - * #which=long*16,float*16# - * #func=(PyLong_FromLongLong, PyLong_FromUnsignedLongLong)*5,PyLong_FromDouble*6,PyFloat_FromDouble*16# + * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,cfloat,cdouble,clongdouble)*2# + * #Name=(Byte,UByte,Short,UShort,Int,UInt,Long,ULong,LongLong,ULongLong,Half,Float,Double,LongDouble,CFloat,CDouble,CLongDouble)*2# + * #cmplx=(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1)*2# + * #to_ctype=(,,,,,,,,,,npy_half_to_double,,,,,,)*2# + * #which=long*17,float*17# + * #func=(PyLong_FromLongLong, PyLong_FromUnsignedLongLong)*5,PyLong_FromDouble*7,PyFloat_FromDouble*17# */ static PyObject * @name@_@which@(PyObject *obj) @@ -1064,9 +1132,9 @@ static PyObject * if (ret < 0) { return NULL; } - return @func@((PyArrayScalar_VAL(obj, @Name@)).real); + return @func@(@to_ctype@((PyArrayScalar_VAL(obj, @Name@)).real)); #else - return @func@((PyArrayScalar_VAL(obj, @Name@))); + return @func@(@to_ctype@(PyArrayScalar_VAL(obj, @Name@))); #endif } /**end repeat**/ @@ -1075,10 +1143,10 @@ static PyObject * /**begin repeat * - * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble)*2# - * #oper=oct*16, hex*16# - * #kind=(int*5, long*5, int, long*2, int, long*2)*2# - * #cap=(Int*5, Long*5, Int, Long*2, Int, Long*2)*2# + * #name=(byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,cfloat,cdouble,clongdouble)*2# + * #oper=oct*17, hex*17# + * #kind=(int*5, long*5, int*2, long*2, int, long*2)*2# + * #cap=(Int*5, Long*5, Int*2, Long*2, Int, Long*2)*2# */ static PyObject * @name@_@oper@(PyObject *obj) @@ -1095,21 
+1163,23 @@ static PyObject * /**begin repeat * #oper=le,ge,lt,gt,eq,ne# * #op=<=,>=,<,>,==,!=# + * #halfop=npy_half_le,npy_half_ge,npy_half_lt,npy_half_gt,npy_half_eq,npy_half_ne# */ #define def_cmp_@oper@(arg1, arg2) (arg1 @op@ arg2) #define cmplx_cmp_@oper@(arg1, arg2) ((arg1.real == arg2.real) ? \ arg1.imag @op@ arg2.imag : \ arg1.real @op@ arg2.real) +#define def_half_cmp_@oper@(arg1, arg2) @halfop@(arg1, arg2) /**end repeat**/ /**begin repeat - * #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble# - * #simp=def*13,cmplx*3# + * #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,cfloat,cdouble,clongdouble# + * #simp=def*10,def_half,def*3,cmplx*3# */ static PyObject* @name@_richcompare(PyObject *self, PyObject *other, int cmp_op) { - @name@ arg1, arg2; + npy_@name@ arg1, arg2; int out=0; switch(_@name@_convert2_to_ctypes(self, &arg1, other, &arg2)) { @@ -1158,7 +1228,7 @@ static PyObject* /**begin repeat - #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble# + #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,half,float,double,longdouble,cfloat,cdouble,clongdouble# **/ static PyNumberMethods @name@_as_number = { (binaryfunc)@name@_add, /*nb_add*/ @@ -1231,8 +1301,8 @@ static void add_scalarmath(void) { /**begin repeat - #name=byte,ubyte,short,ushort,int,uint,long,ulong,longlong,ulonglong,float,double,longdouble,cfloat,cdouble,clongdouble# - #NAME=Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# + #name=byte, ubyte, short, ushort, int, uint, long, ulong, longlong, ulonglong, half, float, double, longdouble, cfloat, cdouble, clongdouble# + #NAME=Byte, UByte, Short, UShort, Int, UInt, Long, ULong, LongLong, ULongLong, Half, Float, Double, LongDouble, CFloat, CDouble, CLongDouble# **/ 
#if PY_VERSION_HEX >= 0x02050000 @name@_as_number.nb_index = Py@NAME@ArrType_Type.tp_as_number->nb_index; @@ -1280,6 +1350,7 @@ get_functions(void) i = 0; j = 0; while(signatures[i] != PyArray_FLOAT) {i+=3; j++;} + _basic_half_pow = funcdata[j-1]; _basic_float_pow = funcdata[j]; _basic_double_pow = funcdata[j+1]; _basic_longdouble_pow = funcdata[j+2]; @@ -1296,6 +1367,7 @@ get_functions(void) i = 0; j = 0; while(signatures[i] != PyArray_FLOAT) {i+=2; j++;} + _basic_half_floor = funcdata[j-1]; _basic_float_floor = funcdata[j]; _basic_double_floor = funcdata[j+1]; _basic_longdouble_floor = funcdata[j+2]; @@ -1309,6 +1381,7 @@ get_functions(void) i = 0; j = 0; while(signatures[i] != PyArray_FLOAT) {i+=2; j++;} + _basic_half_sqrt = funcdata[j-1]; _basic_float_sqrt = funcdata[j]; _basic_double_sqrt = funcdata[j+1]; _basic_longdouble_sqrt = funcdata[j+2]; @@ -1322,6 +1395,7 @@ get_functions(void) i = 0; j = 0; while(signatures[i] != PyArray_FLOAT) {i+=3; j++;} + _basic_half_fmod = funcdata[j-1]; _basic_float_fmod = funcdata[j]; _basic_double_fmod = funcdata[j+1]; _basic_longdouble_fmod = funcdata[j+2]; |