From 38bd31bc46b992f68c9455ed50a6280943fe6a75 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 2 Oct 2022 22:17:31 +0300 Subject: mpi/longlong.h: x86-64: use tzcnt instruction for trailing zeros * mpi/longlong.h [__x86_64__] (count_trailing_zeros): Add 'rep' prefix for 'bsfq'. -- "rep;bsf" aka "tzcnt" is a newer instruction with well-defined operation on zero input and, as a result, is faster on new CPUs. On old CPUs, "tzcnt" functions as the old "bsf", with undefined behaviour on zero input. Signed-off-by: Jussi Kivilinna --- mpi/longlong.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mpi') diff --git a/mpi/longlong.h b/mpi/longlong.h index 2921e9bd..706ac723 100644 --- a/mpi/longlong.h +++ b/mpi/longlong.h @@ -624,7 +624,7 @@ extern USItype __udiv_qrnnd (); # define count_trailing_zeros(count, x) \ do { \ UDItype __cbtmp; \ - __asm__ ("bsfq %1,%0" \ + __asm__ ("rep;bsfq %1,%0" \ : "=r" (__cbtmp) : "rm" ((UDItype)(x)) \ __CLOBBER_CC); \ (count) = __cbtmp; \ -- cgit v1.2.1