summary | refs | log | tree | commit | diff
path: root/config/c-compiler.m4
diff options
context:
space:
mode:
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2015-04-14 23:58:16 +0300
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2015-04-14 23:58:16 +0300
commit: 936546dcbc24ad1f2b3d33e73aa5c5fde4d2be84 (patch)
tree: b6d29aa4308a156d363a08bf4f7ebf89f4047e85 /config/c-compiler.m4
parent: b73e7a0716264e5159947b1a755b9ab864142489 (diff)
download: postgresql-936546dcbc24ad1f2b3d33e73aa5c5fde4d2be84.tar.gz
Optimize pg_comp_crc32c_sse42 routine slightly, and also use it on x86.
Eliminate the separate 'len' variable from the loops, and also use the 4-byte instruction. This shaves off a few more cycles. Even though this routine that uses the special SSE 4.2 instructions is much faster than a generic routine, it's still a hot spot, so let's make it as fast as possible.

Change the configure test to not test _mm_crc32_u64. That variant is only available in the 64-bit x86-64 architecture, not in 32-bit x86. Modify pg_comp_crc32c_sse42 so that it only uses _mm_crc32_u64 on x86-64. With these changes, the SSE-accelerated CRC-32C implementation can also be used on 32-bit x86 systems.

This also fixes the 32-bit MSVC build.
Diffstat (limited to 'config/c-compiler.m4')
-rw-r--r-- config/c-compiler.m4 10
1 file changed, 7 insertions, 3 deletions
diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index f81e7d6139..4ef0de65a8 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -476,12 +476,16 @@ fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
# PGAC_SSE42_CRC32_INTRINSICS
# -----------------------
-# Check if the compiler supports _mm_crc32_u8 and _mm_crc32_u64 intrinsics.
+# Check if the compiler supports the x86 CRC instructions added in SSE 4.2,
+# using the _mm_crc32_u8 and _mm_crc32_u32 intrinsic functions. (We don't
+# test the 8-byte variant, _mm_crc32_u64, but it is assumed to be present if
+# the other ones are, on x86-64 platforms)
+#
# An optional compiler flag can be passed as argument (e.g. -msse4.2). If the
# intrinsics are supported, sets pgac_sse42_crc32_intrinsics, and CFLAGS_SSE42.
AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS],
[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics_$1])])dnl
-AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=$1], [Ac_cachevar],
+AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u32 with CFLAGS=$1], [Ac_cachevar],
[pgac_save_CFLAGS=$CFLAGS
CFLAGS="$pgac_save_CFLAGS $1"
ac_save_c_werror_flag=$ac_c_werror_flag
@@ -489,7 +493,7 @@ ac_c_werror_flag=yes
AC_TRY_LINK([#include <nmmintrin.h>],
[unsigned int crc = 0;
crc = _mm_crc32_u8(crc, 0);
- crc = (unsigned int) _mm_crc32_u64(crc, 0);],
+ crc = _mm_crc32_u32(crc, 0);],
[Ac_cachevar=yes],
[Ac_cachevar=no])
ac_c_werror_flag=$ac_save_c_werror_flag