summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Black <daniel@linux.vnet.ibm.com>2018-04-04 20:16:30 +1000
committerMonty <monty@mariadb.org>2018-05-07 16:04:01 +0300
commitd405bee0580eb30e53e5391bbb49ef44fa3d73ad (patch)
tree1871af0ae7786c9343dddfde5d9b736c9c0d4653
parent1a4c355a1c5e6b0e323be450fb38633921b42324 (diff)
downloadmariadb-git-d405bee0580eb30e53e5391bbb49ef44fa3d73ad.tar.gz
mysys: disable "optimized" memcpy from 18 years ago
MDEV-15843 mysys: remove optimized memcpy from 18 years ago While this code has remained dormant for 18 years, libc implementers have used assembly features to gain improvements using achitecture features optimized and by the buffer length like: * https://svnweb.freebsd.org/base/head/lib/libc/amd64/string/memcmp.S * https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/aarch64/memcmp.S * https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/powerpc/powerpc64/memcpy.S From an sysbench-1.0.6 oltp_read_only test on binary charset table: x86_64: was ptr_compare_0: perf report -g --no-children: + 3.37% mysqld mysqld [.] hp_rec_hashnr + 3.15% mysqld libc-2.26.so [.] __memmove_avx_unaligned_erms + 2.73% mysqld mysqld [.] row_search_mvcc + 1.97% mysqld mysqld [.] rec_get_offsets_func + 1.24% mysqld mysqld [.] ptr_compare_0 + 1.14% mysqld mysqld [.] my_qsort2 After: __memcmp_avx2_movbe + 3.42% mysqld mysqld [.] hp_rec_hashnr + 2.96% mysqld libc-2.26.so [.] __memmove_avx_unaligned_erms + 2.91% mysqld mysqld [.] row_search_mvcc + 2.13% mysqld mysqld [.] rec_get_offsets_func + 1.18% mysqld libc-2.26.so [.] __memcmp_avx2_movbe + 1.04% mysqld mysqld [.] evaluate_join_record + 1.02% mysqld mysqld [.] my_qsort2 Power9: Before: ptr_compare_0 + 4.24% mysqld mysqld [.] _Z15row_search_mvccPh15page_cur_mode_tP14row_prebuilt_tmm + 2.18% mysqld mysqld [.] hp_rec_hashnr + 2.07% mysqld mysqld [.] _Z20rec_get_offsets_funcPKhPK12dict_index_tPmbmPP16mem_block_info_t + 1.60% mysqld mysqld [.] _ZL20evaluate_join_recordP4JOINP13st_join_tablei + 1.20% mysqld mysqld [.] _ZN11ha_innobase13general_fetchEPhjj + 1.05% mysqld mysqld [.] _ZN17Item_func_between15val_int_cmp_intEv + 0.92% mysqld mysqld [.] _Z40row_sel_field_store_in_mysql_format_funcPhPK17mysql_row_templ_tPKhm + 0.91% mysqld mysqld [.] _ZNK10Item_param6PValue7val_intEPK19Type_std_attributes + 0.84% mysqld mysqld [.] ptr_compare_0 After: __memcmp_power8 + 2.29% mysqld mysqld [.] 
_Z15row_search_mvccPh15page_cur_mode_tP14row_prebuilt_tmm + 1.32% mysqld mysqld [.] hp_rec_hashnr + 1.18% swapper [kernel.kallsyms] [k] power_enter_stop + 1.12% mysqld mysqld [.] _Z20rec_get_offsets_funcPKhPK12dict_index_tPmbmPP16mem_block_info_t + 0.87% mysqld mysqld [.] _ZL20evaluate_join_recordP4JOINP13st_join_tablei + 0.87% mysqld [kernel.kallsyms] [k] ___bpf_prog_run + 0.76% mysqld libc-2.26.so [.] __memcmp_power8 + 0.68% mysqld mysqld [.] _ZN11ha_innobase13general_fetchEPhjj + 0.58% mysqld mysqld [.] _ZN17Item_func_between15val_int_cmp_intEv
-rw-r--r--mysys/ptr_cmp.c36
1 files changed, 15 insertions, 21 deletions
diff --git a/mysys/ptr_cmp.c b/mysys/ptr_cmp.c
index 6d853a8db25..1880e60a811 100644
--- a/mysys/ptr_cmp.c
+++ b/mysys/ptr_cmp.c
@@ -27,14 +27,18 @@
* written in assembler. for example one in /usr/lib/libc/libc_hwcap*.so.1.
* on Solaris, or on Windows inside C runtime linrary.
*
- * On Solaris, native implementation is also usually faster than the
- * built-in memcmp supplied by GCC, so it is recommended to build
+ * On Solaris, native implementation is also usually faster than the
+ * built-in memcmp supplied by GCC, so it is recommended to build
* with "-fno-builtin-memcmp"in CFLAGS if building with GCC on Solaris.
*/
-#if defined (__sun) || defined (_WIN32)
+/*
+ Daniel Black's tests show that libc memcmp is generally faster than
+ ptr_cmp(), at least on x86 and power8 platforms, so we use the libc
+ code as the default for now
+*/
+
#define USE_NATIVE_MEMCMP 1
-#endif
#ifdef USE_NATIVE_MEMCMP
@@ -45,23 +49,19 @@ static int native_compare(size_t *length, unsigned char **a, unsigned char **b)
return memcmp(*a, *b, *length);
}
-#else /* USE_NATIVE_MEMCMP */
+qsort2_cmp get_ptr_compare (size_t size __attribute__((unused)))
+{
+ return (qsort2_cmp) native_compare;
+}
+
+#else /* USE_NATIVE_MEMCMP */
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_0(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_1(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_2(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_3(size_t *compare_length, uchar **a, uchar **b);
-#endif /* __sun */
- /* Get a pointer to a optimal byte-compare function for a given size */
-
-#ifdef USE_NATIVE_MEMCMP
-qsort2_cmp get_ptr_compare (size_t size __attribute__((unused)))
-{
- return (qsort2_cmp) native_compare;
-}
-#else
qsort2_cmp get_ptr_compare (size_t size)
{
if (size < 4)
@@ -74,9 +74,6 @@ qsort2_cmp get_ptr_compare (size_t size)
}
return 0; /* Impossible */
}
-#endif /* USE_NATIVE_MEMCMP */
-
-
/*
Compare to keys to see witch is smaller.
Loop unrolled to make it quick !!
@@ -84,8 +81,6 @@ qsort2_cmp get_ptr_compare (size_t size)
#define cmp(N) if (first[N] != last[N]) return (int) first[N] - (int) last[N]
-#ifndef USE_NATIVE_MEMCMP
-
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b)
{
size_t length= *compare_length;
@@ -189,7 +184,7 @@ static int ptr_compare_3(size_t *compare_length,uchar **a, uchar **b)
return (0);
}
-#endif /* !__sun */
+#endif /* USE_NATIVE_MEMCMP */
void my_store_ptr(uchar *buff, size_t pack_length, my_off_t pos)
{
@@ -227,4 +222,3 @@ my_off_t my_get_ptr(uchar *ptr, size_t pack_length)
}
return pos;
}
-