diff options
-rw-r--r-- | configure.ac | 18
-rw-r--r-- | src/Makefile_Eo.am | 1
-rw-r--r-- | src/lib/eo/eo_ptr_indirection.c | 44
3 files changed, 49 insertions, 14 deletions
diff --git a/configure.ac b/configure.ac index c399877c59..c295879338 100644 --- a/configure.ac +++ b/configure.ac @@ -550,6 +550,7 @@ EFLALL_CFLAGS="${EFLALL_CFLAGS}" ## CPU architecture specific assembly build_cpu_mmx="no" +build_cpu_avx2="no" build_cpu_sse3="no" build_cpu_altivec="no" build_cpu_neon="no" @@ -566,6 +567,7 @@ AC_ARG_ENABLE([neon], ]) SSE3_CFLAGS="" +AVX2_CFLAGS="" ALTIVEC_CFLAGS="" NEON_CFLAGS="" @@ -581,10 +583,23 @@ case $host_cpu in [build_cpu_sse3="no"]) AC_MSG_CHECKING([whether to build SSE3 code]) AC_MSG_RESULT([${build_cpu_sse3}]) - if test "x$build_cpu_sse3" = "xyes" ; then SSE3_CFLAGS="-msse3" fi + + + AC_CHECK_HEADER([avxintrin.h], + [ + AC_DEFINE(BUILD_AVX2, 1, [Build AVX2 Code]) + build_cpu_avx2="yes" + ], + [build_cpu_avx2="no"]) + AC_MSG_CHECKING([whether to build AVX2 code]) + AC_MSG_RESULT([${build_cpu_avx2}]) + +# if test "x$build_cpu_avx2" = "xyes" ; then + AVX2_CFLAGS="-mavx2" + # fi ;; *power* | *ppc*) build_cpu_altivec="yes" @@ -665,6 +680,7 @@ esac AC_SUBST([ALTIVEC_CFLAGS]) AC_SUBST([SSE3_CFLAGS]) +AC_SUBST([AVX2_CFLAGS]) AC_SUBST([NEON_CFLAGS]) #### Checks for linker characteristics diff --git a/src/Makefile_Eo.am b/src/Makefile_Eo.am index 438fbf222a..db55420cd7 100644 --- a/src/Makefile_Eo.am +++ b/src/Makefile_Eo.am @@ -37,6 +37,7 @@ lib/eo/efl_future.c lib_eo_libeo_la_CPPFLAGS = \ -I$(top_builddir)/src/lib/efl \ @EO_CFLAGS@ \ +@AVX2_CFLAGS@ \ @VALGRIND_CFLAGS@ lib_eo_libeo_la_LIBADD = @EO_LIBS@ lib_eo_libeo_la_DEPENDENCIES = @EO_INTERNAL_LIBS@ diff --git a/src/lib/eo/eo_ptr_indirection.c b/src/lib/eo/eo_ptr_indirection.c index 4b3adbf1a5..1d4766ca66 100644 --- a/src/lib/eo/eo_ptr_indirection.c +++ b/src/lib/eo/eo_ptr_indirection.c @@ -2,6 +2,7 @@ # include <config.h> #endif +#include <immintrin.h> #include "eo_ptr_indirection.h" extern Eina_Thread _efl_object_main_thread; @@ -95,7 +96,24 @@ _eo_obj_pointer_get(const Eo_Id obj_id, const char *restrict func_name, const ch &&do_domain_thread, &&do_domain_other, }; - const 
unsigned int domain = (obj_id >> SHIFT_DOMAIN) & MASK_DOMAIN; + + /* DOMAIN, GENERATION, OBJ_TAG, ENTRY */ + const __m256i src = _mm256_set_epi64x(obj_id, obj_id, obj_id, obj_id); + const __m256i shift = _mm256_set_epi64x(SHIFT_DOMAIN, 0, 0, SHIFT_ENTRY_ID); + const __m256i masks = _mm256_set_epi64x(MASK_DOMAIN, MASK_GENERATIONS, MASK_OBJ_TAG, MASK_ENTRY_ID); + + const __m256i shifted = _mm256_srav_epi32(src, shift); + const __m256i result = _mm256_and_si256(shifted, masks); + + const int64_t domain = _mm256_extract_epi64(result, 3); + const int64_t generation = _mm256_extract_epi64(result, 2); + const int64_t tag_bit = _mm256_extract_epi64(result, 1); + const int64_t entry_id = _mm256_extract_epi64(result, 0); + +// printf("===> dom=%lli gen=0x%llx tag=0x%llx entry=%lli\n", domain, generation, tag_bit, entry_id); + + +// const unsigned int domain = (obj_id >> SHIFT_DOMAIN) & MASK_DOMAIN; goto *jump[domain]; do_domain_main: EINA_HOT @@ -105,11 +123,11 @@ do_domain_main: EINA_HOT return _eo_main_id_table.cache.object; /* XXX This could definitely be done in one go with vectorization */ - const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; - const unsigned int generation = obj_id & MASK_GENERATIONS; + // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; + // const unsigned int generation = obj_id & MASK_GENERATIONS; - // get tag bit to check later down below - pipelining - const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; + // // get tag bit to check later down below - pipelining + // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; if (EINA_UNLIKELY(!tag_bit || (entry_id >= _eo_main_id_table.count))) goto main_err; @@ -146,11 +164,11 @@ do_domain_other: EINA_COLD if (obj_id == table->cache.id) return table->cache.object; - const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; - const unsigned int generation = obj_id & MASK_GENERATIONS; + // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; + // const 
unsigned int generation = obj_id & MASK_GENERATIONS; - // get tag bit to check later down below - pipelining - const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; + // // get tag bit to check later down below - pipelining + // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; if (EINA_UNLIKELY(!tag_bit || (entry_id >= table->count))) goto err; @@ -183,11 +201,11 @@ do_domain_shared: EINA_COLD // by EO_OBJ_DONE() to release return table->cache.object; - const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; - const unsigned int generation = obj_id & MASK_GENERATIONS; + // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; + // const unsigned int generation = obj_id & MASK_GENERATIONS; - // get tag bit to check later down below - pipelining - const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; + // // get tag bit to check later down below - pipelining + // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; if (EINA_UNLIKELY((!tag_bit || entry_id >= table->count))) goto err_shared; |