summary refs log tree commit diff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2021-12-18 18:07:59 -0500
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2021-12-18 18:07:59 -0500
commit 12a356e431d1b2d3531d3d73de330bf9ee9be48b (patch)
tree 490e1eeb55bd473a85dcfd3db409eb902a0e5f69
parent ec64b3c5b7abd621dfddee6b4cc115298e5d6803 (diff)
download opus-12a356e431d1b2d3531d3d73de330bf9ee9be48b.tar.gz
Disable dangerous SSE 4.1 intrinsic optimizations
These could result in 16-byte-aligned loads on unaligned data, causing a segfault.
-rw-r--r-- celt/x86/x86cpu.h 34
1 files changed, 2 insertions, 32 deletions
diff --git a/celt/x86/x86cpu.h b/celt/x86/x86cpu.h
index 1e2bf17b..0de8df35 100644
--- a/celt/x86/x86cpu.h
+++ b/celt/x86/x86cpu.h
@@ -56,40 +56,10 @@
int opus_select_arch(void);
# endif
-/*gcc appears to emit MOVDQA's to load the argument of an _mm_cvtepi8_epi32()
- or _mm_cvtepi16_epi32() when optimizations are disabled, even though the
- actual PMOVSXWD instruction takes an m32 or m64. Unlike a normal memory
- reference, these require 16-byte alignment and load a full 16 bytes (instead
- of 4 or 8), possibly reading out of bounds.
-
- We can insert an explicit MOVD or MOVQ using _mm_cvtsi32_si128() or
- _mm_loadl_epi64(), which should have the same semantics as an m32 or m64
- reference in the PMOVSXWD instruction itself, but gcc is not smart enough to
- optimize this out when optimizations ARE enabled.
-
- Clang, in contrast, requires us to do this always for _mm_cvtepi8_epi32
- (which is fair, since technically the compiler is always allowed to do the
- dereference before invoking the function implementing the intrinsic).
- However, it is smart enough to eliminate the extra MOVD instruction.
- For _mm_cvtepi16_epi32, it does the right thing, though does *not* optimize out
- the extra MOVQ if it's specified explicitly */
-
-# if defined(__clang__) || !defined(__OPTIMIZE__)
-# define OP_CVTEPI8_EPI32_M32(x) \
+#define OP_CVTEPI8_EPI32_M32(x) \
(_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x))))
-# else
-# define OP_CVTEPI8_EPI32_M32(x) \
- (_mm_cvtepi8_epi32(*(__m128i *)(x)))
-#endif
-/* similar reasoning about the instruction sequence as in the 32-bit macro above,
- */
-# if defined(__clang__) || !defined(__OPTIMIZE__)
-# define OP_CVTEPI16_EPI32_M64(x) \
+#define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
-# else
-# define OP_CVTEPI16_EPI32_M64(x) \
- (_mm_cvtepi16_epi32(*(__m128i *)(x)))
-# endif
#endif