Cleared crash with GCC 4.8 and above at -O3. In a nutshell, it was due to vectorization and violations of the alignment requirements of the vmovdqa instruction

git-svn-id: svn://svn.code.sf.net/p/cryptopp/code/trunk/c5@583 57ff6487-cd31-0410-9ec3-f628ee90f5f0
author: noloader <noloader@57ff6487-cd31-0410-9ec3-f628ee90f5f0> 2015-07-14 02:57:13 +0000
committer: noloader <noloader@57ff6487-cd31-0410-9ec3-f628ee90f5f0> 2015-07-14 02:57:13 +0000
commit: 51dd45a78501d37d02fea9ffa504d2e609ad8672 (patch)
tree: 79997d63ec3bcfc521fbc6b058d6405c90884787
parent: 47e5a4d4ea89c7f236bfc55df01c052efc3059e1 (diff)
download: cryptopp-51dd45a78501d37d02fea9ffa504d2e609ad8672.tar.gz
6 files changed, 62 insertions, 9 deletions
diff --git a/GNUmakefile b/GNUmakefile
index ca36725..30284ec 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -1,6 +1,6 @@
 CXXFLAGS ?= -DNDEBUG
 SYMBOLS ?= -g2
-OPTIMIZE ?= -O2
+OPTIMIZE ?= -O3
 # -fPIC is supported, and enabled by default for x86_64.
 # the following options reduce code size, but breaks link or makes link very slow on some systems
 # CXXFLAGS += -ffunction-sections -fdata-sections
@@ -64,7 +64,6 @@ endif
 # We can do integer math using the Posix shell in a GNUmakefile
 # Below, we are building a boolean circuit that says "Darwin && (GCC 4.2 || Clang)"
 MULTIARCH_SUPPORT = $(shell echo $$(($(IS_DARWIN) * ($(GCC42_OR_LATER) + $(CLANG_COMPILER)))))
-
 ifneq ($(MULTIARCH_SUPPORT),0)
 CXXFLAGS += -arch x86_64 -arch i386
 else
diff --git a/config.h b/config.h
index 7472ca0..d3bd692 100644
--- a/config.h
+++ b/config.h
@@ -23,6 +23,11 @@
 // This macro will be ignored if NO_OS_DEPENDENCE is defined.
 #define USE_MS_CRYPTOAPI
 
+// Define this to disable unaligned data access and thereby help ensure
+// compliance with the C/C++ alignment and aliasing rules. If you
+// experience a crash at -O3 with GCC, you should try this first.
+// #define CRYPTOPP_NO_UNALIGNED_DATA_ACCESS
+
 // ***************** Less Important Settings ***************
 
 // define this to retain (as much as possible) old deprecated function and class names
@@ -342,7 +347,8 @@ NAMESPACE_END
 	#define CRYPTOPP_BOOL_X86 0
 #endif
 
-#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || defined(__powerpc__)
+// CRYPTOPP_NO_UNALIGNED_DATA_ACCESS can be set on the command line or in config.h above.
+#if !defined(CRYPTOPP_NO_UNALIGNED_DATA_ACCESS) && (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || defined(__powerpc__))
 	#define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
 #endif
 
diff --git a/misc.cpp b/misc.cpp
index 3c2c2a5..e03e6c8 100644
--- a/misc.cpp
+++ b/misc.cpp
@@ -14,14 +14,20 @@
 
 NAMESPACE_BEGIN(CryptoPP)
 
+// Vectorization at -O3 requires IsStrictAligned<word64> for GCC 4.8 and above with xorbuf and VerifyBufsEqual.
+// Problems have not been experienced for the word32 variant, but they may occur in the future.
+
 void xorbuf(byte *buf, const byte *mask, size_t count)
 {
 	size_t i;
 
 	if (IsAligned<word32>(buf) && IsAligned<word32>(mask))
 	{
-		if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(buf) && IsAligned<word64>(mask))
+		if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsStrictAligned<word64>(buf) && IsStrictAligned<word64>(mask))
 		{
+			assert(IsAlignedOn(buf, GetStrictAlignmentOf<word64>()));
+			assert(IsAlignedOn(mask, GetStrictAlignmentOf<word64>()));
+
 			for (i=0; i<count/8; i++)
 				((word64*)buf)[i] ^= ((word64*)mask)[i];
 			count -= 8*i;
@@ -50,8 +56,12 @@ void xorbuf(byte *output, const byte *input, const byte *mask, size_t count)
 
 	if (IsAligned<word32>(output) && IsAligned<word32>(input) && IsAligned<word32>(mask))
 	{
-		if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(output) && IsAligned<word64>(input) && IsAligned<word64>(mask))
+		if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsStrictAligned<word64>(output) && IsStrictAligned<word64>(input) && IsStrictAligned<word64>(mask))
 		{
+			assert(IsAlignedOn(output, GetStrictAlignmentOf<word64>()));
+			assert(IsAlignedOn(input, GetStrictAlignmentOf<word64>()));
+			assert(IsAlignedOn(mask, GetStrictAlignmentOf<word64>()));
+
 			for (i=0; i<count/8; i++)
 				((word64*)output)[i] = ((word64*)input)[i] ^ ((word64*)mask)[i];
 			count -= 8*i;
@@ -84,8 +94,11 @@ bool VerifyBufsEqual(const byte *buf, const byte *mask, size_t count)
 	if (IsAligned<word32>(buf) && IsAligned<word32>(mask))
 	{
 		word32 acc32 = 0;
-		if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsAligned<word64>(buf) && IsAligned<word64>(mask))
+		if (!CRYPTOPP_BOOL_SLOW_WORD64 && IsStrictAligned<word64>(buf) && IsStrictAligned<word64>(mask))
 		{
+			assert(IsAlignedOn(buf, GetStrictAlignmentOf<word64>()));
+			assert(IsAlignedOn(mask, GetStrictAlignmentOf<word64>()));
+
 			word64 acc64 = 0;
 			for (i=0; i<count/8; i++)
 				acc64 |= ((word64*)buf)[i] ^ ((word64*)mask)[i];
diff --git a/misc.h b/misc.h
index 357da07..15d34e5 100644
--- a/misc.h
+++ b/misc.h
@@ -382,21 +382,26 @@ inline T1 RoundDownToMultipleOf(const T1 &n, const T2 &m)
 template <class T1, class T2>
 inline T1 RoundUpToMultipleOf(const T1 &n, const T2 &m)
 {
+	// TODO: undefined behavior here when T1 is a signed type, since the overflow test below relies on n+m-1 wrapping around.
 	if (n+m-1 < n)
 		throw InvalidArgument("RoundUpToMultipleOf: integer overflow");
 	return RoundDownToMultipleOf(n+m-1, m);
 }
 
+// Returns 1 for sizeof(T) < 16 when CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is
+//   defined; may cause problems with GCC vectorization at -O3.
 template <class T>
 inline unsigned int GetAlignmentOf(T *dummy=NULL)	// VC60 workaround
 {
 #ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
 	if (sizeof(T) < 16)
-		return 1;
+    return 1;
 #endif
-
+    
 #if (_MSC_VER >= 1300)
 	return __alignof(T);
+#elif defined(__clang__)
+	return __alignof(T);
 #elif defined(__GNUC__)
 	return __alignof__(T);
 #elif CRYPTOPP_BOOL_SLOW_WORD64
@@ -406,17 +411,43 @@ inline unsigned int GetAlignmentOf(T *dummy=NULL)	// VC60 workaround
 #endif
 }
 
+// Not influenced by CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS; will not
+//   cause problems with -O3 and GCC vectorization.
+template <class T>
+inline unsigned int GetStrictAlignmentOf(T *dummy=NULL)	// VC60 workaround
+{
+#if (_MSC_VER >= 1300)
+	return __alignof(T);
+#elif defined(__clang__)
+	return __alignof(T);
+#elif defined(__GNUC__)
+	return __alignof__(T);
+#else
+	return sizeof(T);
+#endif
+}
+
 inline bool IsAlignedOn(const void *p, unsigned int alignment)
 {
 	return alignment==1 || (IsPowerOf2(alignment) ? ModPowerOf2((size_t)p, alignment) == 0 : (size_t)p % alignment == 0);
 }
 
+// Influenced by CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS (via GetAlignmentOf); may
+//   cause problems with GCC vectorization at -O3.
 template <class T>
 inline bool IsAligned(const void *p, T *dummy=NULL)	// VC60 workaround
 {
 	return IsAlignedOn(p, GetAlignmentOf<T>());
 }
 
+// Not influenced by CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS; will not
+//   cause problems with -O3 and GCC vectorization.
+template <class T>
+inline bool IsStrictAligned(const void *p, T *dummy=NULL)	// VC60 workaround
+{
+	return IsAlignedOn(p, GetStrictAlignmentOf<T>());
+}
+
 #ifdef IS_LITTLE_ENDIAN
 	typedef LittleEndian NativeByteOrder;
 #else
diff --git a/sha3.h b/sha3.h
index 232bae5..15cefc9 100644
--- a/sha3.h
+++ b/sha3.h
@@ -15,7 +15,7 @@ public:
 	SHA3(unsigned int digestSize) : m_digestSize(digestSize) {Restart();}
 	unsigned int DigestSize() const {return m_digestSize;}
 	std::string AlgorithmName() const {return "SHA-3-" + IntToString(m_digestSize*8);}
-	unsigned int OptimalDataAlignment() const {return GetAlignmentOf<word64>();}
+	unsigned int OptimalDataAlignment() const {return GetStrictAlignmentOf<word64>();}
 
 	void Update(const byte *input, size_t length);
 	void Restart();
diff --git a/stdcpp.h b/stdcpp.h
index 6511c4f..619cc03 100644
--- a/stdcpp.h
+++ b/stdcpp.h
@@ -19,6 +19,10 @@
 #include <map>
 #include <vector>
 
+#if !defined(_NDEBUG) && !defined(NDEBUG)
+# include <cassert>
+#endif
+
 #ifdef CRYPTOPP_INCLUDE_VECTOR_CC
 // workaround needed on Sun Studio 12u1 Sun C++ 5.10 SunOS_i386 128229-02 2009/09/21
 #include <vector.cc>
author	noloader <noloader@57ff6487-cd31-0410-9ec3-f628ee90f5f0>	2015-07-14 02:57:13 +0000
committer	noloader <noloader@57ff6487-cd31-0410-9ec3-f628ee90f5f0>	2015-07-14 02:57:13 +0000
commit	51dd45a78501d37d02fea9ffa504d2e609ad8672 (patch)
tree	79997d63ec3bcfc521fbc6b058d6405c90884787
parent	47e5a4d4ea89c7f236bfc55df01c052efc3059e1 (diff)
download	cryptopp-51dd45a78501d37d02fea9ffa504d2e609ad8672.tar.gz