summary | refs | log | tree | commit | diff
path: root/simon.cpp
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2018-11-12 21:51:11 -0500
committer: Jeffrey Walton <noloader@gmail.com> 2018-11-12 21:51:11 -0500
commit: 9550ccc9a32b664e76654e21f5023ea9134eac13 (patch)
tree: f596a6b43f4b72bf0b7877056e9de7b119de1144 /simon.cpp
parent: a0608a6b8083a330b43747a31dfd31d27482a725 (diff)
download: cryptopp-git-9550ccc9a32b664e76654e21f5023ea9134eac13.tar.gz
Port SIMON64 to Altivec
SIMON64 runs about 4x faster than C++ on POWER4 and friends. If POWER7 is available, it goes back to full speed due to efficient unaligned loads.
Diffstat (limited to 'simon.cpp')
-rw-r--r-- simon.cpp 38
1 files changed, 26 insertions, 12 deletions
diff --git a/simon.cpp b/simon.cpp
index 4c48efd2..0e777306 100644
--- a/simon.cpp
+++ b/simon.cpp
@@ -7,6 +7,14 @@
#include "misc.h"
#include "cpu.h"
+#ifndef CRYPTOPP_INLINE
+# if defined(CRYPTOPP_DEBUG)
+# define CRYPTOPP_INLINE static
+# else
+# define CRYPTOPP_INLINE inline
+# endif
+#endif
+
// Uncomment for benchmarking C++ against SSE or NEON.
// Do so in both simon.cpp and simon-simd.cpp.
// #undef CRYPTOPP_SSSE3_AVAILABLE
@@ -228,11 +236,11 @@ extern size_t SIMON128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, si
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
-extern size_t SIMON64_Enc_AdvancedProcessBlocks_POWER7(const word32* subKeys, size_t rounds,
+#if (CRYPTOPP_ALTIVEC_AVAILABLE)
+extern size_t SIMON64_Enc_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
-extern size_t SIMON64_Dec_AdvancedProcessBlocks_POWER7(const word32* subKeys, size_t rounds,
+extern size_t SIMON64_Dec_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
@@ -255,9 +263,9 @@ std::string SIMON64::Base::AlgorithmProvider() const
if (HasNEON())
return "NEON";
# endif
-# if (CRYPTOPP_POWER7_AVAILABLE)
- if (HasPower7())
- return "Power7";
+# if (CRYPTOPP_ALTIVEC_AVAILABLE)
+ if (HasAltivec())
+ return "Altivec";
# endif
#endif
return "C++";
@@ -292,6 +300,12 @@ void SIMON64::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength,
default:
CRYPTOPP_ASSERT(0);;
}
+
+ // Altivec loads the current subkey as a 16-byte vector, so the load
+ // for the last subkey reads past it. The extra elements ensure memory
+ // backs the last subkey.
+#if CRYPTOPP_ALTIVEC_AVAILABLE
+ m_rkeys.Grow(m_rkeys.size()+4);
+#endif
}
void SIMON64::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
@@ -463,9 +477,9 @@ size_t SIMON64::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xor
return SIMON64_Enc_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,
inBlocks, xorBlocks, outBlocks, length, flags);
#endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
- if (HasPower7())
- return SIMON64_Enc_AdvancedProcessBlocks_POWER7(m_rkeys, (size_t)m_rounds,
+#if (CRYPTOPP_ALTIVEC_AVAILABLE)
+ if (HasAltivec())
+ return SIMON64_Enc_AdvancedProcessBlocks_ALTIVEC(m_rkeys, (size_t)m_rounds,
inBlocks, xorBlocks, outBlocks, length, flags);
#endif
return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
@@ -484,9 +498,9 @@ size_t SIMON64::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xor
return SIMON64_Dec_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,
inBlocks, xorBlocks, outBlocks, length, flags);
#endif
-#if (CRYPTOPP_POWER7_AVAILABLE)
- if (HasPower7())
- return SIMON64_Dec_AdvancedProcessBlocks_POWER7(m_rkeys, (size_t)m_rounds,
+#if (CRYPTOPP_ALTIVEC_AVAILABLE)
+ if (HasAltivec())
+ return SIMON64_Dec_AdvancedProcessBlocks_ALTIVEC(m_rkeys, (size_t)m_rounds,
inBlocks, xorBlocks, outBlocks, length, flags);
#endif
return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);