From 4412a56d167d4e7315f0f8e923ae73cb93cf7caf Mon Sep 17 00:00:00 2001 From: jtc Date: Fri, 15 Oct 2004 04:13:11 +0000 Subject: ChangeLogTag: Thu Oct 14 21:03:00 2004 J.T. Conklin --- ChangeLog | 8 ++++++++ ace/CDR_Base.cpp | 27 +++++++++++++++++++++++---- ace/CDR_Base.inl | 23 +++++++++++++---------- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9699b3d13bd..7089cc7694d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Thu Oct 14 21:03:00 2004 J.T. Conklin + + * ace/CDR_Base.cpp: + * ace/CDR_Base.inl: + + Added AMD64 optimized versions of ACE_CDR::swap_{2, 4, 8}, and + ACE_CDR::swap_{2, 4}_array. + Thu Oct 14 08:07:36 2004 Chad Elliott * ace/Profile_Timer.inl: diff --git a/ace/CDR_Base.cpp b/ace/CDR_Base.cpp index a3447451c72..7f3c7d5c05f 100644 --- a/ace/CDR_Base.cpp +++ b/ace/CDR_Base.cpp @@ -21,9 +21,13 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n) { // ACE_ASSERT(n > 0); The caller checks that n > 0 + // We pretend that AMD64/GNU G++ systems have a Pentium CPU to + // take advantage of the inline assembly implementation. + // Later, we try to read in 32 or 64 bit chunks, // so make sure we don't do that for unaligned addresses. -#if ACE_SIZEOF_LONG == 8 +#if ACE_SIZEOF_LONG == 8 && \ + !(defined(__amd64__) && defined(__GNUG__)) const char* const o8 = ACE_ptr_align_binary (orig, 8); while (orig < o8 && n > 0) { @@ -57,7 +61,8 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n) const char* const end = orig + 2 * (n & (~3)); // See if we're aligned for writting in 64 or 32 bit chunks... 
-#if ACE_SIZEOF_LONG == 8 +#if ACE_SIZEOF_LONG == 8 && \ + !(defined(__amd64__) && defined(__GNUG__)) if (target == ACE_ptr_align_binary (target, 8)) #else if (target == ACE_ptr_align_binary (target, 4)) @@ -65,7 +70,7 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n) { while (orig < end) { -#if defined (ACE_HAS_PENTIUM) && defined (__GNUG__) +#if (defined (ACE_HAS_PENTIUM) || defined(__amd64__)) && defined (__GNUG__) unsigned int a = * reinterpret_cast (orig); unsigned int b = @@ -126,7 +131,7 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n) // We're out of luck. We have to write in 2 byte chunks. while (orig < end) { -#if defined (ACE_HAS_PENTIUM) && defined (__GNUG__) +#if (defined (ACE_HAS_PENTIUM) || defined(__amd64__)) && defined (__GNUG__) unsigned int a = * reinterpret_cast (orig); unsigned int b = @@ -282,6 +287,12 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n) register unsigned long b = * reinterpret_cast (orig + 8); +#if defined(__amd64__) && defined(__GNUC__) + asm ("bswapq %1" : "=r" (a) : "0" (a)); + asm ("bswapq %1" : "=r" (b) : "0" (b)); + asm ("rol $32, %1" : "=r" (a) : "0" (a)); + asm ("rol $32, %1" : "=r" (b) : "0" (b)); +#else register unsigned long a84 = (a & 0x000000ff000000ffL) << 24; register unsigned long b84 = (b & 0x000000ff000000ffL) << 24; register unsigned long a73 = (a & 0x0000ff000000ff00L) << 8; @@ -293,6 +304,7 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n) a = (a84 | a73 | a62 | a51); b = (b84 | b73 | b62 | b51); +#endif * reinterpret_cast (target) = a; * reinterpret_cast (target + 8) = b; @@ -311,6 +323,12 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n) register unsigned long b = * reinterpret_cast (orig + 8); +#if defined(__amd64__) && defined(__GNUC__) + asm ("bswapq %1" : "=r" (a) : "0" (a)); + asm ("bswapq %1" : "=r" (b) : "0" (b)); + asm ("rol $32, %1" : "=r" (a) : "0" (a)); + asm ("rol $32, %1" : "=r" (b) : "0" (b)); +#else 
 register unsigned long a84 = (a & 0x000000ff000000ffL) << 24; register unsigned long b84 = (b & 0x000000ff000000ffL) << 24; register unsigned long a73 = (a & 0x0000ff000000ff00L) << 8; @@ -322,6 +340,7 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n) a = (a84 | a73 | a62 | a51); b = (b84 | b73 | b62 | b51); +#endif ACE_UINT32 c1 = static_cast<ACE_UINT32> (a >> 32); ACE_UINT32 c2 = static_cast<ACE_UINT32> (a & 0xffffffff); diff --git a/ace/CDR_Base.inl b/ace/CDR_Base.inl index 40ef426166c..6d6222ff40d 100644 --- a/ace/CDR_Base.inl +++ b/ace/CDR_Base.inl @@ -6,6 +6,9 @@ // The ACE_CDR::swap_X and ACE_CDR::swap_X_array routines are broken // in 4 cases for optimization: // +// * AMD64 CPU + gnu g++ +// => gcc amd64 inline assembly. +// // * x86 Pentium CPU + gnu g++ // (ACE_HAS_PENTIUM && __GNUG__) // => gcc x86 inline assembly. @@ -47,24 +50,19 @@ ACE_INLINE void ACE_CDR::swap_2 (const char *orig, char* target) { -#if defined(ACE_HAS_PENTIUM) -# if defined(__GNUG__) +#if (defined(ACE_HAS_PENTIUM) || defined (__amd64__)) && defined(__GNUG__) unsigned short a = *reinterpret_cast<const unsigned short*> (orig); asm( "rolw $8, %0" : "=r" (a) : "0" (a) ); *reinterpret_cast<unsigned short*> (target) = a; -# elif (defined(_MSC_VER) || defined(__BORLANDC__)) \ +#elif defined (ACE_HAS_PENTIUM) \ + && (defined(_MSC_VER) || defined(__BORLANDC__)) \ && !defined(ACE_LACKS_INLINE_ASSEMBLY) __asm mov ebx, orig; __asm mov ecx, target; __asm mov ax, [ebx]; __asm rol ax, 8; __asm mov [ecx], ax; -# else - // For CISC Platforms this is faster than shift/masks.
- target[1] = orig[0]; - target[0] = orig[1]; -# endif #else register ACE_UINT16 usrc = * reinterpret_cast<const ACE_UINT16*> (orig); register ACE_UINT16* udst = reinterpret_cast<ACE_UINT16*> (target); @@ -75,7 +73,7 @@ ACE_CDR::swap_2 (const char *orig, char* target) ACE_INLINE void ACE_CDR::swap_4 (const char* orig, char* target) { -#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__) +#if (defined(ACE_HAS_PENTIUM) || defined (__amd64__)) && defined(__GNUG__) // We have ACE_HAS_PENTIUM, so we know the sizeof's. register unsigned int j = *reinterpret_cast<const unsigned int*> (orig); @@ -99,7 +97,12 @@ ACE_CDR::swap_4 (const char* orig, char* target) ACE_INLINE void ACE_CDR::swap_8 (const char* orig, char* target) { -#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__) +#if defined(__amd64__) && defined(__GNUG__) + register unsigned long x = + * reinterpret_cast<const unsigned long*> (orig); + asm ("bswapq %1" : "=r" (x) : "0" (x)); + *reinterpret_cast<unsigned long*> (target) = x; +#elif defined(ACE_HAS_PENTIUM) && defined(__GNUG__) register unsigned int i = *reinterpret_cast<const unsigned int*> (orig); register unsigned int j = -- cgit v1.2.1