summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjtc <jtc@ae88bc3d-4319-0410-8dbf-d08b4c9d3795>2004-10-15 04:13:11 +0000
committerjtc <jtc@ae88bc3d-4319-0410-8dbf-d08b4c9d3795>2004-10-15 04:13:11 +0000
commit4412a56d167d4e7315f0f8e923ae73cb93cf7caf (patch)
treeff3b3439a8b2661b16478e3787bfc1c5f877e7a7
parentbf8bea74568a53e3588bf5260bb0056213cb925e (diff)
downloadATCD-4412a56d167d4e7315f0f8e923ae73cb93cf7caf.tar.gz
ChangeLogTag: Thu Oct 14 21:03:00 2004 J.T. Conklin <jtc@acorntoolworks.com>
-rw-r--r--ChangeLog8
-rw-r--r--ace/CDR_Base.cpp27
-rw-r--r--ace/CDR_Base.inl23
3 files changed, 44 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 9699b3d13bd..7089cc7694d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+Thu Oct 14 21:03:00 2004 J.T. Conklin <jtc@acorntoolworks.com>
+
+ * ace/CDR_Base.cpp:
+ * ace/CDR_Base.inl:
+
+ Added AMD64 optimized versions of ACE_CDR::swap_{2, 4, 8}, and
+ ACE_CDR::swap_{2, 4}_array.
+
Thu Oct 14 08:07:36 2004 Chad Elliott <elliott_c@ociweb.com>
* ace/Profile_Timer.inl:
diff --git a/ace/CDR_Base.cpp b/ace/CDR_Base.cpp
index a3447451c72..7f3c7d5c05f 100644
--- a/ace/CDR_Base.cpp
+++ b/ace/CDR_Base.cpp
@@ -21,9 +21,13 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n)
{
// ACE_ASSERT(n > 0); The caller checks that n > 0
+ // We pretend that AMD64/GNU G++ systems have a Pentium CPU to
+ // take advantage of the inline assembly implementation.
+
// Later, we try to read in 32 or 64 bit chunks,
// so make sure we don't do that for unaligned addresses.
-#if ACE_SIZEOF_LONG == 8
+#if ACE_SIZEOF_LONG == 8 && \
+ !(defined(__amd64__) && defined(__GNUG__))
const char* const o8 = ACE_ptr_align_binary (orig, 8);
while (orig < o8 && n > 0)
{
@@ -57,7 +61,8 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n)
const char* const end = orig + 2 * (n & (~3));
// See if we're aligned for writing in 64 or 32 bit chunks...
-#if ACE_SIZEOF_LONG == 8
+#if ACE_SIZEOF_LONG == 8 && \
+ !(defined(__amd64__) && defined(__GNUG__))
if (target == ACE_ptr_align_binary (target, 8))
#else
if (target == ACE_ptr_align_binary (target, 4))
@@ -65,7 +70,7 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n)
{
while (orig < end)
{
-#if defined (ACE_HAS_PENTIUM) && defined (__GNUG__)
+#if (defined (ACE_HAS_PENTIUM) || defined(__amd64__)) && defined (__GNUG__)
unsigned int a =
* reinterpret_cast<const unsigned int*> (orig);
unsigned int b =
@@ -126,7 +131,7 @@ ACE_CDR::swap_2_array (const char* orig, char* target, size_t n)
// We're out of luck. We have to write in 2 byte chunks.
while (orig < end)
{
-#if defined (ACE_HAS_PENTIUM) && defined (__GNUG__)
+#if (defined (ACE_HAS_PENTIUM) || defined(__amd64__)) && defined (__GNUG__)
unsigned int a =
* reinterpret_cast<const unsigned int*> (orig);
unsigned int b =
@@ -282,6 +287,12 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n)
register unsigned long b =
* reinterpret_cast<const long*> (orig + 8);
+#if defined(__amd64__) && defined(__GNUC__)
+ asm ("bswapq %1" : "=r" (a) : "0" (a));
+ asm ("bswapq %1" : "=r" (b) : "0" (b));
+ asm ("rol $32, %1" : "=r" (a) : "0" (a));
+ asm ("rol $32, %1" : "=r" (b) : "0" (b));
+#else
register unsigned long a84 = (a & 0x000000ff000000ffL) << 24;
register unsigned long b84 = (b & 0x000000ff000000ffL) << 24;
register unsigned long a73 = (a & 0x0000ff000000ff00L) << 8;
@@ -293,6 +304,7 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n)
a = (a84 | a73 | a62 | a51);
b = (b84 | b73 | b62 | b51);
+#endif
* reinterpret_cast<long*> (target) = a;
* reinterpret_cast<long*> (target + 8) = b;
@@ -311,6 +323,12 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n)
register unsigned long b =
* reinterpret_cast<const long*> (orig + 8);
+#if defined(__amd64__) && defined(__GNUC__)
+ asm ("bswapq %1" : "=r" (a) : "0" (a));
+ asm ("bswapq %1" : "=r" (b) : "0" (b));
+ asm ("rol $32, %1" : "=r" (a) : "0" (a));
+ asm ("rol $32, %1" : "=r" (b) : "0" (b));
+#else
register unsigned long a84 = (a & 0x000000ff000000ffL) << 24;
register unsigned long b84 = (b & 0x000000ff000000ffL) << 24;
register unsigned long a73 = (a & 0x0000ff000000ff00L) << 8;
@@ -322,6 +340,7 @@ ACE_CDR::swap_4_array (const char* orig, char* target, size_t n)
a = (a84 | a73 | a62 | a51);
b = (b84 | b73 | b62 | b51);
+#endif
ACE_UINT32 c1 = static_cast<ACE_UINT32> (a >> 32);
ACE_UINT32 c2 = static_cast<ACE_UINT32> (a & 0xffffffff);
diff --git a/ace/CDR_Base.inl b/ace/CDR_Base.inl
index 40ef426166c..6d6222ff40d 100644
--- a/ace/CDR_Base.inl
+++ b/ace/CDR_Base.inl
@@ -6,6 +6,9 @@
// The ACE_CDR::swap_X and ACE_CDR::swap_X_array routines are split
// into the following cases for optimization:
//
+// * AMD64 CPU + gnu g++
+// => gcc amd64 inline assembly.
+//
// * x86 Pentium CPU + gnu g++
// (ACE_HAS_PENTIUM && __GNUG__)
// => gcc x86 inline assembly.
@@ -47,24 +50,19 @@
ACE_INLINE void
ACE_CDR::swap_2 (const char *orig, char* target)
{
-#if defined(ACE_HAS_PENTIUM)
-# if defined(__GNUG__)
+#if (defined(ACE_HAS_PENTIUM) || defined (__amd64__)) && defined(__GNUG__)
unsigned short a =
*reinterpret_cast<const unsigned short*> (orig);
asm( "rolw $8, %0" : "=r" (a) : "0" (a) );
*reinterpret_cast<unsigned short*> (target) = a;
-# elif (defined(_MSC_VER) || defined(__BORLANDC__)) \
+#elif defined (ACE_HAS_PENTIUM) \
+ && (defined(_MSC_VER) || defined(__BORLANDC__)) \
&& !defined(ACE_LACKS_INLINE_ASSEMBLY)
__asm mov ebx, orig;
__asm mov ecx, target;
__asm mov ax, [ebx];
__asm rol ax, 8;
__asm mov [ecx], ax;
-# else
- // For CISC Platforms this is faster than shift/masks.
- target[1] = orig[0];
- target[0] = orig[1];
-# endif
#else
register ACE_UINT16 usrc = * reinterpret_cast<const ACE_UINT16*> (orig);
register ACE_UINT16* udst = reinterpret_cast<ACE_UINT16*> (target);
@@ -75,7 +73,7 @@ ACE_CDR::swap_2 (const char *orig, char* target)
ACE_INLINE void
ACE_CDR::swap_4 (const char* orig, char* target)
{
-#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__)
+#if (defined(ACE_HAS_PENTIUM) || defined (__amd64__)) && defined(__GNUG__)
// We have ACE_HAS_PENTIUM or __amd64__, so we know the sizeof's.
register unsigned int j =
*reinterpret_cast<const unsigned int*> (orig);
@@ -99,7 +97,12 @@ ACE_CDR::swap_4 (const char* orig, char* target)
ACE_INLINE void
ACE_CDR::swap_8 (const char* orig, char* target)
{
-#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__)
+#if defined(__amd64__) && defined(__GNUG__)
+ register unsigned long x =
+ * reinterpret_cast<const unsigned long*> (orig);
+ asm ("bswapq %1" : "=r" (x) : "0" (x));
+ *reinterpret_cast<unsigned long*> (target) = x;
+#elif defined(ACE_HAS_PENTIUM) && defined(__GNUG__)
register unsigned int i =
*reinterpret_cast<const unsigned int*> (orig);
register unsigned int j =