summaryrefslogtreecommitdiff
path: root/ace/CDR_Base.inl
blob: 40ef426166cafb5c14d30217cf469b684c4dd5a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
// -*- C++ -*-
//
// $Id$

//
// The ACE_CDR::swap_X and ACE_CDR::swap_X_array routines are broken
// in 4 cases for optimization:
//
// * x86 Pentium CPU + gnu g++
//   (ACE_HAS_PENTIUM && __GNUG__)
//   => gcc x86 inline assembly.
//
// * x86 Pentium CPU and (_MSC_VER) or BORLAND C++)
//   (ACE_HAS_PENTIUM && ( _MSC_VER || __BORLANDC__ )
//   => MSC x86 inline assembly.
//
// * 64 bit architecture
//   (ACE_SIZEOF_LONG == 8)
//   => shift/masks using 64bit words.
//
// * default
//   (none of the above)
//   => shift/masks using 32bit words.
//
//
// Some things you could find useful to know if you intend to mess
// with this optimizations for swaps:
//
//      * MSVC++ don't assume register values are conserved between
//        statements. So you can clobber any register you want,
//        whenever you want (well not *anyone* really, see manual).
//        The MSVC++ optimizer will try to pick different registers
//        for the C++ statements sorrounding your asm block, and if
//        it's not possible will use the stack.
//
//      * If you clobber registers with asm statements in gcc, you
//        better do it in an asm-only function, or save/restore them
//        before/after in the stack. If not, sorrounding C statements
//        could end using the same registers and big-badda-bum (been
//        there, done that...). The big-badda-bum could happen *even
//        if you specify the clobbered register in your asm's*.
//        Even better, use gcc asm syntax for detecting the register
//        asigned to a certain variable so you don't have to clobber any
//        register directly.
//

ACE_INLINE void
ACE_CDR::swap_2 (const char *orig, char* target)
{
#if defined(ACE_HAS_PENTIUM)
# if defined(__GNUG__)
  unsigned short a =
    *reinterpret_cast<const unsigned short*> (orig);
  asm( "rolw $8, %0" : "=r" (a) : "0" (a) );
  *reinterpret_cast<unsigned short*> (target) = a;
# elif (defined(_MSC_VER) || defined(__BORLANDC__)) \
       && !defined(ACE_LACKS_INLINE_ASSEMBLY)
  __asm mov ebx, orig;
  __asm mov ecx, target;
  __asm mov ax, [ebx];
  __asm rol ax, 8;
  __asm mov [ecx], ax;
# else
  // For CISC Platforms this is faster than shift/masks.
  target[1] = orig[0];
  target[0] = orig[1];
# endif
#else
  register ACE_UINT16 usrc = * reinterpret_cast<const ACE_UINT16*> (orig);
  register ACE_UINT16* udst = reinterpret_cast<ACE_UINT16*> (target);
  *udst = (usrc << 8) | (usrc >> 8);
#endif /* ACE_HAS_PENTIUM */
}

ACE_INLINE void
ACE_CDR::swap_4 (const char* orig, char* target)
{
#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__)
  // We have ACE_HAS_PENTIUM, so we know the sizeof's.
  register unsigned int j =
    *reinterpret_cast<const unsigned int*> (orig);
  asm ("bswap %1" : "=r" (j) : "0" (j));
  *reinterpret_cast<unsigned int*> (target) = j;
#elif defined(ACE_HAS_PENTIUM) \
      && (defined(_MSC_VER) || defined(__BORLANDC__)) \
      && !defined(ACE_LACKS_INLINE_ASSEMBLY)
  __asm mov ebx, orig;
  __asm mov ecx, target;
  __asm mov eax, [ebx];
  __asm bswap eax;
  __asm mov [ecx], eax;
#else
  register ACE_UINT32 x = * reinterpret_cast<const ACE_UINT32*> (orig);
  x = (x << 24) | ((x & 0xff00) << 8) | ((x & 0xff0000) >> 8) | (x >> 24);
  * reinterpret_cast<ACE_UINT32*> (target) = x;
#endif
}

ACE_INLINE void
ACE_CDR::swap_8 (const char* orig, char* target)
{
#if defined(ACE_HAS_PENTIUM) && defined(__GNUG__)
  register unsigned int i =
    *reinterpret_cast<const unsigned int*> (orig);
  register unsigned int j =
    *reinterpret_cast<const unsigned int*> (orig + 4);
  asm ("bswap %1" : "=r" (i) : "0" (i));
  asm ("bswap %1" : "=r" (j) : "0" (j));
  *reinterpret_cast<unsigned int*> (target + 4) = i;
  *reinterpret_cast<unsigned int*> (target) = j;
#elif defined(ACE_HAS_PENTIUM) \
      && (defined(_MSC_VER) || defined(__BORLANDC__)) \
      && !defined(ACE_LACKS_INLINE_ASSEMBLY)
  __asm mov ecx, orig;
  __asm mov edx, target;
  __asm mov eax, [ecx];
  __asm mov ebx, 4[ecx];
  __asm bswap eax;
  __asm bswap ebx;
  __asm mov 4[edx], eax;
  __asm mov [edx], ebx;
#elif ACE_SIZEOF_LONG == 8
  // 64 bit architecture.
  register unsigned long x =
    * reinterpret_cast<const unsigned long*> (orig);
  register unsigned long x84 = (x & 0x000000ff000000ffUL) << 24;
  register unsigned long x73 = (x & 0x0000ff000000ff00UL) << 8;
  register unsigned long x62 = (x & 0x00ff000000ff0000UL) >> 8;
  register unsigned long x51 = (x & 0xff000000ff000000UL) >> 24;
  x = (x84 | x73 | x62 | x51);
  x = (x << 32) | (x >> 32);
  *reinterpret_cast<unsigned long*> (target) = x;
#else
  register ACE_UINT32 x =
    * reinterpret_cast<const ACE_UINT32*> (orig);
  register ACE_UINT32 y =
    * reinterpret_cast<const ACE_UINT32*> (orig + 4);
  x = (x << 24) | ((x & 0xff00) << 8) | ((x & 0xff0000) >> 8) | (x >> 24);
  y = (y << 24) | ((y & 0xff00) << 8) | ((y & 0xff0000) >> 8) | (y >> 24);
  * reinterpret_cast<ACE_UINT32*> (target) = y;
  * reinterpret_cast<ACE_UINT32*> (target + 4) = x;
#endif
}

ACE_INLINE void
ACE_CDR::swap_16 (const char* orig, char* target)
{
  swap_8 (orig + 8, target);
  swap_8 (orig, target + 8);
}

ACE_INLINE size_t
ACE_CDR::first_size (size_t minsize)
{
  if (minsize == 0)
    return ACE_CDR::DEFAULT_BUFSIZE;

  size_t newsize = ACE_CDR::DEFAULT_BUFSIZE;
  while (newsize < minsize)
    {
      if (newsize < ACE_CDR::EXP_GROWTH_MAX)
        {
          // We grow exponentially at the beginning, this is fast and
          // reduces the number of allocations.
          newsize *= 2;
        }
      else
        {
          // but continuing with exponential growth can result in over
          // allocations and easily yield an allocation failure.
          // So we grow linearly when the buffer is too big.
          newsize += ACE_CDR::LINEAR_GROWTH_CHUNK;
        }
    }
  return newsize;
}

ACE_INLINE size_t
ACE_CDR::next_size (size_t minsize)
{
  size_t newsize =
    ACE_CDR::first_size (minsize);

  if (newsize == minsize)
    {
      // If necessary increment the size
      if (newsize < ACE_CDR::EXP_GROWTH_MAX)
        newsize *= 2;
      else
        newsize += ACE_CDR::LINEAR_GROWTH_CHUNK;
    }

  return newsize;
}

// ****************************************************************