diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2016-03-15 10:10:12 -0700 |
---|---|---|
committer | Jani Heikkinen <jani.heikkinen@qt.io> | 2016-05-25 15:46:17 +0000 |
commit | 540978288ea0f6ed0b166bb9207f427a4c825ab6 (patch) | |
tree | 91209cfb26a1f6def5fb62271df68ee64d80e379 | |
parent | 421aa422af2f6b147ad076ad1736b3747abc4317 (diff) | |
download | qtbase-5.6.1.tar.gz |
Move the Q_ALWAYS_INLINE and forcing of __builtin_memcpy to the existing
functions.
Change-Id: Icaa7fb2a490246bda156ffff143c137e520eea79
Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
-rw-r--r-- | src/corelib/global/qendian.h | 30 | ||||
-rw-r--r-- | src/corelib/global/qendian.qdoc | 23 | ||||
-rw-r--r-- | src/corelib/json/qjson_p.h | 2 | ||||
-rw-r--r-- | src/corelib/mimetypes/qmimemagicrule.cpp | 3 | ||||
-rw-r--r-- | src/corelib/tools/qbitarray.cpp | 26 | ||||
-rw-r--r-- | src/corelib/tools/qhash.cpp | 9 | ||||
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 22 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 37 | ||||
-rw-r--r-- | src/corelib/tools/qstring.cpp | 2 |
9 files changed, 55 insertions, 99 deletions
diff --git a/src/corelib/global/qendian.h b/src/corelib/global/qendian.h index 2ddefaec8b..23dda270e3 100644 --- a/src/corelib/global/qendian.h +++ b/src/corelib/global/qendian.h @@ -42,6 +42,11 @@ QT_BEGIN_NAMESPACE +#ifdef __has_builtin +# define QT_HAS_BUILTIN(x) __has_builtin(x) +#else +# define QT_HAS_BUILTIN(x) 0 +#endif /* * ENDIAN FUNCTIONS @@ -64,18 +69,29 @@ template <typename T> inline void qbswap(const T src, uchar *dest) // Used to implement a type-safe and alignment-safe copy operation // If you want to avoid the memcpy, you must write specializations for these functions -template <typename T> inline void qToUnaligned(const T src, uchar *dest) +template <typename T> Q_ALWAYS_INLINE void qToUnaligned(const T src, uchar *dest) { // Using sizeof(T) inside memcpy function produces internal compiler error with // MSVC2008/ARM in tst_endian -> use extra indirection to resolve size of T. const size_t size = sizeof(T); - memcpy(dest, &src, size); +#if QT_HAS_BUILTIN(__builtin_memcpy) + __builtin_memcpy +#else + memcpy +#endif + (dest, &src, size); } -template <typename T> inline T qFromUnaligned(const uchar *src) + +template <typename T> Q_ALWAYS_INLINE T qFromUnaligned(const uchar *src) { T dest; const size_t size = sizeof(T); - memcpy(&dest, src, size); +#if QT_HAS_BUILTIN(__builtin_memcpy) + __builtin_memcpy +#else + memcpy +#endif + (&dest, src, size); return dest; } @@ -87,12 +103,6 @@ template <typename T> inline T qFromUnaligned(const uchar *src) */ template <typename T> T qbswap(T source); -#ifdef __has_builtin -# define QT_HAS_BUILTIN(x) __has_builtin(x) -#else -# define QT_HAS_BUILTIN(x) 0 -#endif - // GCC 4.3 implemented all the intrinsics, but the 16-bit one only got implemented in 4.8; // Clang 2.6 implemented the 32- and 64-bit but waited until 3.2 to implement the 16-bit one #if (defined(Q_CC_GNU) && Q_CC_GNU >= 403) || QT_HAS_BUILTIN(__builtin_bswap32) diff --git a/src/corelib/global/qendian.qdoc b/src/corelib/global/qendian.qdoc index e110461f8b..b7494c9a21 100644 --- a/src/corelib/global/qendian.qdoc +++ b/src/corelib/global/qendian.qdoc @@ -34,6 +34,29 @@ */ /*! + \internal + \fn T qFromUnaligned(const uchar *ptr) + \since 5.5 + + Loads a \c{T} from address \a ptr, which may be misaligned. + + Use of this function avoids the undefined behavior that the C++ standard + otherwise attributes to unaligned loads. +*/ + +/*! + \internal + \fn void qToUnaligned(T t, uchar *ptr) + \since 4.5 + + Stores \a t to address \a ptr, which may be misaligned. + + Use of this function avoids the undefined behavior that the C++ standard + otherwise attributes to unaligned stores. +*/ + + +/*! \fn T qFromBigEndian(const uchar *src) \since 4.3 \relates <QtEndian> diff --git a/src/corelib/json/qjson_p.h b/src/corelib/json/qjson_p.h index 59d0c91785..c52a37ba2b 100644 --- a/src/corelib/json/qjson_p.h +++ b/src/corelib/json/qjson_p.h @@ -402,7 +402,7 @@ public: // pack with itself, we'll discard the high part anyway chunk = _mm_packus_epi16(chunk, chunk); // unaligned 64-bit store - qUnalignedStore(l + i, _mm_cvtsi128_si64(chunk)); + qToUnaligned(_mm_cvtsi128_si64(chunk), l + i); i += 8; } # endif diff --git a/src/corelib/mimetypes/qmimemagicrule.cpp b/src/corelib/mimetypes/qmimemagicrule.cpp index 44834420fe..398a670544 100644 --- a/src/corelib/mimetypes/qmimemagicrule.cpp +++ b/src/corelib/mimetypes/qmimemagicrule.cpp @@ -42,7 +42,6 @@ #include <QtCore/QList> #include <QtCore/QDebug> #include <qendian.h> -#include <private/qsimd_p.h> // for qUnalignedLoad QT_BEGIN_NAMESPACE @@ -177,7 +176,7 @@ static bool matchNumber(const QMimeMagicRulePrivate *d, const QByteArray &data) const char *p = data.constData() + d->startPos; const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1); for ( ; p <= e; ++p) { - if ((qUnalignedLoad<T>(p) & mask) == (value & mask)) + if ((qFromUnaligned<T>(reinterpret_cast<const uchar *>(p)) & mask) == (value & mask)) return true; } diff --git a/src/corelib/tools/qbitarray.cpp b/src/corelib/tools/qbitarray.cpp index a64edea77e..8e6b1203f8 100644 --- a/src/corelib/tools/qbitarray.cpp +++ b/src/corelib/tools/qbitarray.cpp @@ -35,6 +35,7 @@ #include <qalgorithms.h> #include <qdatastream.h> #include <qdebug.h> +#include <qendian.h> #include <string.h> QT_BEGIN_NAMESPACE @@ -162,25 +163,6 @@ QBitArray::QBitArray(int size, bool value) Same as size(). */ -template <typename T> T qUnalignedLoad(const uchar *ptr) -{ - /* - * Testing with different compilers shows that they all optimize the memcpy - * call away and replace with direct loads whenever possible. On x86 and PPC, - * GCC does direct unaligned loads; on MIPS, it generates a pair of load-left - * and load-right instructions. ICC and Clang do the same on x86. This is both - * 32- and 64-bit. - * - * On ARM cores without unaligned loads, the compiler leaves a call to - * memcpy. - */ - - T u; - memcpy(&u, ptr, sizeof(u)); - return u; -} - - /*! If \a on is true, this function returns the number of 1-bits stored in the bit array; otherwise the number @@ -196,17 +178,17 @@ int QBitArray::count(bool on) const const quint8 *const end = reinterpret_cast<const quint8 *>(d.end()); while (bits + 7 <= end) { - quint64 v = qUnalignedLoad<quint64>(bits); + quint64 v = qFromUnaligned<quint64>(bits); bits += 8; numBits += int(qPopulationCount(v)); } if (bits + 3 <= end) { - quint32 v = qUnalignedLoad<quint32>(bits); + quint32 v = qFromUnaligned<quint32>(bits); bits += 4; numBits += int(qPopulationCount(v)); } if (bits + 1 < end) { - quint16 v = qUnalignedLoad<quint16>(bits); + quint16 v = qFromUnaligned<quint16>(bits); bits += 2; numBits += int(qPopulationCount(v)); } diff --git a/src/corelib/tools/qhash.cpp b/src/corelib/tools/qhash.cpp index d40570d347..c5669babd9 100644 --- a/src/corelib/tools/qhash.cpp +++ b/src/corelib/tools/qhash.cpp @@ -51,6 +51,7 @@ #include <qbytearray.h> #include <qdatetime.h> #include <qbasicatomic.h> +#include <qendian.h> #include <private/qsimd_p.h> #ifndef QT_BOOTSTRAPPED @@ -105,24 +106,24 @@ static uint crc32(const Char *ptr, size_t len, uint h) p += 8; for ( ; p <= e; p += 8) - h2 = _mm_crc32_u64(h2, qUnalignedLoad<qlonglong>(p - 8)); + h2 = _mm_crc32_u64(h2, qFromUnaligned<qlonglong>(p - 8)); h = h2; p -= 8; len = e - p; if (len & 4) { - h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p)); + h = _mm_crc32_u32(h, qFromUnaligned<uint>(p)); p += 4; } # else p += 4; for ( ; p <= e; p += 4) - h = _mm_crc32_u32(h, qUnalignedLoad<uint>(p - 4)); + h = _mm_crc32_u32(h, qFromUnaligned<uint>(p - 4)); p -= 4; len = e - p; # endif if (len & 2) { - h = _mm_crc32_u16(h, qUnalignedLoad<ushort>(p)); + h = _mm_crc32_u16(h, qFromUnaligned<ushort>(p)); p += 2; } if (sizeof(Char) == 1 && len & 1) diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 5ca2ce4c6f..f07eb098f2 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -716,26 +716,4 @@ void qDumpCPUFeatures() puts(""); } -/*! - \internal - \fn T qUnalignedLoad(const void *ptr) - \since 5.6.1 - - Loads a \c{T} from address \a ptr, which may be misaligned. - - Use of this function avoid the undefined behavior that the C++ standard - otherwise attributes to unaligned loads. -*/ - -/*! - \internal - \fn void qUnalignedStore(void *ptr, T t) - \since 5.6.1 - - Stores \a t to address \a ptr, which may be misaligned. - - Use of this function avoid the undefined behavior that the C++ standard - otherwise attributes to unaligned stores. -*/ - QT_END_NAMESPACE diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index ca53908cf5..d689654b29 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -476,43 +476,6 @@ unsigned _bit_scan_forward(unsigned val) #define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) -// these defines are copied from qendian.h -// in Qt 5.7, they have been moved to qglobal.h -// drop them when merging this to 5.7 -#ifdef __has_builtin -# define QT_HAS_BUILTIN(x) __has_builtin(x) -#else -# define QT_HAS_BUILTIN(x) 0 -#endif - -template <typename T> -Q_ALWAYS_INLINE -T qUnalignedLoad(const void *ptr) Q_DECL_NOTHROW -{ - T result; -#if QT_HAS_BUILTIN(__builtin_memcpy) - __builtin_memcpy -#else - memcpy -#endif - /*memcpy*/(&result, ptr, sizeof result); - return result; -} - -template <typename T> -Q_ALWAYS_INLINE -void qUnalignedStore(void *ptr, T t) Q_DECL_NOTHROW -{ -#if QT_HAS_BUILTIN(__builtin_memcpy) - __builtin_memcpy -#else - memcpy -#endif - /*memcpy*/(ptr, &t, sizeof t); -} - -#undef QT_HAS_BUILTIN - QT_END_NAMESPACE #endif // QSIMD_P_H diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 6bbaf05fef..be1ca8ba95 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -577,7 +577,7 @@ static int ucstrncmp(const QChar *a, const uchar *c, int l) // we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes) if (uc + offset + 7 < e) { // same, but we're using an 8-byte load - __m128i chunk = _mm_cvtsi64_si128(qUnalignedLoad<long long>(c + offset)); + __m128i chunk = _mm_cvtsi64_si128(qFromUnaligned<long long>(c + offset)); __m128i secondHalf = _mm_unpacklo_epi8(chunk, nullmask); __m128i ucdata = _mm_loadu_si128((const __m128i*)(uc + offset)); |