diff options
author | Erik de Castro Lopo <erikd@mega-nerd.com> | 2013-09-25 23:05:13 +1000 |
---|---|---|
committer | Erik de Castro Lopo <erikd@mega-nerd.com> | 2013-09-25 23:05:17 +1000 |
commit | 8fe2c23e316e8297ccc60e4cff4b7df45b148f37 (patch) | |
tree | 8a245701deb5defd3df7d098c22e687472f91408 /src | |
parent | 99f5a57d2aa8a04e4c7f8b96ebaa6f166b30b7f6 (diff) | |
download | flac-8fe2c23e316e8297ccc60e4cff4b7df45b148f37.tar.gz |
Add SSE4.1/SSE4.2 detection.
Patch-from: lvqcl <lvqcl.mail@gmail.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/libFLAC/cpu.c | 64 | ||||
-rw-r--r-- | src/libFLAC/include/private/cpu.h | 4 |
2 files changed, 40 insertions, 28 deletions
diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c
index 493e1398..dce1b2f1 100644
--- a/src/libFLAC/cpu.c
+++ b/src/libFLAC/cpu.c
@@ -96,6 +96,8 @@ static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
 /* these are flags in ECX of CPUID AX=00000001 */
 static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001;
 static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 0x00080000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 0x00100000;
 /* these are flags in EDX of CPUID AX=80000001 */
 static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
 static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
@@ -171,6 +173,8 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 	info->data.ia32.sse2 = false;
 	info->data.ia32.sse3 = false;
 	info->data.ia32.ssse3 = false;
+	info->data.ia32.sse41 = false;
+	info->data.ia32.sse42 = false;
 	info->data.ia32._3dnow = false;
 	info->data.ia32.ext3dnow = false;
 	info->data.ia32.extmmx = false;
@@ -185,6 +189,8 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 		info->data.ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
 		info->data.ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
 		info->data.ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
+		info->data.ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
+		info->data.ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;
 
 #ifdef FLAC__USE_3DNOW
 		flags_edx = FLAC__cpu_info_extended_amd_asm_ia32();
@@ -206,6 +212,8 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 	fprintf(stderr, " SSE2 ....... %c\n", info->data.ia32.sse2 ? 'Y' : 'n');
 	fprintf(stderr, " SSE3 ....... %c\n", info->data.ia32.sse3 ? 'Y' : 'n');
 	fprintf(stderr, " SSSE3 ...... %c\n", info->data.ia32.ssse3 ? 'Y' : 'n');
+	fprintf(stderr, " SSE41 ...... %c\n", info->data.ia32.sse41 ? 'Y' : 'n');
+	fprintf(stderr, " SSE42 ...... %c\n", info->data.ia32.sse42 ? 'Y' : 'n');
 	fprintf(stderr, " 3DNow! ..... %c\n", info->data.ia32._3dnow ? 'Y' : 'n');
 	fprintf(stderr, " 3DNow!-ext . %c\n", info->data.ia32.ext3dnow? 'Y' : 'n');
 	fprintf(stderr, " 3DNow!-MMX . %c\n", info->data.ia32.extmmx ? 'Y' : 'n');
@@ -217,7 +225,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 	if(info->data.ia32.fxsr || info->data.ia32.sse || info->data.ia32.sse2) {
 #if defined FLAC__NO_SSE_OS
 		/* assume user knows better than us; turn it off */
-		info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 #elif defined FLAC__SSE_OS
 		/* assume user knows better than us; leave as detected above */
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
@@ -227,21 +235,21 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 		len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
 		len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse" , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
 		if(!sse)
-			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 #elif defined(__NetBSD__) || defined (__OpenBSD__)
 # if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
 		int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
 		size_t len = sizeof(val);
 		if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
-			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 		else { /* double-check SSE2 */
 			mib[1] = CPU_SSE2;
 			len = sizeof(val);
 			if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
-				info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+				info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 		}
 # else
-		info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 # endif
 #elif defined(__linux__)
 		int sse = 0;
@@ -280,7 +288,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 		}
 
 		if(!sse)
-			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 #elif defined(_MSC_VER)
 # ifdef USE_TRY_CATCH_FLAVOR
 		__try {
@@ -290,7 +298,7 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 		}
 		__except(EXCEPTION_EXECUTE_HANDLER) {
 			if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
-				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 		}
 # else
 		int sse = 0;
@@ -314,11 +322,11 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 		}
 		SetUnhandledExceptionFilter(save);
 		if(!sse)
-			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 # endif
 #else
 		/* no way to test, disable to be safe */
-		info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+		info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = info->data.ia32.sse41 = info->data.ia32.sse42 = false;
 #endif
 #ifdef DEBUG
 		fprintf(stderr, " SSE OS sup . %c\n", info->data.ia32.sse ? 'Y' : 'n');
@@ -337,19 +345,21 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 	info->type = FLAC__CPUINFO_TYPE_X86_64;
 #if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
 	info->use_asm = true;
-	info->data.x86_64.sse3 = false;
-	info->data.x86_64.ssse3 = false;
 	{
 		/* http://www.sandpile.org/x86/cpuid.htm */
 		FLAC__uint32 flags_edx, flags_ecx;
 		FLAC__cpu_info_x86(&flags_edx, &flags_ecx);
 		info->data.x86_64.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
 		info->data.x86_64.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
+		info->data.x86_64.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
+		info->data.x86_64.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;
 	}
 #ifdef DEBUG
 	fprintf(stderr, "CPU info (x86-64):\n");
 	fprintf(stderr, " SSE3 ....... %c\n", info->data.x86_64.sse3 ? 'Y' : 'n');
 	fprintf(stderr, " SSSE3 ...... %c\n", info->data.x86_64.ssse3 ? 'Y' : 'n');
+	fprintf(stderr, " SSE41 ...... %c\n", info->data.x86_64.sse41 ? 'Y' : 'n');
+	fprintf(stderr, " SSE42 ...... %c\n", info->data.x86_64.sse42 ? 'Y' : 'n');
 #endif
 
 #else
@@ -432,28 +442,26 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 
 #if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
 
-#if defined _MSC_VER && (_MSC_VER >= 1400)
+#if defined _MSC_VER
 #include <intrin.h> /* for __cpuid() */
+#elif defined __GNUC__
+#include <cpuid.h> /* for __get_cpuid() */
 #endif
 
 void FLAC__cpu_info_x86(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx)
 {
-#if defined _MSC_VER && (_MSC_VER >= 1400)
-	int cpuinfo[4];
-	__cpuid(cpuinfo, 1);
-	*flags_ecx = cpuinfo[2];
-	*flags_edx = cpuinfo[3];
-#elif defined __GNUC__ && __GNUC__
-	FLAC__uint32 info = 1, flags_eax, flags_ebx;
-	__asm__ __volatile__ (
-		"xchg %%ebx, %%edi;"
-		"cpuid;"
-		"xchg %%edi, %%ebx;"
-		:"=a" (flags_eax), "=D" (flags_ebx), "=c" (*flags_ecx), "=d" (*flags_edx)
-		:"a" (info)
-	);
-#else
+#if defined _MSC_VER
+    int cpuinfo[4];
+    __cpuid(cpuinfo, 1);
+    *flags_ecx = cpuinfo[2];
+    *flags_edx = cpuinfo[3];
+#elif defined __GNUC__
+    FLAC__uint32 flags_eax, flags_ebx;
+    if (0 == __get_cpuid(1, &flags_eax, &flags_ebx, flags_ecx, flags_edx))
 	*flags_ecx = *flags_edx = 0;
+#else
+    *flags_ecx = *flags_edx = 0;
 #endif
 }
-#endif /* (FLAC__CPU_IA32 || FLAC__HAS_X86INTRIN) && FLAC__CPU_X86_64 */
+
+#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h
index 4bfe1ae3..4d264367 100644
--- a/src/libFLAC/include/private/cpu.h
+++ b/src/libFLAC/include/private/cpu.h
@@ -56,6 +56,8 @@ typedef struct {
 	FLAC__bool sse2;
 	FLAC__bool sse3;
 	FLAC__bool ssse3;
+	FLAC__bool sse41;
+	FLAC__bool sse42;
 	FLAC__bool _3dnow;
 	FLAC__bool ext3dnow;
 	FLAC__bool extmmx;
@@ -64,6 +66,8 @@ typedef struct {
 typedef struct {
 	FLAC__bool sse3;
 	FLAC__bool ssse3;
+	FLAC__bool sse41;
+	FLAC__bool sse42;
 } FLAC__CPUInfo_x86_64;
 
 typedef struct { |