From 75ef7958df603ca6de29fa00e82615e0da017903 Mon Sep 17 00:00:00 2001 From: Martijn van Beurden Date: Sat, 15 Oct 2022 15:44:03 +0200 Subject: Remove all assembler --- Makefile.am | 3 +- config.cmake.h.in | 3 - configure.ac | 9 - src/libFLAC/CMakeLists.txt | 12 - src/libFLAC/Makefile.am | 17 +- src/libFLAC/cpu.c | 9 +- src/libFLAC/ia32/CMakeLists.txt | 17 - src/libFLAC/ia32/Makefile.am | 46 --- src/libFLAC/ia32/cpu_asm.nasm | 119 ------ src/libFLAC/ia32/fixed_asm.nasm | 309 --------------- src/libFLAC/ia32/lpc_asm.nasm | 727 ------------------------------------ src/libFLAC/ia32/nasm.h | 95 ----- src/libFLAC/include/private/fixed.h | 3 - src/libFLAC/include/private/lpc.h | 7 - src/libFLAC/stream_encoder.c | 14 - strip_non_asm_libtool_args.sh | 19 - 16 files changed, 6 insertions(+), 1403 deletions(-) delete mode 100644 src/libFLAC/ia32/CMakeLists.txt delete mode 100644 src/libFLAC/ia32/Makefile.am delete mode 100644 src/libFLAC/ia32/cpu_asm.nasm delete mode 100644 src/libFLAC/ia32/fixed_asm.nasm delete mode 100644 src/libFLAC/ia32/lpc_asm.nasm delete mode 100644 src/libFLAC/ia32/nasm.h delete mode 100755 strip_non_asm_libtool_args.sh diff --git a/Makefile.am b/Makefile.am index 7c924c2c..4484db92 100644 --- a/Makefile.am +++ b/Makefile.am @@ -60,7 +60,6 @@ EXTRA_DIST = \ autogen.sh \ config.rpath \ depcomp \ - ltmain.sh \ - strip_non_asm_libtool_args.sh + ltmain.sh CLEANFILES = *~ diff --git a/config.cmake.h.in b/config.cmake.h.in index 1f2387c2..10efc712 100644 --- a/config.cmake.h.in +++ b/config.cmake.h.in @@ -24,9 +24,6 @@ /* define if you have docbook-to-man or docbook2man */ #cmakedefine FLAC__HAS_DOCBOOK_TO_MAN -/* define if you are compiling for x86 and have the NASM assembler */ -#cmakedefine FLAC__HAS_NASM - /* define if you have the ogg library */ #cmakedefine01 OGG_FOUND #define FLAC__HAS_OGG OGG_FOUND diff --git a/configure.ac b/configure.ac index 179d0e1c..5117a0bf 100644 --- a/configure.ac +++ b/configure.ac @@ -471,14 +471,6 @@ AC_CHECK_LIB(rt, clock_gettime, AH_TEMPLATE(HAVE_CLOCK_GETTIME, [define if you have clock_gettime])) AC_SUBST(LIB_CLOCK_GETTIME) -# only matters for x86 -AC_CHECK_PROGS(NASM, nasm) -AM_CONDITIONAL(FLaC__HAS_NASM, test -n "$NASM") -if test -n "$NASM" ; then -AC_DEFINE(FLAC__HAS_NASM) -AH_TEMPLATE(FLAC__HAS_NASM, [define if you are compiling for x86 and have the NASM assembler]) -fi - dnl If debugging is disabled AND no CFLAGS/CXXFLAGS/CPPFLAGS/LDFLAGS dnl are provided, we can set defaults to our liking AS_IF([test "x${ax_enable_debug}" = "xno" && test "x${enable_flags_setting}" = "xyes"], [ @@ -607,7 +599,6 @@ AC_CONFIG_FILES([ \ src/Makefile \ src/libFLAC/Makefile \ src/libFLAC/flac.pc \ - src/libFLAC/ia32/Makefile \ src/libFLAC/include/Makefile \ src/libFLAC/include/private/Makefile \ src/libFLAC/include/protected/Makefile \ diff --git a/src/libFLAC/CMakeLists.txt b/src/libFLAC/CMakeLists.txt index f7aa852d..cd99c8f8 100644 --- a/src/libFLAC/CMakeLists.txt +++ b/src/libFLAC/CMakeLists.txt @@ -38,23 +38,11 @@ else() endif() endif() - -include(CheckLanguage) -check_language(ASM_NASM) -if(CMAKE_ASM_NASM_COMPILER) - enable_language(ASM_NASM) - add_definitions(-DFLAC__HAS_NASM) -endif() - if(NOT WITH_ASM) add_definitions(-DFLAC__NO_ASM) endif() if(FLAC__CPU_IA32) - if(WITH_ASM AND CMAKE_ASM_NASM_COMPILER) - add_subdirectory(ia32) - endif() - option(WITH_SSE "Enable SSE2 optimizations (WITHOUT runtime detection, resulting binary requires SSE2)" ON) check_c_compiler_flag(-msse2 HAVE_MSSE2_FLAG) if(WITH_SSE) diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am index 3b8fc722..c9520949 100644 --- a/src/libFLAC/Makefile.am +++ b/src/libFLAC/Makefile.am @@ -55,19 +55,9 @@ endif AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@ -if FLaC__NO_ASM -else -if FLaC__CPU_IA32 -if FLaC__HAS_NASM -ARCH_SUBDIRS = ia32 -LOCAL_EXTRA_LIBADD = ia32/libFLAC-asm.la -endif -endif -endif - -libFLAC_la_LIBADD = $(LOCAL_EXTRA_LIBADD) @OGG_LIBS@ -lm +libFLAC_la_LIBADD = @OGG_LIBS@ -lm -SUBDIRS = $(ARCH_SUBDIRS) include . +SUBDIRS = include . m4datadir = $(datadir)/aclocal m4data_DATA = libFLAC.m4 @@ -97,7 +87,7 @@ extra_ogg_sources = \ endif # see 'http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning' for numbering convention -libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0 $(LOCAL_EXTRA_LDFLAGS) +libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0 libFLAC_sources = \ bitmath.c \ @@ -134,5 +124,4 @@ libFLAC_sources = \ libFLAC_la_SOURCES = $(libFLAC_sources) # needed for test_libFLAC -libFLAC_static_la_LIBADD = $(LOCAL_EXTRA_LIBADD) libFLAC_static_la_SOURCES = $(libFLAC_sources) diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index 4b1b371e..cba0ad02 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -57,7 +57,7 @@ #include #endif -#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM /* these are flags in EDX of CPUID AX=00000001 */ static const uint32_t FLAC__CPUINFO_X86_CPUID_CMOV = 0x00008000; @@ -97,8 +97,6 @@ cpu_have_cpuid(void) #if defined FLAC__CPU_X86_64 || defined __i686__ || defined __SSE__ || (defined _M_IX86_FP && _M_IX86_FP > 0) /* target CPU does have CPUID instruction */ return 1; -#elif defined FLAC__HAS_NASM - return FLAC__cpu_have_cpuid_asm_ia32(); #elif defined __GNUC__ && defined HAVE_CPUID_H if (__get_cpuid_max(0, 0) != 0) return 1; @@ -151,9 +149,6 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint __cpuid_count(level, 0, *eax, *ebx, *ecx, *edx); return; } -#elif defined FLAC__HAS_NASM && defined FLAC__CPU_IA32 - FLAC__cpu_info_asm_ia32(level, eax, ebx, ecx, edx); - return; #endif *eax = *ebx = *ecx = *edx = 0; } @@ -163,7 +158,7 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint static void x86_cpu_info (FLAC__CPUInfo *info) { -#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM FLAC__bool x86_osxsave = false; FLAC__bool os_avx = false; FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; diff --git a/src/libFLAC/ia32/CMakeLists.txt b/src/libFLAC/ia32/CMakeLists.txt deleted file mode 100644 index 014f6654..00000000 --- a/src/libFLAC/ia32/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -include_directories("${CMAKE_CURRENT_SOURCE_DIR}") -add_compile_options(-I${CMAKE_CURRENT_SOURCE_DIR}/) - -if(APPLE) - add_compile_options(-dOBJ_FORMAT_macho) -elseif(WIN32) - #add_compile_options(-d OBJ_FORMAT_win32) - # FIXME the command above doesn't seem to work on Windows - set(CMAKE_ASM_NASM_FLAGS -dOBJ_FORMAT_win32) -else() - add_compile_options(-dOBJ_FORMAT_elf) -endif() - -add_library(FLAC-asm OBJECT - cpu_asm.nasm - fixed_asm.nasm - lpc_asm.nasm) diff --git a/src/libFLAC/ia32/Makefile.am b/src/libFLAC/ia32/Makefile.am deleted file mode 100644 index 33409906..00000000 --- a/src/libFLAC/ia32/Makefile.am +++ /dev/null @@ -1,46 +0,0 @@ -# libFLAC - Free Lossless Audio Codec library -# Copyright (C) 2001-2009 Josh Coalson -# Copyright (C) 2011-2022 Xiph.Org Foundation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# - Neither the name of the Xiph.org Foundation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -SUFFIXES = .nasm .lo - -STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh -AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/include -I$(top_srcdir)/include -.nasm.lo: - $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) $(NASM) -f $(OBJ_FORMAT) -d OBJ_FORMAT_$(OBJ_FORMAT) -i$(srcdir)/ $< -o $@ - -noinst_LTLIBRARIES = libFLAC-asm.la -libFLAC_asm_la_SOURCES = \ - cpu_asm.nasm \ - fixed_asm.nasm \ - lpc_asm.nasm \ - nasm.h - -EXTRA_DIST = CMakeLists.txt diff --git a/src/libFLAC/ia32/cpu_asm.nasm b/src/libFLAC/ia32/cpu_asm.nasm deleted file mode 100644 index b0b2701b..00000000 --- a/src/libFLAC/ia32/cpu_asm.nasm +++ /dev/null @@ -1,119 +0,0 @@ -; vim:filetype=nasm ts=8 - -; libFLAC - Free Lossless Audio Codec library -; Copyright (C) 2001-2009 Josh Coalson -; Copyright (C) 2011-2022 Xiph.Org Foundation -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following conditions -; are met: -; -; - Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; - Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; -; - Neither the name of the Xiph.org Foundation nor the names of its -; contributors may be used to endorse or promote products derived from -; this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -%include "nasm.h" - - data_section - -cglobal FLAC__cpu_have_cpuid_asm_ia32 -cglobal FLAC__cpu_info_asm_ia32 - - code_section - -; ********************************************************************** -; -; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32() -; - -cident FLAC__cpu_have_cpuid_asm_ia32 - pushfd - pop eax - mov edx, eax - xor eax, 0x00200000 - push eax - popfd - pushfd - pop eax - xor eax, edx - and eax, 0x00200000 - shr eax, 0x15 - push edx - popfd - ret - - -; ********************************************************************** -; -; void FLAC__cpu_info_asm_ia32(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx) -; - -cident FLAC__cpu_info_asm_ia32 - ;[esp + 8] == level - ;[esp + 12] == flags_eax - ;[esp + 16] == flags_ebx - ;[esp + 20] == flags_ecx - ;[esp + 24] == flags_edx - - push ebx - call FLAC__cpu_have_cpuid_asm_ia32 - test eax, eax - jz .no_cpuid - - mov eax, [esp + 8] - and eax, 0x80000000 - cpuid - cmp eax, [esp + 8] - jb .no_cpuid - xor ecx, ecx - mov eax, [esp + 8] - cpuid - - push ebx - ;[esp + 16] == flags_eax - mov ebx, [esp + 16] - mov [ebx], eax - pop eax - ;[esp + 16] == flags_ebx - mov ebx, [esp + 16] - mov [ebx], eax - mov ebx, [esp + 20] - mov [ebx], ecx - mov ebx, [esp + 24] - mov [ebx], edx - jmp .end - -.no_cpuid: - xor eax, eax - mov ebx, [esp + 12] - mov [ebx], eax - mov ebx, [esp + 16] - mov [ebx], eax - mov ebx, [esp + 20] - mov [ebx], eax - mov ebx, [esp + 24] - mov [ebx], eax -.end: - pop ebx - ret - -; end diff --git a/src/libFLAC/ia32/fixed_asm.nasm b/src/libFLAC/ia32/fixed_asm.nasm deleted file mode 100644 index be1ee064..00000000 --- a/src/libFLAC/ia32/fixed_asm.nasm +++ /dev/null @@ -1,309 +0,0 @@ -; vim:filetype=nasm ts=8 - -; libFLAC - Free Lossless Audio Codec library -; Copyright (C) 2001-2009 Josh Coalson -; Copyright (C) 2011-2022 Xiph.Org Foundation -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following conditions -; are met: -; -; - Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; - Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; -; - Neither the name of the Xiph.org Foundation nor the names of its -; contributors may be used to endorse or promote products derived from -; this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -%include "nasm.h" - - data_section - -cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov - - code_section - -; ********************************************************************** -; -; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]) -; { -; FLAC__int32 last_error_0 = data[-1]; -; FLAC__int32 last_error_1 = data[-1] - data[-2]; -; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]); -; FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]); -; FLAC__int32 error, save; -; FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0; -; unsigned i, order; -; -; for(i = 0; i < data_len; i++) { -; error = data[i] ; total_error_0 += local_abs(error); save = error; -; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error; -; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error; -; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error; -; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save; -; } -; -; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4)) -; order = 0; -; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4)) -; order = 1; -; else if(total_error_2 < min(total_error_3, total_error_4)) -; order = 2; -; else if(total_error_3 < total_error_4) -; order = 3; -; else -; order = 4; -; -; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); -; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); -; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); -; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); -; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); -; -; return order; -; } - ALIGN 16 -cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov - - ; esp + 36 == data[] - ; esp + 40 == data_len - ; esp + 44 == residual_bits_per_sample[] - - push ebp - push ebx - push esi - push edi - sub esp, byte 16 - ; qword [esp] == temp space for loading FLAC__uint64s to FPU regs - - ; ebx == &data[i] - ; ecx == loop counter (i) - ; ebp == order - ; mm0 == total_error_1:total_error_0 - ; mm1 == total_error_2:total_error_3 - ; mm2 == :total_error_4 - ; mm3 == last_error_1:last_error_0 - ; mm4 == last_error_2:last_error_3 - - mov ecx, [esp + 40] ; ecx = data_len - test ecx, ecx - jz near .data_len_is_0 - - mov ebx, [esp + 36] ; ebx = data[] - movd mm3, [ebx - 4] ; mm3 = 0:last_error_0 - movd mm2, [ebx - 8] ; mm2 = 0:data[-2] - movd mm1, [ebx - 12] ; mm1 = 0:data[-3] - movd mm0, [ebx - 16] ; mm0 = 0:data[-4] - movq mm5, mm3 ; mm5 = 0:last_error_0 - psubd mm5, mm2 ; mm5 = 0:last_error_1 - punpckldq mm3, mm5 ; mm3 = last_error_1:last_error_0 - psubd mm2, mm1 ; mm2 = 0:data[-2] - data[-3] - psubd mm5, mm2 ; mm5 = 0:last_error_2 - movq mm4, mm5 ; mm4 = 0:last_error_2 - psubd mm4, mm2 ; mm4 = 0:last_error_2 - (data[-2] - data[-3]) - paddd mm4, mm1 ; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3]) - psubd mm4, mm0 ; mm4 = 0:last_error_3 - punpckldq mm4, mm5 ; mm4 = last_error_2:last_error_3 - pxor mm0, mm0 ; mm0 = total_error_1:total_error_0 - pxor mm1, mm1 ; mm1 = total_error_2:total_error_3 - pxor mm2, mm2 ; mm2 = 0:total_error_4 - - ALIGN 16 -.loop: - movd mm7, [ebx] ; mm7 = 0:error_0 - add ebx, byte 4 - movq mm6, mm7 ; mm6 = 0:error_0 - psubd mm7, mm3 ; mm7 = :error_1 - punpckldq mm6, mm7 ; mm6 = error_1:error_0 - movq mm5, mm6 ; mm5 = error_1:error_0 - movq mm7, mm6 ; mm7 = error_1:error_0 - psubd mm5, mm3 ; mm5 = error_2: - movq mm3, mm6 ; mm3 = error_1:error_0 - psrad mm6, 31 - pxor mm7, mm6 - psubd mm7, mm6 ; mm7 = abs(error_1):abs(error_0) - paddd mm0, mm7 ; mm0 = total_error_1:total_error_0 - movq mm6, mm5 ; mm6 = error_2: - psubd mm5, mm4 ; mm5 = error_3: - punpckhdq mm5, mm6 ; mm5 = error_2:error_3 - movq mm7, mm5 ; mm7 = error_2:error_3 - movq mm6, mm5 ; mm6 = error_2:error_3 - psubd mm5, mm4 ; mm5 = :error_4 - movq mm4, mm6 ; mm4 = error_2:error_3 - psrad mm6, 31 - pxor mm7, mm6 - psubd mm7, mm6 ; mm7 = abs(error_2):abs(error_3) - paddd mm1, mm7 ; mm1 = total_error_2:total_error_3 - movq mm6, mm5 ; mm6 = :error_4 - psrad mm5, 31 - pxor mm6, mm5 - psubd mm6, mm5 ; mm6 = :abs(error_4) - paddd mm2, mm6 ; mm2 = :total_error_4 - - dec ecx - jnz short .loop - -; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4)) -; order = 0; -; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4)) -; order = 1; -; else if(total_error_2 < min(total_error_3, total_error_4)) -; order = 2; -; else if(total_error_3 < total_error_4) -; order = 3; -; else -; order = 4; - movq mm3, mm0 ; mm3 = total_error_1:total_error_0 - movd edi, mm2 ; edi = total_error_4 - movd esi, mm1 ; esi = total_error_3 - movd eax, mm0 ; eax = total_error_0 - punpckhdq mm1, mm1 ; mm1 = total_error_2:total_error_2 - punpckhdq mm3, mm3 ; mm3 = total_error_1:total_error_1 - movd edx, mm1 ; edx = total_error_2 - movd ecx, mm3 ; ecx = total_error_1 - - xor ebx, ebx - xor ebp, ebp - inc ebx - cmp ecx, eax - cmovb eax, ecx ; eax = min(total_error_0, total_error_1) - cmovbe ebp, ebx - inc ebx - cmp edx, eax - cmovb eax, edx ; eax = min(total_error_0, total_error_1, total_error_2) - cmovbe ebp, ebx - inc ebx - cmp esi, eax - cmovb eax, esi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3) - cmovbe ebp, ebx - inc ebx - cmp edi, eax - cmovb eax, edi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4) - cmovbe ebp, ebx - movd ebx, mm0 ; ebx = total_error_0 - emms - - ; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0); - ; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0); - ; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0); - ; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0); - ; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0); - xor eax, eax - fild dword [esp + 40] ; ST = data_len (NOTE: assumes data_len is <2gigs) -.rbps_0: - test ebx, ebx - jz .total_error_0_is_0 - fld1 ; ST = 1.0 data_len - mov [esp], ebx - mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_0 - mov ebx, [esp + 44] - fild qword [esp] ; ST = total_error_0 1.0 data_len - fdiv st2 ; ST = total_error_0/data_len 1.0 data_len - fldln2 ; ST = ln2 total_error_0/data_len 1.0 data_len - fmulp st1 ; ST = ln2*total_error_0/data_len 1.0 data_len - fyl2x ; ST = log2(ln2*total_error_0/data_len) data_len - fstp dword [ebx] ; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len) ST = data_len - jmp short .rbps_1 -.total_error_0_is_0: - mov ebx, [esp + 44] - mov [ebx], eax ; residual_bits_per_sample[0] = 0.0 -.rbps_1: - test ecx, ecx - jz .total_error_1_is_0 - fld1 ; ST = 1.0 data_len - mov [esp], ecx - mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_1 - fild qword [esp] ; ST = total_error_1 1.0 data_len - fdiv st2 ; ST = total_error_1/data_len 1.0 data_len - fldln2 ; ST = ln2 total_error_1/data_len 1.0 data_len - fmulp st1 ; ST = ln2*total_error_1/data_len 1.0 data_len - fyl2x ; ST = log2(ln2*total_error_1/data_len) data_len - fstp dword [ebx + 4] ; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len) ST = data_len - jmp short .rbps_2 -.total_error_1_is_0: - mov [ebx + 4], eax ; residual_bits_per_sample[1] = 0.0 -.rbps_2: - test edx, edx - jz .total_error_2_is_0 - fld1 ; ST = 1.0 data_len - mov [esp], edx - mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_2 - fild qword [esp] ; ST = total_error_2 1.0 data_len - fdiv st2 ; ST = total_error_2/data_len 1.0 data_len - fldln2 ; ST = ln2 total_error_2/data_len 1.0 data_len - fmulp st1 ; ST = ln2*total_error_2/data_len 1.0 data_len - fyl2x ; ST = log2(ln2*total_error_2/data_len) data_len - fstp dword [ebx + 8] ; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len) ST = data_len - jmp short .rbps_3 -.total_error_2_is_0: - mov [ebx + 8], eax ; residual_bits_per_sample[2] = 0.0 -.rbps_3: - test esi, esi - jz .total_error_3_is_0 - fld1 ; ST = 1.0 data_len - mov [esp], esi - mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_3 - fild qword [esp] ; ST = total_error_3 1.0 data_len - fdiv st2 ; ST = total_error_3/data_len 1.0 data_len - fldln2 ; ST = ln2 total_error_3/data_len 1.0 data_len - fmulp st1 ; ST = ln2*total_error_3/data_len 1.0 data_len - fyl2x ; ST = log2(ln2*total_error_3/data_len) data_len - fstp dword [ebx + 12] ; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len) ST = data_len - jmp short .rbps_4 -.total_error_3_is_0: - mov [ebx + 12], eax ; residual_bits_per_sample[3] = 0.0 -.rbps_4: - test edi, edi - jz .total_error_4_is_0 - fld1 ; ST = 1.0 data_len - mov [esp], edi - mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_4 - fild qword [esp] ; ST = total_error_4 1.0 data_len - fdiv st2 ; ST = total_error_4/data_len 1.0 data_len - fldln2 ; ST = ln2 total_error_4/data_len 1.0 data_len - fmulp st1 ; ST = ln2*total_error_4/data_len 1.0 data_len - fyl2x ; ST = log2(ln2*total_error_4/data_len) data_len - fstp dword [ebx + 16] ; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len) ST = data_len - jmp short .rbps_end -.total_error_4_is_0: - mov [ebx + 16], eax ; residual_bits_per_sample[4] = 0.0 -.rbps_end: - fstp st0 ; ST = [empty] - jmp short .end -.data_len_is_0: - ; data_len == 0, so residual_bits_per_sample[*] = 0.0 - xor ebp, ebp - mov edi, [esp + 44] - mov [edi], ebp - mov [edi + 4], ebp - mov [edi + 8], ebp - mov [edi + 12], ebp - mov [edi + 16], ebp - add ebp, byte 4 ; order = 4 - -.end: - mov eax, ebp ; return order - add esp, byte 16 - pop edi - pop esi - pop ebx - pop ebp - ret - -; end diff --git a/src/libFLAC/ia32/lpc_asm.nasm b/src/libFLAC/ia32/lpc_asm.nasm deleted file mode 100644 index 8be9e7aa..00000000 --- a/src/libFLAC/ia32/lpc_asm.nasm +++ /dev/null @@ -1,727 +0,0 @@ -; vim:filetype=nasm ts=8 - -; libFLAC - Free Lossless Audio Codec library -; Copyright (C) 2001-2009 Josh Coalson -; Copyright (C) 2011-2022 Xiph.Org Foundation -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following conditions -; are met: -; -; - Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; - Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; -; - Neither the name of the Xiph.org Foundation nor the names of its -; contributors may be used to endorse or promote products derived from -; this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -%include "nasm.h" - - data_section - -cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32 -cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx -cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32 - - code_section - -;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) -; -; for(i = 0; i < data_len; i++) { -; sum = 0; -; for(j = 0; j < order; j++) -; sum += qlp_coeff[j] * data[i-j-1]; -; residual[i] = data[i] - (sum >> lp_quantization); -; } -; - ALIGN 16 -cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32 - ;[esp + 40] residual[] - ;[esp + 36] lp_quantization - ;[esp + 32] order - ;[esp + 28] qlp_coeff[] - ;[esp + 24] data_len - ;[esp + 20] data[] - - ;ASSERT(order > 0) - - push ebp - push ebx - push esi - push edi - - mov esi, [esp + 20] ; esi = data[] - mov edi, [esp + 40] ; edi = residual[] - mov eax, [esp + 32] ; eax = order - mov ebx, [esp + 24] ; ebx = data_len - - test ebx, ebx - jz near .end ; do nothing if data_len == 0 -.begin: - cmp eax, byte 1 - jg short .i_1more - - mov ecx, [esp + 28] - mov edx, [ecx] ; edx = qlp_coeff[0] - mov eax, [esi - 4] ; eax = data[-1] - mov ecx, [esp + 36] ; cl = lp_quantization - ALIGN 16 -.i_1_loop_i: - imul eax, edx - sar eax, cl - neg eax - add eax, [esi] - mov [edi], eax - mov eax, [esi] - add edi, byte 4 - add esi, byte 4 - dec ebx - jnz .i_1_loop_i - - jmp .end - -.i_1more: - cmp eax, byte 32 ; for order <= 32 there is a faster routine - jbe short .i_32 - - ; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32 - ALIGN 16 -.i_32more_loop_i: - xor ebp, ebp - mov ecx, [esp + 32] - mov edx, ecx - shl edx, 2 - add edx, [esp + 28] - neg ecx - ALIGN 16 -.i_32more_loop_j: - sub edx, byte 4 - mov eax, [edx] - imul eax, [esi + 4 * ecx] - add ebp, eax - inc ecx - jnz short .i_32more_loop_j - - mov ecx, [esp + 36] - sar ebp, cl - neg ebp - add ebp, [esi] - mov [edi], ebp - add esi, byte 4 - add edi, byte 4 - - dec ebx - jnz .i_32more_loop_i - - jmp .end - -.mov_eip_to_eax: - mov eax, [esp] - ret - -.i_32: - sub edi, esi - neg eax - lea edx, [eax + eax * 8 + .jumper_0 - .get_eip0] - call .mov_eip_to_eax -.get_eip0: - add edx, eax - inc edx - mov eax, [esp + 28] ; eax = qlp_coeff[] - xor ebp, ebp - jmp edx - - mov ecx, [eax + 124] - imul ecx, [esi - 128] - add ebp, ecx - mov ecx, [eax + 120] - imul ecx, [esi - 124] - add ebp, ecx - mov ecx, [eax + 116] - imul ecx, [esi - 120] - add ebp, ecx - mov ecx, [eax + 112] - imul ecx, [esi - 116] - add ebp, ecx - mov ecx, [eax + 108] - imul ecx, [esi - 112] - add ebp, ecx - mov ecx, [eax + 104] - imul ecx, [esi - 108] - add ebp, ecx - mov ecx, [eax + 100] - imul ecx, [esi - 104] - add ebp, ecx - mov ecx, [eax + 96] - imul ecx, [esi - 100] - add ebp, ecx - mov ecx, [eax + 92] - imul ecx, [esi - 96] - add ebp, ecx - mov ecx, [eax + 88] - imul ecx, [esi - 92] - add ebp, ecx - mov ecx, [eax + 84] - imul ecx, [esi - 88] - add ebp, ecx - mov ecx, [eax + 80] - imul ecx, [esi - 84] - add ebp, ecx - mov ecx, [eax + 76] - imul ecx, [esi - 80] - add ebp, ecx - mov ecx, [eax + 72] - imul ecx, [esi - 76] - add ebp, ecx - mov ecx, [eax + 68] - imul ecx, [esi - 72] - add ebp, ecx - mov ecx, [eax + 64] - imul ecx, [esi - 68] - add ebp, ecx - mov ecx, [eax + 60] - imul ecx, [esi - 64] - add ebp, ecx - mov ecx, [eax + 56] - imul ecx, [esi - 60] - add ebp, ecx - mov ecx, [eax + 52] - imul ecx, [esi - 56] - add ebp, ecx - mov ecx, [eax + 48] - imul ecx, [esi - 52] - add ebp, ecx - mov ecx, [eax + 44] - imul ecx, [esi - 48] - add ebp, ecx - mov ecx, [eax + 40] - imul ecx, [esi - 44] - add ebp, ecx - mov ecx, [eax + 36] - imul ecx, [esi - 40] - add ebp, ecx - mov ecx, [eax + 32] - imul ecx, [esi - 36] - add ebp, ecx - mov ecx, [eax + 28] - imul ecx, [esi - 32] - add ebp, ecx - mov ecx, [eax + 24] - imul ecx, [esi - 28] - add ebp, ecx - mov ecx, [eax + 20] - imul ecx, [esi - 24] - add ebp, ecx - mov ecx, [eax + 16] - imul ecx, [esi - 20] - add ebp, ecx - mov ecx, [eax + 12] - imul ecx, [esi - 16] - add ebp, ecx - mov ecx, [eax + 8] - imul ecx, [esi - 12] - add ebp, ecx - mov ecx, [eax + 4] - imul ecx, [esi - 8] - add ebp, ecx - mov ecx, [eax] ; there is one byte missing - imul ecx, [esi - 4] - add ebp, ecx -.jumper_0: - - mov ecx, [esp + 36] - sar ebp, cl - neg ebp - add ebp, [esi] - mov [edi + esi], ebp - add esi, byte 4 - - dec ebx - jz short .end - xor ebp, ebp - jmp edx - -.end: - pop edi - pop esi - pop ebx - pop ebp - ret - -; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for -; the channel and qlp_coeffs must be <= 16. Especially note that this routine -; cannot be used for side-channel coded 16bps channels since the effective bps -; is 17. - ALIGN 16 -cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx - ;[esp + 40] residual[] - ;[esp + 36] lp_quantization - ;[esp + 32] order - ;[esp + 28] qlp_coeff[] - ;[esp + 24] data_len - ;[esp + 20] data[] - - ;ASSERT(order > 0) - - push ebp - push ebx - push esi - push edi - - mov esi, [esp + 20] ; esi = data[] - mov edi, [esp + 40] ; edi = residual[] - mov eax, [esp + 32] ; eax = order - mov ebx, [esp + 24] ; ebx = data_len - - test ebx, ebx - jz near .end ; do nothing if data_len == 0 - dec ebx - test ebx, ebx - jz near .last_one - - mov edx, [esp + 28] ; edx = qlp_coeff[] - movd mm6, [esp + 36] ; mm6 = 0:lp_quantization - mov ebp, esp - - and esp, 0xfffffff8 - - xor ecx, ecx -.copy_qlp_loop: - push word [edx + 4 * ecx] - inc ecx - cmp ecx, eax - jnz short .copy_qlp_loop - - and ecx, 0x3 - test ecx, ecx - je short .za_end - sub ecx, byte 4 -.za_loop: - push word 0 - inc eax - inc ecx - jnz short .za_loop -.za_end: - - movq mm5, [esp + 2 * eax - 8] - movd mm4, [esi - 16] - punpckldq mm4, [esi - 12] - movd mm0, [esi - 8] - punpckldq mm0, [esi - 4] - packssdw mm4, mm0 - - cmp eax, byte 4 - jnbe short .mmx_4more - - ALIGN 16 -.mmx_4_loop_i: - movd mm1, [esi] - movq mm3, mm4 - punpckldq mm1, [esi + 4] - psrlq mm4, 16 - movq mm0, mm1 - psllq mm0, 48 - por mm4, mm0 - movq mm2, mm4 - psrlq mm4, 16 - pxor mm0, mm0 - punpckhdq mm0, mm1 - pmaddwd mm3, mm5 - pmaddwd mm2, mm5 - psllq mm0, 16 - por mm4, mm0 - movq mm0, mm3 - punpckldq mm3, mm2 - punpckhdq mm0, mm2 - paddd mm3, mm0 - psrad mm3, mm6 - psubd mm1, mm3 - movd [edi], mm1 - punpckhdq mm1, mm1 - movd [edi + 4], mm1 - - add edi, byte 8 - add esi, byte 8 - - sub ebx, 2 - jg .mmx_4_loop_i - jmp .mmx_end - -.mmx_4more: - shl eax, 2 - neg eax - add eax, byte 16 - - ALIGN 16 -.mmx_4more_loop_i: - movd mm1, [esi] - punpckldq mm1, [esi + 4] - movq mm3, mm4 - psrlq mm4, 16 - movq mm0, mm1 - psllq mm0, 48 - por mm4, mm0 - movq mm2, mm4 - psrlq mm4, 16 - pxor mm0, mm0 - punpckhdq mm0, mm1 - pmaddwd mm3, mm5 - pmaddwd mm2, mm5 - psllq mm0, 16 - por mm4, mm0 - - mov ecx, esi - add ecx, eax - mov edx, esp - - ALIGN 16 -.mmx_4more_loop_j: - movd mm0, [ecx - 16] - movd mm7, [ecx - 8] - punpckldq mm0, [ecx - 12] - punpckldq mm7, [ecx - 4] - packssdw mm0, mm7 - pmaddwd mm0, [edx] - punpckhdq mm7, mm7 - paddd mm3, mm0 - movd mm0, [ecx - 12] - punpckldq mm0, [ecx - 8] - punpckldq mm7, [ecx] - packssdw mm0, mm7 - pmaddwd mm0, [edx] - paddd mm2, mm0 - - add edx, byte 8 - add ecx, byte 16 - cmp ecx, esi - jnz .mmx_4more_loop_j - - movq mm0, mm3 - punpckldq mm3, mm2 - punpckhdq mm0, mm2 - paddd mm3, mm0 - psrad mm3, mm6 - psubd mm1, mm3 - movd [edi], mm1 - punpckhdq mm1, mm1 - movd [edi + 4], mm1 - - add edi, byte 8 - add esi, byte 8 - - sub ebx, 2 - jg near .mmx_4more_loop_i - -.mmx_end: - emms - mov esp, ebp -.last_one: - mov eax, [esp + 32] - inc ebx - jnz near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin - -.end: - pop edi - pop esi - pop ebx - pop ebp - ret - -; ********************************************************************** -; -;void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]) -; { -; unsigned i, j; -; FLAC__int64 sum; -; -; FLAC__ASSERT(order > 0); -; -; for(i = 0; i < data_len; i++) { -; sum = 0; -; for(j = 0; j < order; j++) -; sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1]; -; residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization); -; } -; } - ALIGN 16 -cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32 - ;[esp + 40] residual[] - ;[esp + 36] lp_quantization - ;[esp + 32] order - ;[esp + 28] qlp_coeff[] - ;[esp + 24] data_len - ;[esp + 20] data[] - - ;ASSERT(order > 0) - ;ASSERT(order <= 32) - ;ASSERT(lp_quantization <= 31) - - push ebp - push ebx - push esi - push edi - - mov ebx, [esp + 24] ; ebx = data_len - test ebx, ebx - jz near .end ; do nothing if data_len == 0 - -.begin: - mov eax, [esp + 32] ; eax = order - cmp eax, 1 - jg short .i_32 - - mov esi, [esp + 40] ; esi = residual[] - mov edi, [esp + 20] ; edi = data[] - mov ecx, [esp + 28] ; ecx = qlp_coeff[] - mov ebp, [ecx] ; ebp = qlp_coeff[0] - mov eax, [edi - 4] ; eax = data[-1] - mov ecx, [esp + 36] ; cl = lp_quantization - ALIGN 16 -.i_1_loop_i: - imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1] - shrd eax, edx, cl ; 0 <= lp_quantization <= 15 - neg eax - add eax, [edi] - mov [esi], eax - mov eax, [edi] - add esi, 4 - add edi, 4 - dec ebx - jnz .i_1_loop_i - jmp .end - -.mov_eip_to_eax: - mov eax, [esp] - ret - -.i_32: ; eax = order - neg eax - add eax, eax - lea ebp, [eax + eax * 4 + .jumper_0 - .get_eip0] - call .mov_eip_to_eax -.get_eip0: - add ebp, eax - inc ebp ; compensate for the shorter opcode on the last iteration - - mov ebx, [esp + 28] ; ebx = qlp_coeff[] - mov edi, [esp + 20] ; edi = data[] - sub [esp + 40], edi ; residual[] -= data[] - - xor ecx, ecx - xor esi, esi - jmp ebp - -;eax = -- -;edx = -- -;ecx = 0 -;esi = 0 -; -;ebx = qlp_coeff[] -;edi = data[] -;ebp = @address - - mov eax, [ebx + 124] ; eax = qlp_coeff[31] - imul dword [edi - 128] ; edx:eax = qlp_coeff[31] * data[i-32] - add ecx, eax - adc esi, edx ; sum += qlp_coeff[31] * data[i-32] - - mov eax, [ebx + 120] ; eax = qlp_coeff[30] - imul dword [edi - 124] ; edx:eax = qlp_coeff[30] * data[i-31] - add ecx, eax - adc esi, edx ; sum += qlp_coeff[30] * data[i-31] - - mov eax, [ebx + 116] - imul dword [edi - 120] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 112] - imul dword [edi - 116] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 108] - imul dword [edi - 112] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 104] - imul dword [edi - 108] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 100] - imul dword [edi - 104] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 96] - imul dword [edi - 100] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 92] - imul dword [edi - 96] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 88] - imul dword [edi - 92] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 84] - imul dword [edi - 88] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 80] - imul dword [edi - 84] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 76] - imul dword [edi - 80] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 72] - imul dword [edi - 76] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 68] - imul dword [edi - 72] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 64] - imul dword [edi - 68] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 60] - imul dword [edi - 64] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 56] - imul dword [edi - 60] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 52] - imul dword [edi - 56] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 48] - imul dword [edi - 52] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 44] - imul dword [edi - 48] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 40] - imul dword [edi - 44] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 36] - imul dword [edi - 40] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 32] - imul dword [edi - 36] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 28] - imul dword [edi - 32] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 24] - imul dword [edi - 28] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 20] - imul dword [edi - 24] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 16] - imul dword [edi - 20] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 12] - imul dword [edi - 16] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 8] - imul dword [edi - 12] - add ecx, eax - adc esi, edx - - mov eax, [ebx + 4] - imul dword [edi - 8] - add ecx, eax - adc esi, edx - - mov eax, [ebx] ; eax = qlp_coeff[ 0] (NOTE: one byte missing from instruction) - imul dword [edi - 4] ; edx:eax = qlp_coeff[ 0] * data[i- 1] - add ecx, eax - adc esi, edx ; sum += qlp_coeff[ 0] * data[i- 1] - -.jumper_0: - mov edx, ecx -;esi:edx = sum - mov ecx, [esp + 36] ; cl = lp_quantization - shrd edx, esi, cl ; edx = (sum >> lp_quantization) -;eax = -- -;ecx = -- -;edx = sum >> lp_q -;esi = -- - neg edx ; edx = -(sum >> lp_quantization) - mov eax, [esp + 40] ; residual[] - data[] - add edx, [edi] ; edx = data[i] - (sum >> lp_quantization) - mov [edi + eax], edx - add edi, 4 - - dec dword [esp + 24] - jz short .end - xor ecx, ecx - xor esi, esi - jmp ebp - -.end: - pop edi - pop esi - pop ebx - pop ebp - ret - -; end diff --git a/src/libFLAC/ia32/nasm.h b/src/libFLAC/ia32/nasm.h deleted file mode 100644 index cdb8bf55..00000000 --- a/src/libFLAC/ia32/nasm.h +++ /dev/null @@ -1,95 +0,0 @@ -; libFLAC - Free Lossless Audio Codec library -; Copyright (C) 2001-2009 Josh Coalson -; Copyright (C) 2011-2022 Xiph.Org Foundation -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following conditions -; are met: -; -; - Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; - Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; -; - Neither the name of the Xiph.org Foundation nor the names of its -; contributors may be used to endorse or promote products derived from -; this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - bits 32 - -%ifdef OBJ_FORMAT_win32 - %define FLAC__PUBLIC_NEEDS_UNDERSCORE - %idefine code_section section .text align=16 class=CODE use32 - %idefine data_section section .data align=32 class=DATA use32 - %idefine bss_section section .bss align=32 class=DATA use32 -%elifdef OBJ_FORMAT_aout - %define FLAC__PUBLIC_NEEDS_UNDERSCORE - %idefine code_section section .text - %idefine data_section section .data - %idefine bss_section section .bss -%elifdef OBJ_FORMAT_aoutb - %define FLAC__PUBLIC_NEEDS_UNDERSCORE - %idefine code_section section .text - %idefine data_section section .data - %idefine bss_section section .bss -%elifdef OBJ_FORMAT_coff - %define FLAC__PUBLIC_NEEDS_UNDERSCORE - %idefine code_section section .text - %idefine data_section section .data - %idefine bss_section section .bss -%elifdef OBJ_FORMAT_macho - %define FLAC__PUBLIC_NEEDS_UNDERSCORE - %idefine code_section section .text - %idefine data_section section .data - %idefine bss_section section .bss -%elifdef OBJ_FORMAT_elf - %idefine code_section section .text align=16 - %idefine data_section section .data align=32 - %idefine bss_section section .bss align=32 -%else - %error unsupported object format! ; this directive doesn't really work here -%endif - -%imacro cglobal 1 - %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE - global _%1 - %else - %if __NASM_MAJOR__ >= 2 - global %1:function hidden - %else - global %1 - %endif - %endif -%endmacro - -%imacro cextern 1 - %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE - extern _%1 - %else - extern %1 - %endif -%endmacro - -%imacro cident 1 -_%1: -%1: -%endmacro - -%ifdef OBJ_FORMAT_elf -section .note.GNU-stack progbits noalloc noexec nowrite align=1 -%endif - diff --git a/src/libFLAC/include/private/fixed.h b/src/libFLAC/include/private/fixed.h index aa742cac..68024b03 100644 --- a/src/libFLAC/include/private/fixed.h +++ b/src/libFLAC/include/private/fixed.h @@ -69,9 +69,6 @@ uint32_t FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[ uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]); # endif # endif -# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM -uint32_t FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); -# endif # endif #else uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]); diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h index 5212b35e..8ff3b894 100644 --- a/src/libFLAC/include/private/lpc.h +++ b/src/libFLAC/include/private/lpc.h @@ -174,13 +174,6 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon(const FLAC__in void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]); # endif -# ifdef FLAC__CPU_IA32 -# ifdef FLAC__HAS_NASM -void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]); -void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]); -void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]); -# endif -# endif # if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN # ifdef FLAC__SSE2_SUPPORTED void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]); diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c index aae0bf36..2f0b1e31 100644 --- a/src/libFLAC/stream_encoder.c +++ b/src/libFLAC/stream_encoder.c @@ -949,20 +949,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_( if(encoder->private_->cpuinfo.use_asm) { # ifdef FLAC__CPU_IA32 FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32); -# ifdef FLAC__HAS_NASM - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */ - if (encoder->private_->cpuinfo.x86.mmx) { - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32; - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx; - } - else { - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32; - encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32; - } - - if (encoder->private_->cpuinfo.x86.mmx && encoder->private_->cpuinfo.x86.cmov) - encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov; -# endif /* FLAC__HAS_NASM */ # if FLAC__HAS_X86INTRIN # ifdef FLAC__SSE2_SUPPORTED if (encoder->private_->cpuinfo.x86.sse2) { diff --git a/strip_non_asm_libtool_args.sh b/strip_non_asm_libtool_args.sh deleted file mode 100755 index d5a61f15..00000000 --- a/strip_non_asm_libtool_args.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -# -# libtool assumes that the compiler can handle the -fPIC flag. -# This isn't always true (for example, nasm can't handle it). -# Also, on some versions of OS X it tries to pass -fno-common -# to 'as' which causes problems. -command="" -while [ $1 ]; do - if [ "$1" != "-fPIC" ]; then - if [ "$1" != "-DPIC" ]; then - if [ "$1" != "-fno-common" ]; then - command="$command $1" - fi - fi - fi - shift -done -echo $command -exec $command -- cgit v1.2.1