summary refs log tree commit diff
diff options
context:
space:
mode:
author Martijn van Beurden <mvanb1@gmail.com> 2022-10-15 15:44:03 +0200
committer GitHub <noreply@github.com> 2022-10-15 15:44:03 +0200
commit 75ef7958df603ca6de29fa00e82615e0da017903 (patch)
tree 688687e46c12fdf26adcef9225b3844161952cd7
parent 0665053c5de1841dff379fa60b279c8614399326 (diff)
download flac-75ef7958df603ca6de29fa00e82615e0da017903.tar.gz
Remove all assembler
-rw-r--r-- Makefile.am 3
-rw-r--r-- config.cmake.h.in 3
-rw-r--r-- configure.ac 9
-rw-r--r-- src/libFLAC/CMakeLists.txt 12
-rw-r--r-- src/libFLAC/Makefile.am 17
-rw-r--r-- src/libFLAC/cpu.c 9
-rw-r--r-- src/libFLAC/ia32/CMakeLists.txt 17
-rw-r--r-- src/libFLAC/ia32/Makefile.am 46
-rw-r--r-- src/libFLAC/ia32/cpu_asm.nasm 119
-rw-r--r-- src/libFLAC/ia32/fixed_asm.nasm 309
-rw-r--r-- src/libFLAC/ia32/lpc_asm.nasm 727
-rw-r--r-- src/libFLAC/ia32/nasm.h 95
-rw-r--r-- src/libFLAC/include/private/fixed.h 3
-rw-r--r-- src/libFLAC/include/private/lpc.h 7
-rw-r--r-- src/libFLAC/stream_encoder.c 14
-rwxr-xr-x strip_non_asm_libtool_args.sh 19
16 files changed, 6 insertions, 1403 deletions
diff --git a/Makefile.am b/Makefile.am
index 7c924c2c..4484db92 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -60,7 +60,6 @@ EXTRA_DIST = \
autogen.sh \
config.rpath \
depcomp \
- ltmain.sh \
- strip_non_asm_libtool_args.sh
+ ltmain.sh
CLEANFILES = *~
diff --git a/config.cmake.h.in b/config.cmake.h.in
index 1f2387c2..10efc712 100644
--- a/config.cmake.h.in
+++ b/config.cmake.h.in
@@ -24,9 +24,6 @@
/* define if you have docbook-to-man or docbook2man */
#cmakedefine FLAC__HAS_DOCBOOK_TO_MAN
-/* define if you are compiling for x86 and have the NASM assembler */
-#cmakedefine FLAC__HAS_NASM
-
/* define if you have the ogg library */
#cmakedefine01 OGG_FOUND
#define FLAC__HAS_OGG OGG_FOUND
diff --git a/configure.ac b/configure.ac
index 179d0e1c..5117a0bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -471,14 +471,6 @@ AC_CHECK_LIB(rt, clock_gettime,
AH_TEMPLATE(HAVE_CLOCK_GETTIME, [define if you have clock_gettime]))
AC_SUBST(LIB_CLOCK_GETTIME)
-# only matters for x86
-AC_CHECK_PROGS(NASM, nasm)
-AM_CONDITIONAL(FLaC__HAS_NASM, test -n "$NASM")
-if test -n "$NASM" ; then
-AC_DEFINE(FLAC__HAS_NASM)
-AH_TEMPLATE(FLAC__HAS_NASM, [define if you are compiling for x86 and have the NASM assembler])
-fi
-
dnl If debugging is disabled AND no CFLAGS/CXXFLAGS/CPPFLAGS/LDFLAGS
dnl are provided, we can set defaults to our liking
AS_IF([test "x${ax_enable_debug}" = "xno" && test "x${enable_flags_setting}" = "xyes"], [
@@ -607,7 +599,6 @@ AC_CONFIG_FILES([ \
src/Makefile \
src/libFLAC/Makefile \
src/libFLAC/flac.pc \
- src/libFLAC/ia32/Makefile \
src/libFLAC/include/Makefile \
src/libFLAC/include/private/Makefile \
src/libFLAC/include/protected/Makefile \
diff --git a/src/libFLAC/CMakeLists.txt b/src/libFLAC/CMakeLists.txt
index f7aa852d..cd99c8f8 100644
--- a/src/libFLAC/CMakeLists.txt
+++ b/src/libFLAC/CMakeLists.txt
@@ -38,23 +38,11 @@ else()
endif()
endif()
-
-include(CheckLanguage)
-check_language(ASM_NASM)
-if(CMAKE_ASM_NASM_COMPILER)
- enable_language(ASM_NASM)
- add_definitions(-DFLAC__HAS_NASM)
-endif()
-
if(NOT WITH_ASM)
add_definitions(-DFLAC__NO_ASM)
endif()
if(FLAC__CPU_IA32)
- if(WITH_ASM AND CMAKE_ASM_NASM_COMPILER)
- add_subdirectory(ia32)
- endif()
-
option(WITH_SSE "Enable SSE2 optimizations (WITHOUT runtime detection, resulting binary requires SSE2)" ON)
check_c_compiler_flag(-msse2 HAVE_MSSE2_FLAG)
if(WITH_SSE)
diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am
index 3b8fc722..c9520949 100644
--- a/src/libFLAC/Makefile.am
+++ b/src/libFLAC/Makefile.am
@@ -55,19 +55,9 @@ endif
AM_CFLAGS = $(DEBUGCFLAGS) $(CPUCFLAGS) ${ASSOCMATHCFLAGS} @OGG_CFLAGS@
-if FLaC__NO_ASM
-else
-if FLaC__CPU_IA32
-if FLaC__HAS_NASM
-ARCH_SUBDIRS = ia32
-LOCAL_EXTRA_LIBADD = ia32/libFLAC-asm.la
-endif
-endif
-endif
-
-libFLAC_la_LIBADD = $(LOCAL_EXTRA_LIBADD) @OGG_LIBS@ -lm
+libFLAC_la_LIBADD = @OGG_LIBS@ -lm
-SUBDIRS = $(ARCH_SUBDIRS) include .
+SUBDIRS = include .
m4datadir = $(datadir)/aclocal
m4data_DATA = libFLAC.m4
@@ -97,7 +87,7 @@ extra_ogg_sources = \
endif
# see 'http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning' for numbering convention
-libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0 $(LOCAL_EXTRA_LDFLAGS)
+libFLAC_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-info 12:0:0
libFLAC_sources = \
bitmath.c \
@@ -134,5 +124,4 @@ libFLAC_sources = \
libFLAC_la_SOURCES = $(libFLAC_sources)
# needed for test_libFLAC
-libFLAC_static_la_LIBADD = $(LOCAL_EXTRA_LIBADD)
libFLAC_static_la_SOURCES = $(libFLAC_sources)
diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c
index 4b1b371e..cba0ad02 100644
--- a/src/libFLAC/cpu.c
+++ b/src/libFLAC/cpu.c
@@ -57,7 +57,7 @@
#include <sys/auxv.h>
#endif
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
/* these are flags in EDX of CPUID AX=00000001 */
static const uint32_t FLAC__CPUINFO_X86_CPUID_CMOV = 0x00008000;
@@ -97,8 +97,6 @@ cpu_have_cpuid(void)
#if defined FLAC__CPU_X86_64 || defined __i686__ || defined __SSE__ || (defined _M_IX86_FP && _M_IX86_FP > 0)
/* target CPU does have CPUID instruction */
return 1;
-#elif defined FLAC__HAS_NASM
- return FLAC__cpu_have_cpuid_asm_ia32();
#elif defined __GNUC__ && defined HAVE_CPUID_H
if (__get_cpuid_max(0, 0) != 0)
return 1;
@@ -151,9 +149,6 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint
__cpuid_count(level, 0, *eax, *ebx, *ecx, *edx);
return;
}
-#elif defined FLAC__HAS_NASM && defined FLAC__CPU_IA32
- FLAC__cpu_info_asm_ia32(level, eax, ebx, ecx, edx);
- return;
#endif
*eax = *ebx = *ecx = *edx = 0;
}
@@ -163,7 +158,7 @@ cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint
static void
x86_cpu_info (FLAC__CPUInfo *info)
{
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
FLAC__bool x86_osxsave = false;
FLAC__bool os_avx = false;
FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
diff --git a/src/libFLAC/ia32/CMakeLists.txt b/src/libFLAC/ia32/CMakeLists.txt
deleted file mode 100644
index 014f6654..00000000
--- a/src/libFLAC/ia32/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
-add_compile_options(-I${CMAKE_CURRENT_SOURCE_DIR}/)
-
-if(APPLE)
- add_compile_options(-dOBJ_FORMAT_macho)
-elseif(WIN32)
- #add_compile_options(-d OBJ_FORMAT_win32)
- # FIXME the command above doesn't seem to work on Windows
- set(CMAKE_ASM_NASM_FLAGS -dOBJ_FORMAT_win32)
-else()
- add_compile_options(-dOBJ_FORMAT_elf)
-endif()
-
-add_library(FLAC-asm OBJECT
- cpu_asm.nasm
- fixed_asm.nasm
- lpc_asm.nasm)
diff --git a/src/libFLAC/ia32/Makefile.am b/src/libFLAC/ia32/Makefile.am
deleted file mode 100644
index 33409906..00000000
--- a/src/libFLAC/ia32/Makefile.am
+++ /dev/null
@@ -1,46 +0,0 @@
-# libFLAC - Free Lossless Audio Codec library
-# Copyright (C) 2001-2009 Josh Coalson
-# Copyright (C) 2011-2022 Xiph.Org Foundation
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# - Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# - Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-#
-# - Neither the name of the Xiph.org Foundation nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-SUFFIXES = .nasm .lo
-
-STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh
-AM_CPPFLAGS = -I$(top_builddir) -I$(srcdir)/include -I$(top_srcdir)/include
-.nasm.lo:
- $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) $(NASM) -f $(OBJ_FORMAT) -d OBJ_FORMAT_$(OBJ_FORMAT) -i$(srcdir)/ $< -o $@
-
-noinst_LTLIBRARIES = libFLAC-asm.la
-libFLAC_asm_la_SOURCES = \
- cpu_asm.nasm \
- fixed_asm.nasm \
- lpc_asm.nasm \
- nasm.h
-
-EXTRA_DIST = CMakeLists.txt
diff --git a/src/libFLAC/ia32/cpu_asm.nasm b/src/libFLAC/ia32/cpu_asm.nasm
deleted file mode 100644
index b0b2701b..00000000
--- a/src/libFLAC/ia32/cpu_asm.nasm
+++ /dev/null
@@ -1,119 +0,0 @@
-; vim:filetype=nasm ts=8
-
-; libFLAC - Free Lossless Audio Codec library
-; Copyright (C) 2001-2009 Josh Coalson
-; Copyright (C) 2011-2022 Xiph.Org Foundation
-;
-; Redistribution and use in source and binary forms, with or without
-; modification, are permitted provided that the following conditions
-; are met:
-;
-; - Redistributions of source code must retain the above copyright
-; notice, this list of conditions and the following disclaimer.
-;
-; - Redistributions in binary form must reproduce the above copyright
-; notice, this list of conditions and the following disclaimer in the
-; documentation and/or other materials provided with the distribution.
-;
-; - Neither the name of the Xiph.org Foundation nor the names of its
-; contributors may be used to endorse or promote products derived from
-; this software without specific prior written permission.
-;
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "nasm.h"
-
- data_section
-
-cglobal FLAC__cpu_have_cpuid_asm_ia32
-cglobal FLAC__cpu_info_asm_ia32
-
- code_section
-
-; **********************************************************************
-;
-; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32()
-;
-
-cident FLAC__cpu_have_cpuid_asm_ia32
- pushfd
- pop eax
- mov edx, eax
- xor eax, 0x00200000
- push eax
- popfd
- pushfd
- pop eax
- xor eax, edx
- and eax, 0x00200000
- shr eax, 0x15
- push edx
- popfd
- ret
-
-
-; **********************************************************************
-;
-; void FLAC__cpu_info_asm_ia32(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx)
-;
-
-cident FLAC__cpu_info_asm_ia32
- ;[esp + 8] == level
- ;[esp + 12] == flags_eax
- ;[esp + 16] == flags_ebx
- ;[esp + 20] == flags_ecx
- ;[esp + 24] == flags_edx
-
- push ebx
- call FLAC__cpu_have_cpuid_asm_ia32
- test eax, eax
- jz .no_cpuid
-
- mov eax, [esp + 8]
- and eax, 0x80000000
- cpuid
- cmp eax, [esp + 8]
- jb .no_cpuid
- xor ecx, ecx
- mov eax, [esp + 8]
- cpuid
-
- push ebx
- ;[esp + 16] == flags_eax
- mov ebx, [esp + 16]
- mov [ebx], eax
- pop eax
- ;[esp + 16] == flags_ebx
- mov ebx, [esp + 16]
- mov [ebx], eax
- mov ebx, [esp + 20]
- mov [ebx], ecx
- mov ebx, [esp + 24]
- mov [ebx], edx
- jmp .end
-
-.no_cpuid:
- xor eax, eax
- mov ebx, [esp + 12]
- mov [ebx], eax
- mov ebx, [esp + 16]
- mov [ebx], eax
- mov ebx, [esp + 20]
- mov [ebx], eax
- mov ebx, [esp + 24]
- mov [ebx], eax
-.end:
- pop ebx
- ret
-
-; end
diff --git a/src/libFLAC/ia32/fixed_asm.nasm b/src/libFLAC/ia32/fixed_asm.nasm
deleted file mode 100644
index be1ee064..00000000
--- a/src/libFLAC/ia32/fixed_asm.nasm
+++ /dev/null
@@ -1,309 +0,0 @@
-; vim:filetype=nasm ts=8
-
-; libFLAC - Free Lossless Audio Codec library
-; Copyright (C) 2001-2009 Josh Coalson
-; Copyright (C) 2011-2022 Xiph.Org Foundation
-;
-; Redistribution and use in source and binary forms, with or without
-; modification, are permitted provided that the following conditions
-; are met:
-;
-; - Redistributions of source code must retain the above copyright
-; notice, this list of conditions and the following disclaimer.
-;
-; - Redistributions in binary form must reproduce the above copyright
-; notice, this list of conditions and the following disclaimer in the
-; documentation and/or other materials provided with the distribution.
-;
-; - Neither the name of the Xiph.org Foundation nor the names of its
-; contributors may be used to endorse or promote products derived from
-; this software without specific prior written permission.
-;
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "nasm.h"
-
- data_section
-
-cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
-
- code_section
-
-; **********************************************************************
-;
-; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
-; {
-; FLAC__int32 last_error_0 = data[-1];
-; FLAC__int32 last_error_1 = data[-1] - data[-2];
-; FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
-; FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
-; FLAC__int32 error, save;
-; FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
-; unsigned i, order;
-;
-; for(i = 0; i < data_len; i++) {
-; error = data[i] ; total_error_0 += local_abs(error); save = error;
-; error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
-; error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
-; error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
-; error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
-; }
-;
-; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
-; order = 0;
-; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
-; order = 1;
-; else if(total_error_2 < min(total_error_3, total_error_4))
-; order = 2;
-; else if(total_error_3 < total_error_4)
-; order = 3;
-; else
-; order = 4;
-;
-; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
-; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
-; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
-; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
-; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
-;
-; return order;
-; }
- ALIGN 16
-cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
-
- ; esp + 36 == data[]
- ; esp + 40 == data_len
- ; esp + 44 == residual_bits_per_sample[]
-
- push ebp
- push ebx
- push esi
- push edi
- sub esp, byte 16
- ; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
-
- ; ebx == &data[i]
- ; ecx == loop counter (i)
- ; ebp == order
- ; mm0 == total_error_1:total_error_0
- ; mm1 == total_error_2:total_error_3
- ; mm2 == :total_error_4
- ; mm3 == last_error_1:last_error_0
- ; mm4 == last_error_2:last_error_3
-
- mov ecx, [esp + 40] ; ecx = data_len
- test ecx, ecx
- jz near .data_len_is_0
-
- mov ebx, [esp + 36] ; ebx = data[]
- movd mm3, [ebx - 4] ; mm3 = 0:last_error_0
- movd mm2, [ebx - 8] ; mm2 = 0:data[-2]
- movd mm1, [ebx - 12] ; mm1 = 0:data[-3]
- movd mm0, [ebx - 16] ; mm0 = 0:data[-4]
- movq mm5, mm3 ; mm5 = 0:last_error_0
- psubd mm5, mm2 ; mm5 = 0:last_error_1
- punpckldq mm3, mm5 ; mm3 = last_error_1:last_error_0
- psubd mm2, mm1 ; mm2 = 0:data[-2] - data[-3]
- psubd mm5, mm2 ; mm5 = 0:last_error_2
- movq mm4, mm5 ; mm4 = 0:last_error_2
- psubd mm4, mm2 ; mm4 = 0:last_error_2 - (data[-2] - data[-3])
- paddd mm4, mm1 ; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
- psubd mm4, mm0 ; mm4 = 0:last_error_3
- punpckldq mm4, mm5 ; mm4 = last_error_2:last_error_3
- pxor mm0, mm0 ; mm0 = total_error_1:total_error_0
- pxor mm1, mm1 ; mm1 = total_error_2:total_error_3
- pxor mm2, mm2 ; mm2 = 0:total_error_4
-
- ALIGN 16
-.loop:
- movd mm7, [ebx] ; mm7 = 0:error_0
- add ebx, byte 4
- movq mm6, mm7 ; mm6 = 0:error_0
- psubd mm7, mm3 ; mm7 = :error_1
- punpckldq mm6, mm7 ; mm6 = error_1:error_0
- movq mm5, mm6 ; mm5 = error_1:error_0
- movq mm7, mm6 ; mm7 = error_1:error_0
- psubd mm5, mm3 ; mm5 = error_2:
- movq mm3, mm6 ; mm3 = error_1:error_0
- psrad mm6, 31
- pxor mm7, mm6
- psubd mm7, mm6 ; mm7 = abs(error_1):abs(error_0)
- paddd mm0, mm7 ; mm0 = total_error_1:total_error_0
- movq mm6, mm5 ; mm6 = error_2:
- psubd mm5, mm4 ; mm5 = error_3:
- punpckhdq mm5, mm6 ; mm5 = error_2:error_3
- movq mm7, mm5 ; mm7 = error_2:error_3
- movq mm6, mm5 ; mm6 = error_2:error_3
- psubd mm5, mm4 ; mm5 = :error_4
- movq mm4, mm6 ; mm4 = error_2:error_3
- psrad mm6, 31
- pxor mm7, mm6
- psubd mm7, mm6 ; mm7 = abs(error_2):abs(error_3)
- paddd mm1, mm7 ; mm1 = total_error_2:total_error_3
- movq mm6, mm5 ; mm6 = :error_4
- psrad mm5, 31
- pxor mm6, mm5
- psubd mm6, mm5 ; mm6 = :abs(error_4)
- paddd mm2, mm6 ; mm2 = :total_error_4
-
- dec ecx
- jnz short .loop
-
-; if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
-; order = 0;
-; else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
-; order = 1;
-; else if(total_error_2 < min(total_error_3, total_error_4))
-; order = 2;
-; else if(total_error_3 < total_error_4)
-; order = 3;
-; else
-; order = 4;
- movq mm3, mm0 ; mm3 = total_error_1:total_error_0
- movd edi, mm2 ; edi = total_error_4
- movd esi, mm1 ; esi = total_error_3
- movd eax, mm0 ; eax = total_error_0
- punpckhdq mm1, mm1 ; mm1 = total_error_2:total_error_2
- punpckhdq mm3, mm3 ; mm3 = total_error_1:total_error_1
- movd edx, mm1 ; edx = total_error_2
- movd ecx, mm3 ; ecx = total_error_1
-
- xor ebx, ebx
- xor ebp, ebp
- inc ebx
- cmp ecx, eax
- cmovb eax, ecx ; eax = min(total_error_0, total_error_1)
- cmovbe ebp, ebx
- inc ebx
- cmp edx, eax
- cmovb eax, edx ; eax = min(total_error_0, total_error_1, total_error_2)
- cmovbe ebp, ebx
- inc ebx
- cmp esi, eax
- cmovb eax, esi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
- cmovbe ebp, ebx
- inc ebx
- cmp edi, eax
- cmovb eax, edi ; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
- cmovbe ebp, ebx
- movd ebx, mm0 ; ebx = total_error_0
- emms
-
- ; residual_bits_per_sample[0] = (float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
- ; residual_bits_per_sample[1] = (float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
- ; residual_bits_per_sample[2] = (float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
- ; residual_bits_per_sample[3] = (float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
- ; residual_bits_per_sample[4] = (float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
- xor eax, eax
- fild dword [esp + 40] ; ST = data_len (NOTE: assumes data_len is <2gigs)
-.rbps_0:
- test ebx, ebx
- jz .total_error_0_is_0
- fld1 ; ST = 1.0 data_len
- mov [esp], ebx
- mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_0
- mov ebx, [esp + 44]
- fild qword [esp] ; ST = total_error_0 1.0 data_len
- fdiv st2 ; ST = total_error_0/data_len 1.0 data_len
- fldln2 ; ST = ln2 total_error_0/data_len 1.0 data_len
- fmulp st1 ; ST = ln2*total_error_0/data_len 1.0 data_len
- fyl2x ; ST = log2(ln2*total_error_0/data_len) data_len
- fstp dword [ebx] ; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len) ST = data_len
- jmp short .rbps_1
-.total_error_0_is_0:
- mov ebx, [esp + 44]
- mov [ebx], eax ; residual_bits_per_sample[0] = 0.0
-.rbps_1:
- test ecx, ecx
- jz .total_error_1_is_0
- fld1 ; ST = 1.0 data_len
- mov [esp], ecx
- mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_1
- fild qword [esp] ; ST = total_error_1 1.0 data_len
- fdiv st2 ; ST = total_error_1/data_len 1.0 data_len
- fldln2 ; ST = ln2 total_error_1/data_len 1.0 data_len
- fmulp st1 ; ST = ln2*total_error_1/data_len 1.0 data_len
- fyl2x ; ST = log2(ln2*total_error_1/data_len) data_len
- fstp dword [ebx + 4] ; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len) ST = data_len
- jmp short .rbps_2
-.total_error_1_is_0:
- mov [ebx + 4], eax ; residual_bits_per_sample[1] = 0.0
-.rbps_2:
- test edx, edx
- jz .total_error_2_is_0
- fld1 ; ST = 1.0 data_len
- mov [esp], edx
- mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_2
- fild qword [esp] ; ST = total_error_2 1.0 data_len
- fdiv st2 ; ST = total_error_2/data_len 1.0 data_len
- fldln2 ; ST = ln2 total_error_2/data_len 1.0 data_len
- fmulp st1 ; ST = ln2*total_error_2/data_len 1.0 data_len
- fyl2x ; ST = log2(ln2*total_error_2/data_len) data_len
- fstp dword [ebx + 8] ; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len) ST = data_len
- jmp short .rbps_3
-.total_error_2_is_0:
- mov [ebx + 8], eax ; residual_bits_per_sample[2] = 0.0
-.rbps_3:
- test esi, esi
- jz .total_error_3_is_0
- fld1 ; ST = 1.0 data_len
- mov [esp], esi
- mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_3
- fild qword [esp] ; ST = total_error_3 1.0 data_len
- fdiv st2 ; ST = total_error_3/data_len 1.0 data_len
- fldln2 ; ST = ln2 total_error_3/data_len 1.0 data_len
- fmulp st1 ; ST = ln2*total_error_3/data_len 1.0 data_len
- fyl2x ; ST = log2(ln2*total_error_3/data_len) data_len
- fstp dword [ebx + 12] ; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len) ST = data_len
- jmp short .rbps_4
-.total_error_3_is_0:
- mov [ebx + 12], eax ; residual_bits_per_sample[3] = 0.0
-.rbps_4:
- test edi, edi
- jz .total_error_4_is_0
- fld1 ; ST = 1.0 data_len
- mov [esp], edi
- mov [esp + 4], eax ; [esp] = (FLAC__uint64)total_error_4
- fild qword [esp] ; ST = total_error_4 1.0 data_len
- fdiv st2 ; ST = total_error_4/data_len 1.0 data_len
- fldln2 ; ST = ln2 total_error_4/data_len 1.0 data_len
- fmulp st1 ; ST = ln2*total_error_4/data_len 1.0 data_len
- fyl2x ; ST = log2(ln2*total_error_4/data_len) data_len
- fstp dword [ebx + 16] ; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len) ST = data_len
- jmp short .rbps_end
-.total_error_4_is_0:
- mov [ebx + 16], eax ; residual_bits_per_sample[4] = 0.0
-.rbps_end:
- fstp st0 ; ST = [empty]
- jmp short .end
-.data_len_is_0:
- ; data_len == 0, so residual_bits_per_sample[*] = 0.0
- xor ebp, ebp
- mov edi, [esp + 44]
- mov [edi], ebp
- mov [edi + 4], ebp
- mov [edi + 8], ebp
- mov [edi + 12], ebp
- mov [edi + 16], ebp
- add ebp, byte 4 ; order = 4
-
-.end:
- mov eax, ebp ; return order
- add esp, byte 16
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-; end
diff --git a/src/libFLAC/ia32/lpc_asm.nasm b/src/libFLAC/ia32/lpc_asm.nasm
deleted file mode 100644
index 8be9e7aa..00000000
--- a/src/libFLAC/ia32/lpc_asm.nasm
+++ /dev/null
@@ -1,727 +0,0 @@
-; vim:filetype=nasm ts=8
-
-; libFLAC - Free Lossless Audio Codec library
-; Copyright (C) 2001-2009 Josh Coalson
-; Copyright (C) 2011-2022 Xiph.Org Foundation
-;
-; Redistribution and use in source and binary forms, with or without
-; modification, are permitted provided that the following conditions
-; are met:
-;
-; - Redistributions of source code must retain the above copyright
-; notice, this list of conditions and the following disclaimer.
-;
-; - Redistributions in binary form must reproduce the above copyright
-; notice, this list of conditions and the following disclaimer in the
-; documentation and/or other materials provided with the distribution.
-;
-; - Neither the name of the Xiph.org Foundation nor the names of its
-; contributors may be used to endorse or promote products derived from
-; this software without specific prior written permission.
-;
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "nasm.h"
-
- data_section
-
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
-
- code_section
-
-;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
-;
-; for(i = 0; i < data_len; i++) {
-; sum = 0;
-; for(j = 0; j < order; j++)
-; sum += qlp_coeff[j] * data[i-j-1];
-; residual[i] = data[i] - (sum >> lp_quantization);
-; }
-;
- ALIGN 16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
- ;[esp + 40] residual[]
- ;[esp + 36] lp_quantization
- ;[esp + 32] order
- ;[esp + 28] qlp_coeff[]
- ;[esp + 24] data_len
- ;[esp + 20] data[]
-
- ;ASSERT(order > 0)
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov esi, [esp + 20] ; esi = data[]
- mov edi, [esp + 40] ; edi = residual[]
- mov eax, [esp + 32] ; eax = order
- mov ebx, [esp + 24] ; ebx = data_len
-
- test ebx, ebx
- jz near .end ; do nothing if data_len == 0
-.begin:
- cmp eax, byte 1
- jg short .i_1more
-
- mov ecx, [esp + 28]
- mov edx, [ecx] ; edx = qlp_coeff[0]
- mov eax, [esi - 4] ; eax = data[-1]
- mov ecx, [esp + 36] ; cl = lp_quantization
- ALIGN 16
-.i_1_loop_i:
- imul eax, edx
- sar eax, cl
- neg eax
- add eax, [esi]
- mov [edi], eax
- mov eax, [esi]
- add edi, byte 4
- add esi, byte 4
- dec ebx
- jnz .i_1_loop_i
-
- jmp .end
-
-.i_1more:
- cmp eax, byte 32 ; for order <= 32 there is a faster routine
- jbe short .i_32
-
- ; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
- ALIGN 16
-.i_32more_loop_i:
- xor ebp, ebp
- mov ecx, [esp + 32]
- mov edx, ecx
- shl edx, 2
- add edx, [esp + 28]
- neg ecx
- ALIGN 16
-.i_32more_loop_j:
- sub edx, byte 4
- mov eax, [edx]
- imul eax, [esi + 4 * ecx]
- add ebp, eax
- inc ecx
- jnz short .i_32more_loop_j
-
- mov ecx, [esp + 36]
- sar ebp, cl
- neg ebp
- add ebp, [esi]
- mov [edi], ebp
- add esi, byte 4
- add edi, byte 4
-
- dec ebx
- jnz .i_32more_loop_i
-
- jmp .end
-
-.mov_eip_to_eax:
- mov eax, [esp]
- ret
-
-.i_32:
- sub edi, esi
- neg eax
- lea edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
- call .mov_eip_to_eax
-.get_eip0:
- add edx, eax
- inc edx
- mov eax, [esp + 28] ; eax = qlp_coeff[]
- xor ebp, ebp
- jmp edx
-
- mov ecx, [eax + 124]
- imul ecx, [esi - 128]
- add ebp, ecx
- mov ecx, [eax + 120]
- imul ecx, [esi - 124]
- add ebp, ecx
- mov ecx, [eax + 116]
- imul ecx, [esi - 120]
- add ebp, ecx
- mov ecx, [eax + 112]
- imul ecx, [esi - 116]
- add ebp, ecx
- mov ecx, [eax + 108]
- imul ecx, [esi - 112]
- add ebp, ecx
- mov ecx, [eax + 104]
- imul ecx, [esi - 108]
- add ebp, ecx
- mov ecx, [eax + 100]
- imul ecx, [esi - 104]
- add ebp, ecx
- mov ecx, [eax + 96]
- imul ecx, [esi - 100]
- add ebp, ecx
- mov ecx, [eax + 92]
- imul ecx, [esi - 96]
- add ebp, ecx
- mov ecx, [eax + 88]
- imul ecx, [esi - 92]
- add ebp, ecx
- mov ecx, [eax + 84]
- imul ecx, [esi - 88]
- add ebp, ecx
- mov ecx, [eax + 80]
- imul ecx, [esi - 84]
- add ebp, ecx
- mov ecx, [eax + 76]
- imul ecx, [esi - 80]
- add ebp, ecx
- mov ecx, [eax + 72]
- imul ecx, [esi - 76]
- add ebp, ecx
- mov ecx, [eax + 68]
- imul ecx, [esi - 72]
- add ebp, ecx
- mov ecx, [eax + 64]
- imul ecx, [esi - 68]
- add ebp, ecx
- mov ecx, [eax + 60]
- imul ecx, [esi - 64]
- add ebp, ecx
- mov ecx, [eax + 56]
- imul ecx, [esi - 60]
- add ebp, ecx
- mov ecx, [eax + 52]
- imul ecx, [esi - 56]
- add ebp, ecx
- mov ecx, [eax + 48]
- imul ecx, [esi - 52]
- add ebp, ecx
- mov ecx, [eax + 44]
- imul ecx, [esi - 48]
- add ebp, ecx
- mov ecx, [eax + 40]
- imul ecx, [esi - 44]
- add ebp, ecx
- mov ecx, [eax + 36]
- imul ecx, [esi - 40]
- add ebp, ecx
- mov ecx, [eax + 32]
- imul ecx, [esi - 36]
- add ebp, ecx
- mov ecx, [eax + 28]
- imul ecx, [esi - 32]
- add ebp, ecx
- mov ecx, [eax + 24]
- imul ecx, [esi - 28]
- add ebp, ecx
- mov ecx, [eax + 20]
- imul ecx, [esi - 24]
- add ebp, ecx
- mov ecx, [eax + 16]
- imul ecx, [esi - 20]
- add ebp, ecx
- mov ecx, [eax + 12]
- imul ecx, [esi - 16]
- add ebp, ecx
- mov ecx, [eax + 8]
- imul ecx, [esi - 12]
- add ebp, ecx
- mov ecx, [eax + 4]
- imul ecx, [esi - 8]
- add ebp, ecx
- mov ecx, [eax] ; there is one byte missing
- imul ecx, [esi - 4]
- add ebp, ecx
-.jumper_0:
-
- mov ecx, [esp + 36]
- sar ebp, cl
- neg ebp
- add ebp, [esi]
- mov [edi + esi], ebp
- add esi, byte 4
-
- dec ebx
- jz short .end
- xor ebp, ebp
- jmp edx
-
-.end:
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel and qlp_coeffs must be <= 16. Especially note that this routine
-; cannot be used for side-channel coded 16bps channels since the effective bps
-; is 17.
- ALIGN 16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
- ;[esp + 40] residual[]
- ;[esp + 36] lp_quantization
- ;[esp + 32] order
- ;[esp + 28] qlp_coeff[]
- ;[esp + 24] data_len
- ;[esp + 20] data[]
-
- ;ASSERT(order > 0)
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov esi, [esp + 20] ; esi = data[]
- mov edi, [esp + 40] ; edi = residual[]
- mov eax, [esp + 32] ; eax = order
- mov ebx, [esp + 24] ; ebx = data_len
-
- test ebx, ebx
- jz near .end ; do nothing if data_len == 0
- dec ebx
- test ebx, ebx
- jz near .last_one
-
- mov edx, [esp + 28] ; edx = qlp_coeff[]
- movd mm6, [esp + 36] ; mm6 = 0:lp_quantization
- mov ebp, esp
-
- and esp, 0xfffffff8
-
- xor ecx, ecx
-.copy_qlp_loop:
- push word [edx + 4 * ecx]
- inc ecx
- cmp ecx, eax
- jnz short .copy_qlp_loop
-
- and ecx, 0x3
- test ecx, ecx
- je short .za_end
- sub ecx, byte 4
-.za_loop:
- push word 0
- inc eax
- inc ecx
- jnz short .za_loop
-.za_end:
-
- movq mm5, [esp + 2 * eax - 8]
- movd mm4, [esi - 16]
- punpckldq mm4, [esi - 12]
- movd mm0, [esi - 8]
- punpckldq mm0, [esi - 4]
- packssdw mm4, mm0
-
- cmp eax, byte 4
- jnbe short .mmx_4more
-
- ALIGN 16
-.mmx_4_loop_i:
- movd mm1, [esi]
- movq mm3, mm4
- punpckldq mm1, [esi + 4]
- psrlq mm4, 16
- movq mm0, mm1
- psllq mm0, 48
- por mm4, mm0
- movq mm2, mm4
- psrlq mm4, 16
- pxor mm0, mm0
- punpckhdq mm0, mm1
- pmaddwd mm3, mm5
- pmaddwd mm2, mm5
- psllq mm0, 16
- por mm4, mm0
- movq mm0, mm3
- punpckldq mm3, mm2
- punpckhdq mm0, mm2
- paddd mm3, mm0
- psrad mm3, mm6
- psubd mm1, mm3
- movd [edi], mm1
- punpckhdq mm1, mm1
- movd [edi + 4], mm1
-
- add edi, byte 8
- add esi, byte 8
-
- sub ebx, 2
- jg .mmx_4_loop_i
- jmp .mmx_end
-
-.mmx_4more:
- shl eax, 2
- neg eax
- add eax, byte 16
-
- ALIGN 16
-.mmx_4more_loop_i:
- movd mm1, [esi]
- punpckldq mm1, [esi + 4]
- movq mm3, mm4
- psrlq mm4, 16
- movq mm0, mm1
- psllq mm0, 48
- por mm4, mm0
- movq mm2, mm4
- psrlq mm4, 16
- pxor mm0, mm0
- punpckhdq mm0, mm1
- pmaddwd mm3, mm5
- pmaddwd mm2, mm5
- psllq mm0, 16
- por mm4, mm0
-
- mov ecx, esi
- add ecx, eax
- mov edx, esp
-
- ALIGN 16
-.mmx_4more_loop_j:
- movd mm0, [ecx - 16]
- movd mm7, [ecx - 8]
- punpckldq mm0, [ecx - 12]
- punpckldq mm7, [ecx - 4]
- packssdw mm0, mm7
- pmaddwd mm0, [edx]
- punpckhdq mm7, mm7
- paddd mm3, mm0
- movd mm0, [ecx - 12]
- punpckldq mm0, [ecx - 8]
- punpckldq mm7, [ecx]
- packssdw mm0, mm7
- pmaddwd mm0, [edx]
- paddd mm2, mm0
-
- add edx, byte 8
- add ecx, byte 16
- cmp ecx, esi
- jnz .mmx_4more_loop_j
-
- movq mm0, mm3
- punpckldq mm3, mm2
- punpckhdq mm0, mm2
- paddd mm3, mm0
- psrad mm3, mm6
- psubd mm1, mm3
- movd [edi], mm1
- punpckhdq mm1, mm1
- movd [edi + 4], mm1
-
- add edi, byte 8
- add esi, byte 8
-
- sub ebx, 2
- jg near .mmx_4more_loop_i
-
-.mmx_end:
- emms
- mov esp, ebp
-.last_one:
- mov eax, [esp + 32]
- inc ebx
- jnz near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin
-
-.end:
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-; **********************************************************************
-;
-;void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
-; {
-; unsigned i, j;
-; FLAC__int64 sum;
-;
-; FLAC__ASSERT(order > 0);
-;
-; for(i = 0; i < data_len; i++) {
-; sum = 0;
-; for(j = 0; j < order; j++)
-; sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
-; residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
-; }
-; }
- ALIGN 16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
- ;[esp + 40] residual[]
- ;[esp + 36] lp_quantization
- ;[esp + 32] order
- ;[esp + 28] qlp_coeff[]
- ;[esp + 24] data_len
- ;[esp + 20] data[]
-
- ;ASSERT(order > 0)
- ;ASSERT(order <= 32)
- ;ASSERT(lp_quantization <= 31)
-
- push ebp
- push ebx
- push esi
- push edi
-
- mov ebx, [esp + 24] ; ebx = data_len
- test ebx, ebx
- jz near .end ; do nothing if data_len == 0
-
-.begin:
- mov eax, [esp + 32] ; eax = order
- cmp eax, 1
- jg short .i_32
-
- mov esi, [esp + 40] ; esi = residual[]
- mov edi, [esp + 20] ; edi = data[]
- mov ecx, [esp + 28] ; ecx = qlp_coeff[]
- mov ebp, [ecx] ; ebp = qlp_coeff[0]
- mov eax, [edi - 4] ; eax = data[-1]
- mov ecx, [esp + 36] ; cl = lp_quantization
- ALIGN 16
-.i_1_loop_i:
- imul ebp ; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
- shrd eax, edx, cl ; 0 <= lp_quantization <= 15
- neg eax
- add eax, [edi]
- mov [esi], eax
- mov eax, [edi]
- add esi, 4
- add edi, 4
- dec ebx
- jnz .i_1_loop_i
- jmp .end
-
-.mov_eip_to_eax:
- mov eax, [esp]
- ret
-
-.i_32: ; eax = order
- neg eax
- add eax, eax
- lea ebp, [eax + eax * 4 + .jumper_0 - .get_eip0]
- call .mov_eip_to_eax
-.get_eip0:
- add ebp, eax
- inc ebp ; compensate for the shorter opcode on the last iteration
-
- mov ebx, [esp + 28] ; ebx = qlp_coeff[]
- mov edi, [esp + 20] ; edi = data[]
- sub [esp + 40], edi ; residual[] -= data[]
-
- xor ecx, ecx
- xor esi, esi
- jmp ebp
-
-;eax = --
-;edx = --
-;ecx = 0
-;esi = 0
-;
-;ebx = qlp_coeff[]
-;edi = data[]
-;ebp = @address
-
- mov eax, [ebx + 124] ; eax = qlp_coeff[31]
- imul dword [edi - 128] ; edx:eax = qlp_coeff[31] * data[i-32]
- add ecx, eax
- adc esi, edx ; sum += qlp_coeff[31] * data[i-32]
-
- mov eax, [ebx + 120] ; eax = qlp_coeff[30]
- imul dword [edi - 124] ; edx:eax = qlp_coeff[30] * data[i-31]
- add ecx, eax
- adc esi, edx ; sum += qlp_coeff[30] * data[i-31]
-
- mov eax, [ebx + 116]
- imul dword [edi - 120]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 112]
- imul dword [edi - 116]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 108]
- imul dword [edi - 112]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 104]
- imul dword [edi - 108]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 100]
- imul dword [edi - 104]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 96]
- imul dword [edi - 100]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 92]
- imul dword [edi - 96]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 88]
- imul dword [edi - 92]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 84]
- imul dword [edi - 88]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 80]
- imul dword [edi - 84]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 76]
- imul dword [edi - 80]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 72]
- imul dword [edi - 76]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 68]
- imul dword [edi - 72]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 64]
- imul dword [edi - 68]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 60]
- imul dword [edi - 64]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 56]
- imul dword [edi - 60]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 52]
- imul dword [edi - 56]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 48]
- imul dword [edi - 52]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 44]
- imul dword [edi - 48]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 40]
- imul dword [edi - 44]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 36]
- imul dword [edi - 40]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 32]
- imul dword [edi - 36]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 28]
- imul dword [edi - 32]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 24]
- imul dword [edi - 28]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 20]
- imul dword [edi - 24]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 16]
- imul dword [edi - 20]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 12]
- imul dword [edi - 16]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 8]
- imul dword [edi - 12]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx + 4]
- imul dword [edi - 8]
- add ecx, eax
- adc esi, edx
-
- mov eax, [ebx] ; eax = qlp_coeff[ 0] (NOTE: one byte missing from instruction)
- imul dword [edi - 4] ; edx:eax = qlp_coeff[ 0] * data[i- 1]
- add ecx, eax
- adc esi, edx ; sum += qlp_coeff[ 0] * data[i- 1]
-
-.jumper_0:
- mov edx, ecx
-;esi:edx = sum
- mov ecx, [esp + 36] ; cl = lp_quantization
- shrd edx, esi, cl ; edx = (sum >> lp_quantization)
-;eax = --
-;ecx = --
-;edx = sum >> lp_q
-;esi = --
- neg edx ; edx = -(sum >> lp_quantization)
- mov eax, [esp + 40] ; residual[] - data[]
- add edx, [edi] ; edx = data[i] - (sum >> lp_quantization)
- mov [edi + eax], edx
- add edi, 4
-
- dec dword [esp + 24]
- jz short .end
- xor ecx, ecx
- xor esi, esi
- jmp ebp
-
-.end:
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-; end
diff --git a/src/libFLAC/ia32/nasm.h b/src/libFLAC/ia32/nasm.h
deleted file mode 100644
index cdb8bf55..00000000
--- a/src/libFLAC/ia32/nasm.h
+++ /dev/null
@@ -1,95 +0,0 @@
-; libFLAC - Free Lossless Audio Codec library
-; Copyright (C) 2001-2009 Josh Coalson
-; Copyright (C) 2011-2022 Xiph.Org Foundation
-;
-; Redistribution and use in source and binary forms, with or without
-; modification, are permitted provided that the following conditions
-; are met:
-;
-; - Redistributions of source code must retain the above copyright
-; notice, this list of conditions and the following disclaimer.
-;
-; - Redistributions in binary form must reproduce the above copyright
-; notice, this list of conditions and the following disclaimer in the
-; documentation and/or other materials provided with the distribution.
-;
-; - Neither the name of the Xiph.org Foundation nor the names of its
-; contributors may be used to endorse or promote products derived from
-; this software without specific prior written permission.
-;
-; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
-; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- bits 32
-
-%ifdef OBJ_FORMAT_win32
- %define FLAC__PUBLIC_NEEDS_UNDERSCORE
- %idefine code_section section .text align=16 class=CODE use32
- %idefine data_section section .data align=32 class=DATA use32
- %idefine bss_section section .bss align=32 class=DATA use32
-%elifdef OBJ_FORMAT_aout
- %define FLAC__PUBLIC_NEEDS_UNDERSCORE
- %idefine code_section section .text
- %idefine data_section section .data
- %idefine bss_section section .bss
-%elifdef OBJ_FORMAT_aoutb
- %define FLAC__PUBLIC_NEEDS_UNDERSCORE
- %idefine code_section section .text
- %idefine data_section section .data
- %idefine bss_section section .bss
-%elifdef OBJ_FORMAT_coff
- %define FLAC__PUBLIC_NEEDS_UNDERSCORE
- %idefine code_section section .text
- %idefine data_section section .data
- %idefine bss_section section .bss
-%elifdef OBJ_FORMAT_macho
- %define FLAC__PUBLIC_NEEDS_UNDERSCORE
- %idefine code_section section .text
- %idefine data_section section .data
- %idefine bss_section section .bss
-%elifdef OBJ_FORMAT_elf
- %idefine code_section section .text align=16
- %idefine data_section section .data align=32
- %idefine bss_section section .bss align=32
-%else
- %error unsupported object format! ; this directive doesn't really work here
-%endif
-
-%imacro cglobal 1
- %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
- global _%1
- %else
- %if __NASM_MAJOR__ >= 2
- global %1:function hidden
- %else
- global %1
- %endif
- %endif
-%endmacro
-
-%imacro cextern 1
- %ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
- extern _%1
- %else
- extern %1
- %endif
-%endmacro
-
-%imacro cident 1
-_%1:
-%1:
-%endmacro
-
-%ifdef OBJ_FORMAT_elf
-section .note.GNU-stack progbits noalloc noexec nowrite align=1
-%endif
-
diff --git a/src/libFLAC/include/private/fixed.h b/src/libFLAC/include/private/fixed.h
index aa742cac..68024b03 100644
--- a/src/libFLAC/include/private/fixed.h
+++ b/src/libFLAC/include/private/fixed.h
@@ -69,9 +69,6 @@ uint32_t FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[
uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
# endif
# endif
-# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
-uint32_t FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
-# endif
# endif
#else
uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h
index 5212b35e..8ff3b894 100644
--- a/src/libFLAC/include/private/lpc.h
+++ b/src/libFLAC/include/private/lpc.h
@@ -174,13 +174,6 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon(const FLAC__in
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
# endif
-# ifdef FLAC__CPU_IA32
-# ifdef FLAC__HAS_NASM
-void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
-# endif
-# endif
# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index aae0bf36..2f0b1e31 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -949,20 +949,6 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
if(encoder->private_->cpuinfo.use_asm) {
# ifdef FLAC__CPU_IA32
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
-# ifdef FLAC__HAS_NASM
- encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
- if (encoder->private_->cpuinfo.x86.mmx) {
- encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
- encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx;
- }
- else {
- encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
- encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
- }
-
- if (encoder->private_->cpuinfo.x86.mmx && encoder->private_->cpuinfo.x86.cmov)
- encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
-# endif /* FLAC__HAS_NASM */
# if FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
if (encoder->private_->cpuinfo.x86.sse2) {
diff --git a/strip_non_asm_libtool_args.sh b/strip_non_asm_libtool_args.sh
deleted file mode 100755
index d5a61f15..00000000
--- a/strip_non_asm_libtool_args.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/sh
-#
-# libtool assumes that the compiler can handle the -fPIC flag.
-# This isn't always true (for example, nasm can't handle it).
-# Also, on some versions of OS X it tries to pass -fno-common
-# to 'as' which causes problems.
-command=""
-while [ $1 ]; do
- if [ "$1" != "-fPIC" ]; then
- if [ "$1" != "-DPIC" ]; then
- if [ "$1" != "-fno-common" ]; then
- command="$command $1"
- fi
- fi
- fi
- shift
-done
-echo $command
-exec $command