diff options
-rw-r--r-- | build/compile.mk | 15 | ||||
-rw-r--r-- | build/exe.mk | 8 | ||||
-rw-r--r-- | build/lib.mk | 9 | ||||
-rw-r--r-- | configure.ac | 27 | ||||
-rw-r--r-- | src/libFLAC/Makefile.am | 23 | ||||
-rw-r--r-- | src/libFLAC/Makefile.lite | 5 | ||||
-rw-r--r-- | src/libFLAC/cpu.c | 96 | ||||
-rw-r--r-- | src/libFLAC/include/private/cpu.h | 8 | ||||
-rw-r--r-- | src/libFLAC/include/private/lpc.h | 5 | ||||
-rw-r--r-- | src/libFLAC/ppc/Makefile.am | 32 | ||||
-rw-r--r-- | src/libFLAC/ppc/as/Makefile.am | 53 | ||||
-rw-r--r-- | src/libFLAC/ppc/as/lpc_asm.s | 430 | ||||
-rw-r--r-- | src/libFLAC/ppc/gas/Makefile.am | 53 | ||||
-rw-r--r-- | src/libFLAC/ppc/gas/lpc_asm.s | 432 | ||||
-rw-r--r-- | src/libFLAC/stream_decoder.c | 23 |
15 files changed, 13 insertions, 1206 deletions
diff --git a/build/compile.mk b/build/compile.mk index fb243442..98e128af 100644 --- a/build/compile.mk +++ b/build/compile.mk @@ -39,21 +39,6 @@ %.debug.i %.release.i : %.cpp $(CCC) $(CXXFLAGS) -E $< -o $@ -%.debug.o %.release.o : %.s -ifeq ($(OS),Darwin) - #$(CC) -c -arch $(PROC) -Wall -force_cpusubtype_ALL $< -o $@ - $(AS) -arch $(PROC) -force_cpusubtype_ALL $< -o $@ -else - $(AS) $< -o $@ -endif -%.debug.pic.o %.release.pic.o : %.s -ifeq ($(OS),Darwin) - #$(CC) -c -arch $(PROC) -Wall -force_cpusubtype_ALL $< -o $@ - $(AS) -arch $(PROC) -force_cpusubtype_ALL $< -o $@ -else - $(AS) $< -o $@ -endif - %.debug.o : %.nasm $(NASM) -f elf -d OBJ_FORMAT_elf -i ia32/ -g $< -o $@ %.release.o : %.nasm diff --git a/build/exe.mk b/build/exe.mk index 576c3c0d..a44a9a9f 100644 --- a/build/exe.mk +++ b/build/exe.mk @@ -51,11 +51,11 @@ CXXFLAGS = $(CFLAGS) LFLAGS = -L$(LIBPATH) -DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o) $(SRCS_S:%.s=%.debug.o) -RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o) $(SRCS_S:%.s=%.release.o) +DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o) +RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o) ifeq ($(PROC),x86_64) -DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o) $(SRCS_S:%.s=%.debug.pic.o) -RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o) $(SRCS_S:%.s=%.release.pic.o) +DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o) +RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o) endif debug : $(DEBUG_PROGRAM) diff --git a/build/lib.mk b/build/lib.mk index 5d81f8f5..3cda5d09 100644 --- a/build/lib.mk +++ b/build/lib.mk @@ -29,7 +29,6 @@ else CC = gcc CCC = g++ endif -AS = as NASM = nasm LINK = ar cru OBJPATH = $(topdir)/objs @@ -71,11 +70,11 @@ CXXFLAGS = $(CFLAGS) LFLAGS = -L$(LIBPATH) -DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o) $(SRCS_S:%.s=%.debug.o) -RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o) $(SRCS_S:%.s=%.release.o) +DEBUG_OBJS = $(SRCS_C:%.c=%.debug.o) $(SRCS_CC:%.cc=%.debug.o) $(SRCS_CPP:%.cpp=%.debug.o) $(SRCS_NASM:%.nasm=%.debug.o) +RELEASE_OBJS = $(SRCS_C:%.c=%.release.o) $(SRCS_CC:%.cc=%.release.o) $(SRCS_CPP:%.cpp=%.release.o) $(SRCS_NASM:%.nasm=%.release.o) ifeq ($(PROC),x86_64) -DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o) $(SRCS_S:%.s=%.debug.pic.o) -RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o) $(SRCS_S:%.s=%.release.pic.o) +DEBUG_PIC_OBJS = $(SRCS_C:%.c=%.debug.pic.o) $(SRCS_CC:%.cc=%.debug.pic.o) $(SRCS_CPP:%.cpp=%.debug.pic.o) $(SRCS_NASM:%.nasm=%.debug.pic.o) +RELEASE_PIC_OBJS = $(SRCS_C:%.c=%.release.pic.o) $(SRCS_CC:%.cc=%.release.pic.o) $(SRCS_CPP:%.cpp=%.release.pic.o) $(SRCS_NASM:%.nasm=%.release.pic.o) endif debug : $(DEBUG_STATIC_LIB) $(DEBUG_DYNAMIC_LIB) diff --git a/configure.ac b/configure.ac index f74bb1d6..db5402d8 100644 --- a/configure.ac +++ b/configure.ac @@ -358,25 +358,6 @@ AC_DEFINE(FLAC__HAS_NASM) AH_TEMPLATE(FLAC__HAS_NASM, [define if you are compiling for x86 and have the NASM assembler]) fi -# only matters for PowerPC -AC_CHECK_PROGS(AS, as, as) -AC_CHECK_PROGS(GAS, gas, gas) - -# try -v (apple as) and --version (gas) at the same time -test "$AS" = "as" && as --version -v < /dev/null 2>&1 | grep Apple >/dev/null || AS=gas - -AM_CONDITIONAL(FLaC__HAS_AS, test "$AS" = "as") -AM_CONDITIONAL(FLaC__HAS_GAS, test "$AS" = "gas") -if test "$AS" = "as" ; then -AC_DEFINE(FLAC__HAS_AS) -AH_TEMPLATE(FLAC__HAS_AS, [define if you are compiling for PowerPC and have the 'as' assembler]) -fi -if test "$AS" = "gas" ; then -# funniest. macro. ever. -AC_DEFINE(FLAC__HAS_GAS) -AH_TEMPLATE(FLAC__HAS_GAS, [define if you are compiling for PowerPC and have the 'gas' assembler]) -fi - if test "x$debug" = xtrue; then CPPFLAGS="-DDEBUG $CPPFLAGS" CFLAGS=$(echo "$CFLAGS" | sed 's/-g//') @@ -431,25 +412,17 @@ if test x$enable_werror = "xyes" ; then AC_LANG_POP([C++]) fi - if test x$enable_stack_smash_protection = "xyes" ; then XIPH_GCC_STACK_PROTECTOR XIPH_GXX_STACK_PROTECTOR fi -#@@@ -AM_CONDITIONAL(FLaC__HAS_AS__TEMPORARILY_DISABLED, test "yes" = "no") -AM_CONDITIONAL(FLaC__HAS_GAS__TEMPORARILY_DISABLED, test "yes" = "no") - AC_CONFIG_FILES([ \ Makefile \ src/Makefile \ src/libFLAC/Makefile \ src/libFLAC/flac.pc \ src/libFLAC/ia32/Makefile \ - src/libFLAC/ppc/Makefile \ - src/libFLAC/ppc/as/Makefile \ - src/libFLAC/ppc/gas/Makefile \ src/libFLAC/include/Makefile \ src/libFLAC/include/private/Makefile \ src/libFLAC/include/protected/Makefile \ diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am index 0fcd64ee..473473d3 100644 --- a/src/libFLAC/Makefile.am +++ b/src/libFLAC/Makefile.am @@ -39,19 +39,13 @@ endif # FIXME: The following logic should be part of configure, not of Makefile.am if FLaC__CPU_PPC -# The -force_cpusubtype_ALL is needed to insert a ppc64 instruction -# into cpu.c with an asm(). if FLaC__SYS_DARWIN -#@@@ PPC optimizations temporarily disabled -CPUCFLAGS = -faltivec -force_cpusubtype_ALL -DFLAC__NO_ASM +CPUCFLAGS = -faltivec else -# Linux-gcc for PPC does not have -force_cpusubtype_ALL, it is Darwin-specific CPUCFLAGS = if FLaC__USE_ALTIVEC CPUCFLAGS += -maltivec -mabi=altivec endif -#@@@ PPC optimizations temporarily disabled -CPUCFLAGS += -DFLAC__NO_ASM endif endif @@ -65,21 +59,6 @@ ARCH_SUBDIRS = ia32 LOCAL_EXTRA_LIBADD = ia32/libFLAC-asm.la endif endif -if FLaC__CPU_PPC -ARCH_SUBDIRS = ppc -if FLaC__HAS_AS__TEMPORARILY_DISABLED -if FLaC__CPU_PPC_SPE -else -LOCAL_EXTRA_LIBADD = ppc/as/libFLAC-asm.la -LOCAL_EXTRA_LDFLAGS = "-Wl,-read_only_relocs,warning" -else -if FLaC__HAS_GAS__TEMPORARILY_DISABLED -LOCAL_EXTRA_LIBADD = ppc/gas/libFLAC-asm.la -LOCAL_EXTRA_LDFLAGS = "" -endif -endif -endif -endif endif if OS_IS_WINDOWS diff --git a/src/libFLAC/Makefile.lite b/src/libFLAC/Makefile.lite index cbffc6fc..8f5bf8c1 100644 --- a/src/libFLAC/Makefile.lite +++ b/src/libFLAC/Makefile.lite @@ -62,17 +62,12 @@ endif INCLUDES = -I./include -I$(topdir)/include $(OGG_INCLUDES) DEBUG_CFLAGS = -DFLAC__OVERFLOW_DETECT -ifeq ($(OS),Darwin) -SRCS_S = \ - ppc/as/lpc_asm.s -else ifeq ($(PROC),i386) SRCS_NASM = \ ia32/cpu_asm.nasm \ ia32/fixed_asm.nasm \ ia32/lpc_asm.nasm endif -endif OGG_SRCS_C = \ ogg_decoder_aspect.c \ diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c index 1248d055..790c0157 100644 --- a/src/libFLAC/cpu.c +++ b/src/libFLAC/cpu.c @@ -45,37 +45,8 @@ static void disable_sse(FLAC__CPUInfo *info) { info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false; } -#elif defined FLAC__CPU_PPC -# if !defined FLAC__NO_ASM -# if defined FLAC__SYS_DARWIN -# include <sys/sysctl.h> -# include <mach/mach.h> -# include <mach/mach_host.h> -# include <mach/host_info.h> -# include <mach/machine.h> -# ifndef CPU_SUBTYPE_POWERPC_970 -# define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100) -# endif -# else /* FLAC__SYS_DARWIN */ -# include <signal.h> -# include <setjmp.h> - -static sigjmp_buf jmpbuf; -static volatile sig_atomic_t canjump = 0; - -static void sigill_handler (int sig) -{ - if (!canjump) { - signal (sig, SIG_DFL); - raise (sig); - } - canjump = 0; - siglongjmp (jmpbuf, 1); -} -# endif /* FLAC__SYS_DARWIN */ -# endif /* FLAC__NO_ASM */ -#endif /* FLAC__CPU_PPC */ +#endif #if defined (__NetBSD__) || defined(__OpenBSD__) #include <sys/param.h> @@ -360,71 +331,6 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) #endif /* - * PPC-specific - */ -#elif defined FLAC__CPU_PPC - info->type = FLAC__CPUINFO_TYPE_PPC; -# if !defined FLAC__NO_ASM - info->use_asm = true; -# ifdef FLAC__USE_ALTIVEC -# if defined FLAC__SYS_DARWIN - { - int val = 0, mib[2] = { CTL_HW, HW_VECTORUNIT }; - size_t len = sizeof(val); - info->ppc.altivec = !(sysctl(mib, 2, &val, &len, NULL, 0) || !val); - } - { - host_basic_info_data_t hostInfo; - mach_msg_type_number_t infoCount; - - infoCount = HOST_BASIC_INFO_COUNT; - host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount); - - info->ppc.ppc64 = (hostInfo.cpu_type == CPU_TYPE_POWERPC) && (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970); - } -# else /* FLAC__USE_ALTIVEC && !FLAC__SYS_DARWIN */ - { - /* no Darwin, do it the brute-force way */ - /* @@@@@@ this is not thread-safe; replace with SSE OS method above or remove */ - info->ppc.altivec = 0; - info->ppc.ppc64 = 0; - - signal (SIGILL, sigill_handler); - canjump = 0; - if (!sigsetjmp (jmpbuf, 1)) { - canjump = 1; - - asm volatile ( - "mtspr 256, %0\n\t" - "vand %%v0, %%v0, %%v0" - : - : "r" (-1) - ); - - info->ppc.altivec = 1; - } - canjump = 0; - if (!sigsetjmp (jmpbuf, 1)) { - int x = 0; - canjump = 1; - - /* PPC64 hardware implements the cntlzd instruction */ - asm volatile ("cntlzd %0, %1" : "=r" (x) : "r" (x) ); - - info->ppc.ppc64 = 1; - } - signal (SIGILL, SIG_DFL); /*@@@@@@ should save and restore old signal */ - } -# endif -# else /* !FLAC__USE_ALTIVEC */ - info->ppc.altivec = 0; - info->ppc.ppc64 = 0; -# endif -# else - info->use_asm = false; -# endif - -/* * unknown CPU */ #else diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h index f9b8b041..2bdc9272 100644 --- a/src/libFLAC/include/private/cpu.h +++ b/src/libFLAC/include/private/cpu.h @@ -83,7 +83,6 @@ typedef enum { FLAC__CPUINFO_TYPE_IA32, FLAC__CPUINFO_TYPE_X86_64, - FLAC__CPUINFO_TYPE_PPC, FLAC__CPUINFO_TYPE_UNKNOWN } FLAC__CPUInfo_Type; @@ -111,11 +110,6 @@ typedef struct { FLAC__bool sse41; FLAC__bool sse42; } FLAC__CPUInfo_x86_64; -#elif defined FLAC__CPU_PPC -typedef struct { - FLAC__bool altivec; - FLAC__bool ppc64; -} FLAC__CPUInfo_PPC; #endif typedef struct { @@ -125,8 +119,6 @@ typedef struct { FLAC__CPUInfo_IA32 ia32; #elif defined FLAC__CPU_X86_64 FLAC__CPUInfo_x86_64 x86_64; -#elif defined FLAC__CPU_PPC - FLAC__CPUInfo_PPC ppc; #endif } FLAC__CPUInfo; diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h index 52cbe4dd..a8555a3f 100644 --- a/src/libFLAC/include/private/lpc.h +++ b/src/libFLAC/include/private/lpc.h @@ -196,10 +196,7 @@ void FLAC__lpc_restore_signal_asm_ia32(const FLAC__int32 residual[], unsigned da void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); void FLAC__lpc_restore_signal_wide_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); # endif /* FLAC__HAS_NASM */ -# elif defined FLAC__CPU_PPC -void FLAC__lpc_restore_signal_asm_ppc_altivec_16(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); -void FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); -# endif /* FLAC__CPU_IA32 || FLAC__CPU_PPC */ +# endif /* FLAC__CPU_IA32 */ # if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN # ifdef FLAC__SSE2_SUPPORTED void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); diff --git a/src/libFLAC/ppc/Makefile.am b/src/libFLAC/ppc/Makefile.am deleted file mode 100644 index c652313c..00000000 --- a/src/libFLAC/ppc/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -# libFLAC - Free Lossless Audio Codec library -# Copyright (C) 2004-2009 Josh Coalson -# Copyright (C) 2011-2013 Xiph.Org Foundation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# - Neither the name of the Xiph.org Foundation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -SUBDIRS = as gas diff --git a/src/libFLAC/ppc/as/Makefile.am b/src/libFLAC/ppc/as/Makefile.am deleted file mode 100644 index 366bf49e..00000000 --- a/src/libFLAC/ppc/as/Makefile.am +++ /dev/null @@ -1,53 +0,0 @@ -# libFLAC - Free Lossless Audio Codec library -# Copyright (C) 2004-2009 Josh Coalson -# Copyright (C) 2011-2013 Xiph.Org Foundation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# - Neither the name of the Xiph.org Foundation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#@@@ -if FLaC__HAS_AS__TEMPORARILY_DISABLED - -SUFFIXES = .s .lo - -STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh - -# For some unknown reason libtool can't figure out the tag for 'as', so -# we fake it with --tag=CC and strip out unwanted options. -.s.lo: - $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) as -force_cpusubtype_ALL -o $@ $< - -noinst_LTLIBRARIES = libFLAC-asm.la -libFLAC_asm_la_SOURCES = \ - lpc_asm.s - -else - -EXTRA_DIST = \ - lpc_asm.s - -endif diff --git a/src/libFLAC/ppc/as/lpc_asm.s b/src/libFLAC/ppc/as/lpc_asm.s deleted file mode 100644 index e3c07fc9..00000000 --- a/src/libFLAC/ppc/as/lpc_asm.s +++ /dev/null @@ -1,430 +0,0 @@ -; libFLAC - Free Lossless Audio Codec library -; Copyright (C) 2004-2009 Josh Coalson -; Copyright (C) 2011-2013 Xiph.Org Foundation -; -; Redistribution and use in source and binary forms, with or without -; modification, are permitted provided that the following conditions -; are met: -; -; - Redistributions of source code must retain the above copyright -; notice, this list of conditions and the following disclaimer. -; -; - Redistributions in binary form must reproduce the above copyright -; notice, this list of conditions and the following disclaimer in the -; documentation and/or other materials provided with the distribution. -; -; - Neither the name of the Xiph.org Foundation nor the names of its -; contributors may be used to endorse or promote products derived from -; this software without specific prior written permission. -; -; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -.text - .align 2 -.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16 - -.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8 - -_FLAC__lpc_restore_signal_asm_ppc_altivec_16: -; r3: residual[] -; r4: data_len -; r5: qlp_coeff[] -; r6: order -; r7: lp_quantization -; r8: data[] - -; see src/libFLAC/lpc.c:FLAC__lpc_restore_signal() -; these is a PowerPC/Altivec assembly version which requires bps<=16 (or actual -; bps<=15 for mid-side coding, since that uses an extra bit) - -; these should be fast; the inner loop is unrolled (it takes no more than -; 3*(order%4) instructions, all of which are arithmetic), and all of the -; coefficients and all relevant history stay in registers, so the outer loop -; has only one load from memory (the residual) - -; I have not yet run this through simg4, so there may be some avoidable stalls, -; and there may be a somewhat more clever way to do the outer loop - -; the branch mechanism may prevent dynamic loading; I still need to examine -; this issue, and there may be a more elegant method - - stmw r31,-4(r1) - - addi r9,r1,-28 - li r31,0xf - andc r9,r9,r31 ; for quadword-aligned stack data - - slwi r6,r6,2 ; adjust for word size - slwi r4,r4,2 - add r4,r4,r8 ; r4 = data+data_len - - mfspr r0,256 ; cache old vrsave - addis r31,0,hi16(0xfffffc00) - ori r31,r31,lo16(0xfffffc00) - mtspr 256,r31 ; declare VRs in vrsave - - cmplw cr0,r8,r4 ; i<data_len - bc 4,0,L1400 - - ; load coefficients into v0-v7 and initial history into v8-v15 - li r31,0xf - and r31,r8,r31 ; r31: data%4 - li r11,16 - subf r31,r31,r11 ; r31: 4-(data%4) - slwi r31,r31,3 ; convert to bits for vsro - li r10,-4 - stw r31,-4(r9) - lvewx v0,r10,r9 - vspltisb v18,-1 - vsro v18,v18,v0 ; v18: mask vector - - li r31,0x8 - lvsl v0,0,r31 - vsldoi v0,v0,v0,12 - li r31,0xc - lvsl v1,0,r31 - vspltisb v2,0 - vspltisb v3,-1 - vmrglw v2,v2,v3 - vsel v0,v1,v0,v2 ; v0: reversal permutation vector - - add r10,r5,r6 - lvsl v17,0,r5 ; v17: coefficient alignment permutation vector - vperm v17,v17,v17,v0 ; v17: reversal coefficient alignment permutation vector - - mr r11,r8 - lvsl v16,0,r11 ; v16: history alignment permutation vector - - lvx v0,0,r5 - addi r5,r5,16 - lvx v1,0,r5 - vperm v0,v0,v1,v17 - lvx v8,0,r11 - addi r11,r11,-16 - lvx v9,0,r11 - vperm v8,v9,v8,v16 - cmplw cr0,r5,r10 - bc 12,0,L1101 - vand v0,v0,v18 - addis r31,0,hi16(L1307) - ori r31,r31,lo16(L1307) - b L1199 - -L1101: - addi r5,r5,16 - lvx v2,0,r5 - vperm v1,v1,v2,v17 - addi r11,r11,-16 - lvx v10,0,r11 - vperm v9,v10,v9,v16 - cmplw cr0,r5,r10 - bc 12,0,L1102 - vand v1,v1,v18 - addis r31,0,hi16(L1306) - ori r31,r31,lo16(L1306) - b L1199 - -L1102: - addi r5,r5,16 - lvx v3,0,r5 - vperm v2,v2,v3,v17 - addi r11,r11,-16 - lvx v11,0,r11 - vperm v10,v11,v10,v16 - cmplw cr0,r5,r10 - bc 12,0,L1103 - vand v2,v2,v18 - addis r31,0,hi16(L1305) - ori r31,r31,lo16(L1305) - b L1199 - -L1103: - addi r5,r5,16 - lvx v4,0,r5 - vperm v3,v3,v4,v17 - addi r11,r11,-16 - lvx v12,0,r11 - vperm v11,v12,v11,v16 - cmplw cr0,r5,r10 - bc 12,0,L1104 - vand v3,v3,v18 - addis r31,0,hi16(L1304) - ori r31,r31,lo16(L1304) - b L1199 - -L1104: - addi r5,r5,16 - lvx v5,0,r5 - vperm v4,v4,v5,v17 - addi r11,r11,-16 - lvx v13,0,r11 - vperm v12,v13,v12,v16 - cmplw cr0,r5,r10 - bc 12,0,L1105 - vand v4,v4,v18 - addis r31,0,hi16(L1303) - ori r31,r31,lo16(L1303) - b L1199 - -L1105: - addi r5,r5,16 - lvx v6,0,r5 - vperm v5,v5,v6,v17 - addi r11,r11,-16 - lvx v14,0,r11 - vperm v13,v14,v13,v16 - cmplw cr0,r5,r10 - bc 12,0,L1106 - vand v5,v5,v18 - addis r31,0,hi16(L1302) - ori r31,r31,lo16(L1302) - b L1199 - -L1106: - addi r5,r5,16 - lvx v7,0,r5 - vperm v6,v6,v7,v17 - addi r11,r11,-16 - lvx v15,0,r11 - vperm v14,v15,v14,v16 - cmplw cr0,r5,r10 - bc 12,0,L1107 - vand v6,v6,v18 - addis r31,0,hi16(L1301) - ori r31,r31,lo16(L1301) - b L1199 - -L1107: - addi r5,r5,16 - lvx v19,0,r5 - vperm v7,v7,v19,v17 - addi r11,r11,-16 - lvx v19,0,r11 - vperm v15,v19,v15,v16 - vand v7,v7,v18 - addis r31,0,hi16(L1300) - ori r31,r31,lo16(L1300) - -L1199: - mtctr r31 - - ; set up invariant vectors - vspltish v16,0 ; v16: zero vector - - li r10,-12 - lvsr v17,r10,r8 ; v17: result shift vector - lvsl v18,r10,r3 ; v18: residual shift back vector - - li r10,-4 - stw r7,-4(r9) - lvewx v19,r10,r9 ; v19: lp_quantization vector - -L1200: - vmulosh v20,v0,v8 ; v20: sum vector - bcctr 20,0 - -L1300: - vmulosh v21,v7,v15 - vsldoi v15,v15,v14,4 ; increment history - vaddsws v20,v20,v21 - -L1301: - vmulosh v21,v6,v14 - vsldoi v14,v14,v13,4 - vaddsws v20,v20,v21 - -L1302: - vmulosh v21,v5,v13 - vsldoi v13,v13,v12,4 - vaddsws v20,v20,v21 - -L1303: - vmulosh v21,v4,v12 - vsldoi v12,v12,v11,4 - vaddsws v20,v20,v21 - -L1304: - vmulosh v21,v3,v11 - vsldoi v11,v11,v10,4 - vaddsws v20,v20,v21 - -L1305: - vmulosh v21,v2,v10 - vsldoi v10,v10,v9,4 - vaddsws v20,v20,v21 - -L1306: - vmulosh v21,v1,v9 - vsldoi v9,v9,v8,4 - vaddsws v20,v20,v21 - -L1307: - vsumsws v20,v20,v16 ; v20[3]: sum - vsraw v20,v20,v19 ; v20[3]: sum >> lp_quantization - - lvewx v21,0,r3 ; v21[n]: *residual - vperm v21,v21,v21,v18 ; v21[3]: *residual - vaddsws v20,v21,v20 ; v20[3]: *residual + (sum >> lp_quantization) - vsldoi v18,v18,v18,4 ; increment shift vector - - vperm v21,v20,v20,v17 ; v21[n]: shift for storage - vsldoi v17,v17,v17,12 ; increment shift vector - stvewx v21,0,r8 - - vsldoi v20,v20,v20,12 - vsldoi v8,v8,v20,4 ; insert value onto history - - addi r3,r3,4 - addi r8,r8,4 - cmplw cr0,r8,r4 ; i<data_len - bc 12,0,L1200 - -L1400: - mtspr 256,r0 ; restore old vrsave - lmw r31,-4(r1) - blr - -_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8: -; r3: residual[] -; r4: data_len -; r5: qlp_coeff[] -; r6: order -; r7: lp_quantization -; r8: data[] - -; see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above -; this version assumes order<=8; it uses fewer vector registers, which should -; save time in context switches, and has less code, which may improve -; instruction caching - - stmw r31,-4(r1) - - addi r9,r1,-28 - li r31,0xf - andc r9,r9,r31 ; for quadword-aligned stack data - - slwi r6,r6,2 ; adjust for word size - slwi r4,r4,2 - add r4,r4,r8 ; r4 = data+data_len - - mfspr r0,256 ; cache old vrsave - addis r31,0,hi16(0xffc00000) - ori r31,r31,lo16(0xffc00000) - mtspr 256,r31 ; declare VRs in vrsave - - cmplw cr0,r8,r4 ; i<data_len - bc 4,0,L2400 - - ; load coefficients into v0-v1 and initial history into v2-v3 - li r31,0xf - and r31,r8,r31 ; r31: data%4 - li r11,16 - subf r31,r31,r11 ; r31: 4-(data%4) - slwi r31,r31,3 ; convert to bits for vsro - li r10,-4 - stw r31,-4(r9) - lvewx v0,r10,r9 - vspltisb v6,-1 - vsro v6,v6,v0 ; v6: mask vector - - li r31,0x8 - lvsl v0,0,r31 - vsldoi v0,v0,v0,12 - li r31,0xc - lvsl v1,0,r31 - vspltisb v2,0 - vspltisb v3,-1 - vmrglw v2,v2,v3 - vsel v0,v1,v0,v2 ; v0: reversal permutation vector - - add r10,r5,r6 - lvsl v5,0,r5 ; v5: coefficient alignment permutation vector - vperm v5,v5,v5,v0 ; v5: reversal coefficient alignment permutation vector - - mr r11,r8 - lvsl v4,0,r11 ; v4: history alignment permutation vector - - lvx v0,0,r5 - addi r5,r5,16 - lvx v1,0,r5 - vperm v0,v0,v1,v5 - lvx v2,0,r11 - addi r11,r11,-16 - lvx v3,0,r11 - vperm v2,v3,v2,v4 - cmplw cr0,r5,r10 - bc 12,0,L2101 - vand v0,v0,v6 - addis r31,0,hi16(L2301) - ori r31,r31,lo16(L2301) - b L2199 - -L2101: - addi r5,r5,16 - lvx v7,0,r5 - vperm v1,v1,v7,v5 - addi r11,r11,-16 - lvx v7,0,r11 - vperm v3,v7,v3,v4 - vand v1,v1,v6 - addis r31,0,hi16(L2300) - ori r31,r31,lo16(L2300) - -L2199: - mtctr r31 - - ; set up invariant vectors - vspltish v4,0 ; v4: zero vector - - li r10,-12 - lvsr v5,r10,r8 ; v5: result shift vector - lvsl v6,r10,r3 ; v6: residual shift back vector - - li r10,-4 - stw r7,-4(r9) - lvewx v7,r10,r9 ; v7: lp_quantization vector - -L2200: - vmulosh v8,v0,v2 ; v8: sum vector - bcctr 20,0 - -L2300: - vmulosh v9,v1,v3 - vsldoi v3,v3,v2,4 - vaddsws v8,v8,v9 - -L2301: - vsumsws v8,v8,v4 ; v8[3]: sum - vsraw v8,v8,v7 ; v8[3]: sum >> lp_quantization - - lvewx v9,0,r3 ; v9[n]: *residual - vperm v9,v9,v9,v6 ; v9[3]: *residual - vaddsws v8,v9,v8 ; v8[3]: *residual + (sum >> lp_quantization) - vsldoi v6,v6,v6,4 ; increment shift vector - - vperm v9,v8,v8,v5 ; v9[n]: shift for storage - vsldoi v5,v5,v5,12 ; increment shift vector - stvewx v9,0,r8 - - vsldoi v8,v8,v8,12 - vsldoi v2,v2,v8,4 ; insert value onto history - - addi r3,r3,4 - addi r8,r8,4 - cmplw cr0,r8,r4 ; i<data_len - bc 12,0,L2200 - -L2400: - mtspr 256,r0 ; restore old vrsave - lmw r31,-4(r1) - blr diff --git a/src/libFLAC/ppc/gas/Makefile.am b/src/libFLAC/ppc/gas/Makefile.am deleted file mode 100644 index 81b3d703..00000000 --- a/src/libFLAC/ppc/gas/Makefile.am +++ /dev/null @@ -1,53 +0,0 @@ -# libFLAC - Free Lossless Audio Codec library -# Copyright (C) 2004-2009 Josh Coalson -# Copyright (C) 2011-2013 Xiph.Org Foundation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# - Neither the name of the Xiph.org Foundation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#@@@ -if FLaC__HAS_GAS__TEMPORARILY_DISABLED - -SUFFIXES = .s .lo - -STRIP_NON_ASM = sh $(top_srcdir)/strip_non_asm_libtool_args.sh - -# For some unknown reason libtool can't figure out the tag for 'gas', so -# we fake it with --tag=CC and strip out unwanted options. -.s.lo: - $(LIBTOOL) --tag=CC --mode=compile $(STRIP_NON_ASM) gas -force_cpusubtype_ALL -o $@ $< - -noinst_LTLIBRARIES = libFLAC-asm.la -libFLAC_asm_la_SOURCES = \ - lpc_asm.s - -else - -EXTRA_DIST = \ - lpc_asm.s - -endif diff --git a/src/libFLAC/ppc/gas/lpc_asm.s b/src/libFLAC/ppc/gas/lpc_asm.s deleted file mode 100644 index 77a72bb4..00000000 --- a/src/libFLAC/ppc/gas/lpc_asm.s +++ /dev/null @@ -1,432 +0,0 @@ -# libFLAC - Free Lossless Audio Codec library -# Copyright (C) 2004-2009 Josh Coalson -# Copyright (C) 2011-2013 Xiph.Org Foundation -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# - Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# - Neither the name of the Xiph.org Foundation nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -.text - .align 2 -.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16 -.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16, @function - -.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8 -.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8, @function - -_FLAC__lpc_restore_signal_asm_ppc_altivec_16: -# r3: residual[] -# r4: data_len -# r5: qlp_coeff[] -# r6: order -# r7: lp_quantization -# r8: data[] - -# see src/libFLAC/lpc.c:FLAC__lpc_restore_signal() -# these is a PowerPC/Altivec assembly version which requires bps<=16 (or actual -# bps<=15 for mid-side coding, since that uses an extra bit) - -# these should be fast; the inner loop is unrolled (it takes no more than -# 3*(order%4) instructions, all of which are arithmetic), and all of the -# coefficients and all relevant history stay in registers, so the outer loop -# has only one load from memory (the residual) - -# I have not yet run this through simg4, so there may be some avoidable stalls, -# and there may be a somewhat more clever way to do the outer loop - -# the branch mechanism may prevent dynamic loading; I still need to examine -# this issue, and there may be a more elegant method - - stmw r31,-4(r1) - - addi r9,r1,-28 - li r31,0xf - andc r9,r9,r31 # for quadword-aligned stack data - - slwi r6,r6,2 # adjust for word size - slwi r4,r4,2 - add r4,r4,r8 # r4 = data+data_len - - mfspr r0,256 # cache old vrsave - addis r31,0,0xffff - ori r31,r31,0xfc00 - mtspr 256,r31 # declare VRs in vrsave - - cmplw cr0,r8,r4 # i<data_len - bc 4,0,L1400 - - # load coefficients into v0-v7 and initial history into v8-v15 - li r31,0xf - and r31,r8,r31 # r31: data%4 - li r11,16 - subf r31,r31,r11 # r31: 4-(data%4) - slwi r31,r31,3 # convert to bits for vsro - li r10,-4 - stw r31,-4(r9) - lvewx v0,r10,r9 - vspltisb v18,-1 - vsro v18,v18,v0 # v18: mask vector - - li r31,0x8 - lvsl v0,0,r31 - vsldoi v0,v0,v0,12 - li r31,0xc - lvsl v1,0,r31 - vspltisb v2,0 - vspltisb v3,-1 - vmrglw v2,v2,v3 - vsel v0,v1,v0,v2 # v0: reversal permutation vector - - add r10,r5,r6 - lvsl v17,0,r5 # v17: coefficient alignment permutation vector - vperm v17,v17,v17,v0 # v17: reversal coefficient alignment permutation vector - - mr r11,r8 - lvsl v16,0,r11 # v16: history alignment permutation vector - - lvx v0,0,r5 - addi r5,r5,16 - lvx v1,0,r5 - vperm v0,v0,v1,v17 - lvx v8,0,r11 - addi r11,r11,-16 - lvx v9,0,r11 - vperm v8,v9,v8,v16 - cmplw cr0,r5,r10 - bc 12,0,L1101 - vand v0,v0,v18 - addis r31,0,L1307@ha - ori r31,r31,L1307@l - b L1199 - -L1101: - addi r5,r5,16 - lvx v2,0,r5 - vperm v1,v1,v2,v17 - addi r11,r11,-16 - lvx v10,0,r11 - vperm v9,v10,v9,v16 - cmplw cr0,r5,r10 - bc 12,0,L1102 - vand v1,v1,v18 - addis r31,0,L1306@ha - ori r31,r31,L1306@l - b L1199 - -L1102: - addi r5,r5,16 - lvx v3,0,r5 - vperm v2,v2,v3,v17 - addi r11,r11,-16 - lvx v11,0,r11 - vperm v10,v11,v10,v16 - cmplw cr0,r5,r10 - bc 12,0,L1103 - vand v2,v2,v18 - lis r31,L1305@ha - la r31,L1305@l(r31) - b L1199 - -L1103: - addi r5,r5,16 - lvx v4,0,r5 - vperm v3,v3,v4,v17 - addi r11,r11,-16 - lvx v12,0,r11 - vperm v11,v12,v11,v16 - cmplw cr0,r5,r10 - bc 12,0,L1104 - vand v3,v3,v18 - lis r31,L1304@ha - la r31,L1304@l(r31) - b L1199 - -L1104: - addi r5,r5,16 - lvx v5,0,r5 - vperm v4,v4,v5,v17 - addi r11,r11,-16 - lvx v13,0,r11 - vperm v12,v13,v12,v16 - cmplw cr0,r5,r10 - bc 12,0,L1105 - vand v4,v4,v18 - lis r31,L1303@ha - la r31,L1303@l(r31) - b L1199 - -L1105: - addi r5,r5,16 - lvx v6,0,r5 - vperm v5,v5,v6,v17 - addi r11,r11,-16 - lvx v14,0,r11 - vperm v13,v14,v13,v16 - cmplw cr0,r5,r10 - bc 12,0,L1106 - vand v5,v5,v18 - lis r31,L1302@ha - la r31,L1302@l(r31) - b L1199 - -L1106: - addi r5,r5,16 - lvx v7,0,r5 - vperm v6,v6,v7,v17 - addi r11,r11,-16 - lvx v15,0,r11 - vperm v14,v15,v14,v16 - cmplw cr0,r5,r10 - bc 12,0,L1107 - vand v6,v6,v18 - lis r31,L1301@ha - la r31,L1301@l(r31) - b L1199 - -L1107: - addi r5,r5,16 - lvx v19,0,r5 - vperm v7,v7,v19,v17 - addi r11,r11,-16 - lvx v19,0,r11 - vperm v15,v19,v15,v16 - vand v7,v7,v18 - lis r31,L1300@ha - la r31,L1300@l(r31) - -L1199: - mtctr r31 - - # set up invariant vectors - vspltish v16,0 # v16: zero vector - - li r10,-12 - lvsr v17,r10,r8 # v17: result shift vector - lvsl v18,r10,r3 # v18: residual shift back vector - - li r10,-4 - stw r7,-4(r9) - lvewx v19,r10,r9 # v19: lp_quantization vector - -L1200: - vmulosh v20,v0,v8 # v20: sum vector - bcctr 20,0 - -L1300: - vmulosh v21,v7,v15 - vsldoi v15,v15,v14,4 # increment history - vaddsws v20,v20,v21 - -L1301: - vmulosh v21,v6,v14 - vsldoi v14,v14,v13,4 - vaddsws v20,v20,v21 - -L1302: - vmulosh v21,v5,v13 - vsldoi v13,v13,v12,4 - vaddsws v20,v20,v21 - -L1303: - vmulosh v21,v4,v12 - vsldoi v12,v12,v11,4 - vaddsws v20,v20,v21 - -L1304: - vmulosh v21,v3,v11 - vsldoi v11,v11,v10,4 - vaddsws v20,v20,v21 - -L1305: - vmulosh v21,v2,v10 - vsldoi v10,v10,v9,4 - vaddsws v20,v20,v21 - -L1306: - vmulosh v21,v1,v9 - vsldoi v9,v9,v8,4 - vaddsws v20,v20,v21 - -L1307: - vsumsws v20,v20,v16 # v20[3]: sum - vsraw v20,v20,v19 # v20[3]: sum >> lp_quantization - - lvewx v21,0,r3 # v21[n]: *residual - vperm v21,v21,v21,v18 # v21[3]: *residual - vaddsws v20,v21,v20 # v20[3]: *residual + (sum >> lp_quantization) - vsldoi v18,v18,v18,4 # increment shift vector - - vperm v21,v20,v20,v17 # v21[n]: shift for storage - vsldoi v17,v17,v17,12 # increment shift vector - stvewx v21,0,r8 - - vsldoi v20,v20,v20,12 - vsldoi v8,v8,v20,4 # insert value onto history - - addi r3,r3,4 - addi r8,r8,4 - cmplw cr0,r8,r4 # i<data_len - bc 12,0,L1200 - -L1400: - mtspr 256,r0 # restore old vrsave - lmw r31,-4(r1) - blr - -_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8: -# r3: residual[] -# r4: data_len -# r5: qlp_coeff[] -# r6: order -# r7: lp_quantization -# r8: data[] - -# see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above -# this version assumes order<=8; it uses fewer vector registers, which should -# save time in context switches, and has less code, which may improve -# instruction caching - - stmw r31,-4(r1) - - addi r9,r1,-28 - li r31,0xf - andc r9,r9,r31 # for quadword-aligned stack data - - slwi r6,r6,2 # adjust for word size - slwi r4,r4,2 - add r4,r4,r8 # r4 = data+data_len - - mfspr r0,256 # cache old vrsave - addis r31,0,0xffc0 - ori r31,r31,0x0000 - mtspr 256,r31 # declare VRs in vrsave - - cmplw cr0,r8,r4 # i<data_len - bc 4,0,L2400 - - # load coefficients into v0-v1 and initial history into v2-v3 - li r31,0xf - and r31,r8,r31 # r31: data%4 - li r11,16 - subf r31,r31,r11 # r31: 4-(data%4) - slwi r31,r31,3 # convert to bits for vsro - li r10,-4 - stw r31,-4(r9) - lvewx v0,r10,r9 - vspltisb v6,-1 - vsro v6,v6,v0 # v6: mask vector - - li r31,0x8 - lvsl v0,0,r31 - vsldoi v0,v0,v0,12 - li r31,0xc - lvsl v1,0,r31 - vspltisb v2,0 - vspltisb v3,-1 - vmrglw v2,v2,v3 - vsel v0,v1,v0,v2 # v0: reversal permutation vector - - add r10,r5,r6 - lvsl v5,0,r5 # v5: coefficient alignment permutation vector - vperm v5,v5,v5,v0 # v5: reversal coefficient alignment permutation vector - - mr r11,r8 - lvsl v4,0,r11 # v4: history alignment permutation vector - - lvx v0,0,r5 - addi r5,r5,16 - lvx v1,0,r5 - vperm v0,v0,v1,v5 - lvx v2,0,r11 - addi r11,r11,-16 - lvx v3,0,r11 - vperm v2,v3,v2,v4 - cmplw cr0,r5,r10 - bc 12,0,L2101 - vand v0,v0,v6 - lis r31,L2301@ha - la r31,L2301@l(r31) - b L2199 - -L2101: - addi r5,r5,16 - lvx v7,0,r5 - vperm v1,v1,v7,v5 - addi r11,r11,-16 - lvx v7,0,r11 - vperm v3,v7,v3,v4 - vand v1,v1,v6 - lis r31,L2300@ha - la r31,L2300@l(r31) - -L2199: - mtctr r31 - - # set up invariant vectors - vspltish v4,0 # v4: zero vector - - li r10,-12 - lvsr v5,r10,r8 # v5: result shift vector - lvsl v6,r10,r3 # v6: residual shift back vector - - li r10,-4 - stw r7,-4(r9) - lvewx v7,r10,r9 # v7: lp_quantization vector - -L2200: - vmulosh v8,v0,v2 # v8: sum vector - bcctr 20,0 - -L2300: - vmulosh v9,v1,v3 - vsldoi v3,v3,v2,4 - vaddsws v8,v8,v9 - -L2301: - vsumsws v8,v8,v4 # v8[3]: sum - vsraw v8,v8,v7 # v8[3]: sum >> lp_quantization - - lvewx v9,0,r3 # v9[n]: *residual - vperm v9,v9,v9,v6 # v9[3]: *residual - vaddsws v8,v9,v8 # v8[3]: *residual + (sum >> lp_quantization) - vsldoi v6,v6,v6,4 # increment shift vector - - vperm v9,v8,v8,v5 # v9[n]: shift for storage - vsldoi v5,v5,v5,12 # increment shift vector - stvewx v9,0,r8 - - vsldoi v8,v8,v8,12 - vsldoi v2,v2,v8,4 # insert value onto history - - addi r3,r3,4 - addi r8,r8,4 - cmplw cr0,r8,r4 # i<data_len - bc 12,0,L2200 - -L2400: - mtspr 256,r0 # restore old vrsave - lmw r31,-4(r1) - blr diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c index e238eacc..0a03c1e1 100644 --- a/src/libFLAC/stream_decoder.c +++ b/src/libFLAC/stream_decoder.c @@ -142,8 +142,6 @@ typedef struct FLAC__StreamDecoderPrivate { void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); /* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit): */ void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); - /* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit), AND order <= 8: */ - void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]); void *client_data; FILE *file; /* only used if FLAC__stream_decoder_init_file()/FLAC__stream_decoder_init_file() called, else NULL */ FLAC__BitReader *input; @@ -391,7 +389,6 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal; decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal; - decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal; /* now override with asm where appropriate */ #ifndef FLAC__NO_ASM if(decoder->private_->cpuinfo.use_asm) { @@ -402,19 +399,16 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( if(decoder->private_->cpuinfo.ia32.mmx) { decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx; - decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32_mmx; } else { decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32; - decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ia32; } #endif #ifdef FLAC__HAS_X86INTRIN # if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT_SSE: not faster than ASM/MMX code */ if(decoder->private_->cpuinfo.ia32.sse2) { decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2; - decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2; } # endif # if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT_SSE: faster than asm; TODO: more tests */ @@ -422,12 +416,6 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_( decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41; # endif #endif -#elif defined FLAC__CPU_PPC - FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC); - if(decoder->private_->cpuinfo.ppc.altivec) { - decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ppc_altivec_16; - decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8; - } #endif } #endif @@ -1320,9 +1308,6 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne memset(tmp, 0, sizeof(FLAC__int32)*4); decoder->private_->output[i] = tmp + 4; - /* WATCHOUT: - * minimum of quadword alignment for PPC vector optimizations is REQUIRED: - */ if(!FLAC__memory_alloc_aligned_int32_array(size, &decoder->private_->residual_unaligned[i], &decoder->private_->residual[i])) { decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR; return false; @@ -2664,12 +2649,8 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un if( (FLAC__uint64)order * ((((FLAC__uint64)1)<<bps)-1) * ((1<<subframe->qlp_coeff_precision)-1) < (((FLAC__uint64)-1) << 32) ) */ if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32) - if(bps <= 16 && subframe->qlp_coeff_precision <= 16) { - if(order <= 8) - decoder->private_->local_lpc_restore_signal_16bit_order8(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); - else - decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); - } + if(bps <= 16 && subframe->qlp_coeff_precision <= 16) + decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); else decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order); else |