summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Koleszar <jkoleszar@google.com>2011-08-19 14:06:00 -0400
committerJohn Koleszar <jkoleszar@google.com>2011-12-19 13:37:49 -0800
commit976752507a9d56e367bbc9af250f9d2f72a16928 (patch)
tree8be963f474f22bbf64a851b1e83f31be61f12fd4
parent20573f0640e181e8363fd1efc52f16569715eb97 (diff)
downloadlibvpx-sandbox/jkoleszar/new-rtcd.tar.gz
New RTCD prototype implementationsandbox/jkoleszar/new-rtcd
This is a proof of concept RTCD implementation to replace the current system of nested includes, prototypes, INVOKE macros, etc. Currently only the decoder specific functions are implemented in the new system, and only for x86. Overview: RTCD "functions" are implemented as either a global function pointer or a macro (when only one eligible specialization is available). Functions which have RTCD specializations are listed using a simple DSL identifying the function's base name, its prototype, and the architecture extensions that specializations are available for. Advantages over the old system: - No INVOKE macros. A call to an RTCD function looks like an ordinary function call. - No need to pass vtables around. - If there is only one eligible function to call, the function is called directly, rather than indirecting through a function pointer. - Supports the notion of "required" extensions, so in combination with the above, on x86_64 if the best function available is sse2 or lower it will be called directly, since all x86_64 platforms implement sse2. - Elides all references to functions which will never be called, which could reduce binary size. For example if sse2 is required and there are both mmx and sse2 implementations of a certain function, the code will have no link time references to the mmx code. - Significantly easier to add a new function, just one file to edit. Disadvantages: - Requires global writable data (though this is not a new requirement) - 1 new generated source file. TODO: - configure time --disable-* extensions are not yet passed through to vpx_rtcd.h generation. - configure should support specifying required extensions. Change-Id: Iae6edab65315f79c168485c96872641c5aa09d55
-rwxr-xr-xbuild/make/rtcd.sh291
-rw-r--r--libs.mk9
-rw-r--r--vp8/common/blockd.h2
-rw-r--r--vp8/common/rtcd.c12
-rw-r--r--vp8/decoder/arm/arm_dsystemdependent.c50
-rw-r--r--vp8/decoder/arm/armv6/idct_blk_v6.c1
-rw-r--r--vp8/decoder/arm/dequantize_arm.c4
-rw-r--r--vp8/decoder/arm/neon/idct_blk_neon.c1
-rw-r--r--vp8/decoder/decodframe.c12
-rw-r--r--vp8/decoder/dequantize.c3
-rw-r--r--vp8/decoder/dequantize.h85
-rw-r--r--vp8/decoder/generic/dsystemdependent.c18
-rw-r--r--vp8/decoder/idct_blk.c1
-rw-r--r--vp8/decoder/onyxd_int.h5
-rw-r--r--vp8/decoder/rtcd_defs.sh18
-rw-r--r--vp8/decoder/threading.c10
-rw-r--r--vp8/decoder/x86/dequantize_x86.h58
-rw-r--r--vp8/decoder/x86/idct_blk_mmx.c2
-rw-r--r--vp8/decoder/x86/idct_blk_sse2.c1
-rw-r--r--vp8/decoder/x86/x86_dsystemdependent.c36
-rw-r--r--vp8/vp8_common.mk1
-rw-r--r--vp8/vp8dx.mk3
-rw-r--r--vp8/vp8dx_arm.mk1
23 files changed, 354 insertions, 270 deletions
diff --git a/build/make/rtcd.sh b/build/make/rtcd.sh
new file mode 100755
index 000000000..eea5bed82
--- /dev/null
+++ b/build/make/rtcd.sh
@@ -0,0 +1,291 @@
+#!/bin/sh
+self=$0
+
+# Print the command-line help text to stderr and terminate the script with
+# exit status 1. The program name shown in the text is taken from $self.
+usage() {
+ cat <<EOF >&2
+Usage: $self [options] FILE
+
+Reads the Run Time CPU Detections definitions from FILE and generates a
+C header file on stdout.
+
+Options:
+ --arch=ARCH Architecture to generate defs for (required)
+ --disable-EXT Disable support for EXT extensions
+ --sym=SYMBOL Unique symbol to use for RTCD initialization function
+
+EOF
+ exit 1
+}
+
+# Fatal-error helper: write all arguments to stderr as a single line and
+# abort the script with exit status 1.
+die() {
+  >&2 echo "$@"
+  exit 1
+}
+
+# Abort because an option was given without its "=VALUE" part. The option
+# text is read from the global $opt set by the option-parsing loop.
+die_argument_required() {
+  die "Option ${opt} requires argument"
+}
+
+for opt; do
+ optval="${opt#*=}"
+ case "$opt" in
+ --arch) die_argument_required;;
+ --arch=*) arch=${optval};;
+ --disable-*) eval "disable_${opt#--disable-}=true";;
+ --sym) die_argument_required;;
+ --sym=*) symbol=${optval};;
+ --rtcd=*) CONFIG_RUNTIME_CPU_DETECT=${optval};;
+ -h|--help)
+ usage
+ ;;
+ -*)
+ die "Unrecognized option: ${opt%%=*}"
+ ;;
+ *)
+ defs_file="$defs_file $opt"
+ ;;
+ esac
+ shift
+done
+for f in $defs_file; do [ -f "$f" ] || usage; done
+[ -n "$arch" ] || usage
+
+#
+# Routines for the RTCD DSL to call
+#
+# DSL directive: declare an RTCD function.
+#   $1 - C return type
+#   $2 - function base name
+#   $3 - C argument list
+# Stores the return type and argument list in the global variables
+# <name>_rtyp and <name>_args, and appends the name to ALL_FUNCS.
+prototype() {
+  local rtyp="$1"
+  local fn="$2"
+  local args="$3"
+
+  eval "${fn}_rtyp='${rtyp}'"
+  eval "${fn}_args='${args}'"
+  ALL_FUNCS="${ALL_FUNCS} ${fn}"
+}
+
+# DSL directive: list the extensions for which a specialization of function
+# $1 exists. For "c" and for every extension named in the remaining
+# arguments, the global variable <name>_<ext> is set to the symbol name
+# <name>_<ext>.
+specialize() {
+  local fn="$1"
+  shift
+  # The plain C version is always available, so put it first in the list.
+  set -- c "$@"
+  for opt; do
+    eval "${fn}_${opt}=${fn}_${opt}"
+  done
+}
+
+require() {
+ for fn in $ALL_FUNCS; do
+ for opt in "$@"; do
+ local ofn=$(eval "echo \$${fn}_${opt}")
+ [ -z "$ofn" ] && continue
+
+ # if we already have a default, then we can undefine it, as we know
+ # we can do better.
+ local best=$(eval "echo \$${fn}_default")
+ [ -n "$best" ] && eval "unset $best"
+ eval "${fn}_default=${fn}_${opt}"
+ done
+ done
+}
+
+# DSL directive: register $1, the name of a shell function, in
+# ALL_FORWARD_DECLS; process_forward_decls later runs every registered name.
+forward_decls() {
+  ALL_FORWARD_DECLS="${ALL_FORWARD_DECLS} ${1}"
+}
+
+#
+# Include the user's directives
+#
+for f in $defs_file; do
+  # "." looks up an operand without a slash on $PATH rather than in the
+  # current directory, so anchor bare file names to "./".
+  case "$f" in
+    */*) . "$f";;
+    *) . "./$f";;
+  esac
+done
+
+#
+# Process the directives according to the command line
+#
+process_forward_decls() {
+ for fn in $ALL_FORWARD_DECLS; do
+ eval $fn
+ done
+}
+
+# Decide, for each function in ALL_FUNCS, whether it must be called through
+# a function pointer. The arguments are the candidate extensions. A function
+# with exactly one remaining specialization among them gets
+# <name>_indirect=false; any other count gets <name>_indirect=true.
+#
+# When CONFIG_RUNTIME_CPU_DETECT is not "yes", every extension in ALL_ARCHS
+# is first passed to require.
+#
+# Produces no output: it is called outside any command substitution, so
+# anything printed here would land at the top of the generated header.
+determine_indirection() {
+  local n ofn
+  [ "$CONFIG_RUNTIME_CPU_DETECT" = "yes" ] || require $ALL_ARCHS
+  for fn in $ALL_FUNCS; do
+    n=""
+    for opt in "$@"; do
+      # Direct assignment: no word splitting or globbing of the value.
+      eval "ofn=\${${fn}_${opt}}"
+      [ -z "$ofn" ] && continue
+      n="${n}x"
+    done
+    if [ "$n" = "x" ]; then
+      eval "${fn}_indirect=false"
+    else
+      eval "${fn}_indirect=true"
+    fi
+  done
+}
+
+# Print the C declarations for every function in ALL_FUNCS. The arguments
+# are the candidate extensions. For each function this prints:
+#   - a prototype for every specialization that is still defined,
+#   - "#define <name> <default>" when exactly one specialization remains,
+#     otherwise an RTCD_EXTERN function pointer declaration,
+#   - a blank separator line.
+#
+# Return types and argument lists contain "*" and spaces, so they are read
+# by direct assignment instead of through an unquoted echo, which would
+# subject them to field splitting and pathname expansion.
+declare_function_pointers() {
+  local rtyp args dfn ofn count
+  for fn in $ALL_FUNCS; do
+    count=""
+    eval "rtyp=\${${fn}_rtyp}"
+    eval "args=\${${fn}_args}"
+    eval "dfn=\${${fn}_default}"
+    # <name>_default holds the name of the variable describing the default
+    # specialization; dereference it once more to get the symbol itself.
+    if [ -n "$dfn" ]; then
+      eval "dfn=\${${dfn}}"
+    fi
+    for opt in "$@"; do
+      eval "ofn=\${${fn}_${opt}}"
+      [ -z "$ofn" ] && continue
+      count="${count}x"
+      echo "$rtyp ${ofn}($args);"
+    done
+    if [ "$count" = "x" ]; then
+      echo "#define ${fn} ${dfn}"
+    else
+      echo "RTCD_EXTERN $rtyp (*${fn})($args);"
+    fi
+    echo
+  done
+}
+
+# Print the C statements that initialize the function pointers. The
+# arguments are the candidate extensions. For every function in ALL_FUNCS
+# whose <name>_indirect flag is not "false", this prints an assignment of
+# the default specialization followed by one "if (have_<ext>)" override for
+# each other specialization that is still defined, then a blank line.
+set_function_pointers() {
+  local dfn ofn indirect
+  for fn in $ALL_FUNCS; do
+    eval "dfn=\${${fn}_default}"
+    # <name>_default holds the name of the variable describing the default
+    # specialization; dereference it once more to get the symbol itself.
+    if [ -n "$dfn" ]; then
+      eval "dfn=\${${dfn}}"
+    fi
+    # Compare the flag as a string rather than executing it as a command.
+    # An unset flag counts as indirect, as it did when it was executed.
+    eval "indirect=\${${fn}_indirect}"
+    if [ "$indirect" != "false" ]; then
+      echo " $fn = $dfn;"
+      for opt in "$@"; do
+        eval "ofn=\${${fn}_${opt}}"
+        [ -z "$ofn" ] && continue
+        # The default was assigned above; no override needed for it.
+        [ "$ofn" = "$dfn" ] && continue
+        echo " if (have_${opt}) $fn = $ofn;"
+      done
+      echo
+    fi
+  done
+}
+
+filter() {
+ local filtered
+ for opt in "$@"; do
+ [ -z $(eval "echo \$disable_${opt}") ] && filtered="$filtered $opt"
+ done
+ echo $filtered
+}
+
+#
+# Helper functions for generating the arch specific RTCD files
+#
+common_top() {
+ local outfile_basename=$(basename ${outfile:-rtcd.h})
+ local include_guard=$(echo -n $outfile_basename | tr [a-z] [A-Z] | tr -c [A-Z] _)
+ cat <<EOF
+#ifndef ${include_guard}
+#define ${include_guard}
+
+#ifdef RTCD_C
+#define RTCD_EXTERN
+#else
+#define RTCD_EXTERN extern
+#endif
+
+$(process_forward_decls)
+
+$(declare_function_pointers c $ALL_ARCHS)
+EOF
+}
+
+# Print the closing line of the generated header, which ends the include
+# guard opened by common_top.
+common_bottom() {
+  echo "#endif"
+}
+
+x86() {
+ determine_indirection c $ALL_ARCHS
+ cat <<EOF
+$(common_top)
+void ${symbol:-rtcd}(void);
+
+#ifdef RTCD_C
+#include "vpx_ports/x86.h"
+void ${symbol:-rtcd}(void)
+{
+ int flags = x86_simd_caps();
+EOF
+
+ # Write out the helper variable for each enabled extension
+ for opt in $ALL_ARCHS; do
+ local uc=$(echo -n $opt | tr [a-z] [A-Z])
+ echo " int have_${opt} = flags & HAS_${uc};"
+ done
+ cat <<EOF
+
+$(set_function_pointers c $ALL_ARCHS)
+}
+#endif
+$(common_bottom)
+EOF
+}
+
+# Generate the complete header for ARM targets on stdout: the common top
+# part, two #include lines, the prototype of the init function (named by
+# $symbol, default "rtcd") and, under RTCD_C, its definition.
+#
+# Unlike the x86 variant, the generated init function takes a
+# "VP8D_COMP *pbi" argument and reads the CPU flags from
+# pbi->common.rtcd.flags. The have_v5te/have_v6/have_neon variables are
+# written out literally, not derived from ALL_ARCHS, and are only emitted
+# inside "#if CONFIG_RUNTIME_CPU_DETECT".
+arm() {
+ determine_indirection c $ALL_ARCHS
+ cat <<EOF
+$(common_top)
+#include "vpx_config.h"
+#include "vp8/decoder/onyxd_int.h"
+
+void ${symbol:-rtcd}(VP8D_COMP *pbi);
+
+#ifdef RTCD_C
+void ${symbol:-rtcd}(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+ int flags = pbi->common.rtcd.flags;
+
+ int have_v5te = flags & HAS_EDSP;
+ int have_v6 = flags & HAS_MEDIA;
+ int have_neon = flags & HAS_NEON;
+#endif
+
+$(set_function_pointers c $ALL_ARCHS)
+}
+#endif
+$(common_bottom)
+EOF
+}
+
+#
+# Main Driver
+#
+require c
+case $arch in
+ x86)
+ ALL_ARCHS=$(filter mmx sse sse2 sse3 sse4_1)
+ x86
+ ;;
+ x86_64)
+ ALL_ARCHS=$(filter mmx sse sse2 sse3 sse4_1)
+ require $(filter mmx sse sse2)
+ x86
+ ;;
+ armv5te)
+ ALL_ARCHS=$(filter v5te)
+ arm
+ ;;
+ armv6)
+ ALL_ARCHS=$(filter v5te v6)
+ arm
+ ;;
+ armv7)
+ ALL_ARCHS=$(filter v5te v6 neon)
+ arm
+ ;;
+ *)
+ die "Unrecognized architecture: $arch"
+esac
diff --git a/libs.mk b/libs.mk
index 79a1d001d..a4792efb5 100644
--- a/libs.mk
+++ b/libs.mk
@@ -322,6 +322,15 @@ endif
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
+#
+# Rule to generate runtime cpu detection files
+#
+$(OBJS-yes:.o=.d): vpx_rtcd.h
+vpx_rtcd.h: $(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
+ $(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) --sym=vpx_rtcd \
+ --rtcd=$(CONFIG_RUNTIME_CPU_DETECT) $^ > $@
+CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h
+
CODEC_DOC_SRCS += vpx/vpx_codec.h \
vpx/vpx_decoder.h \
vpx/vpx_encoder.h \
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 91e90e2a6..7a07abde6 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -182,7 +182,7 @@ typedef struct
} LOWER_RES_INFO;
#endif
-typedef struct
+typedef struct blockd
{
short *qcoeff;
short *dqcoeff;
diff --git a/vp8/common/rtcd.c b/vp8/common/rtcd.c
new file mode 100644
index 000000000..232640dc8
--- /dev/null
+++ b/vp8/common/rtcd.c
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vpx_config.h"
+#define RTCD_C
+#include "vpx_rtcd.h"
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
deleted file mode 100644
index f802c5181..000000000
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_config.h"
-#include "vpx_ports/arm.h"
-#include "vp8/common/blockd.h"
-#include "vp8/common/pragmas.h"
-#include "vp8/decoder/dequantize.h"
-#include "vp8/decoder/onyxd_int.h"
-
-void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- int flags = pbi->common.rtcd.flags;
-
-#if HAVE_ARMV5TE
- if (flags & HAS_EDSP)
- {
- }
-#endif
-
-#if HAVE_ARMV6
- if (flags & HAS_MEDIA)
- {
- pbi->dequant.block = vp8_dequantize_b_v6;
- pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
- }
-#endif
-
-#if HAVE_ARMV7
- if (flags & HAS_NEON)
- {
- pbi->dequant.block = vp8_dequantize_b_neon;
- pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
- }
-#endif
-#endif
-}
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
index c1ef2852f..578f7668f 100644
--- a/vp8/decoder/arm/armv6/idct_blk_v6.c
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -10,7 +10,6 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 2918e0512..334888ba7 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -10,9 +10,7 @@
#include "vpx_config.h"
-#include "vp8/decoder/dequantize.h"
-#include "vp8/common/idct.h"
-#include "vpx_mem/vpx_mem.h"
+#include "vp8/common/blockd.h"
#if HAVE_ARMV7
extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
index 185895f05..7424b029c 100644
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -10,7 +10,6 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
/* place these declarations here because we don't want to maintain them
* outside of this scope
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 31eafcf54..ea957f86d 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -9,13 +9,14 @@
*/
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
#include "onyxd_int.h"
#include "vp8/common/header.h"
#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/recon.h"
#include "vp8/common/reconinter.h"
-#include "dequantize.h"
#include "detokenize.h"
#include "vp8/common/invtrans.h"
#include "vp8/common/alloccommon.h"
@@ -32,7 +33,6 @@
#endif
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/idct.h"
-#include "dequantize.h"
#include "vp8/common/threading.h"
#include "decoderthreading.h"
#include "dboolhuff.h"
@@ -218,7 +218,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
{
if (xd->eobs[i] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ vp8_dequant_idct_add
(b->qcoeff, b->dequant,
*(b->base_dst) + b->dst, b->dst_stride);
}
@@ -247,7 +247,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, block)(b);
+ vp8_dequantize_b(b);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@@ -272,7 +272,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
DQC[0] = 1;
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ vp8_dequant_idct_add_y_block
(xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
@@ -281,7 +281,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
DQC[0] = dc_dequant_temp;
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ vp8_dequant_idct_add_uv_block
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 4a48a3192..14ebd2356 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -10,7 +10,8 @@
#include "vpx_config.h"
-#include "dequantize.h"
+#include "vpx_rtcd.h"
+#include "vp8/common/blockd.h"
#include "vp8/common/idct.h"
#include "vpx_mem/vpx_mem.h"
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
deleted file mode 100644
index f66cf2bac..000000000
--- a/vp8/decoder/dequantize.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DEQUANTIZE_H
-#define DEQUANTIZE_H
-#include "vp8/common/blockd.h"
-
-#define prototype_dequant_block(sym) \
- void sym(BLOCKD *x)
-
-#define prototype_dequant_idct_add(sym) \
- void sym(short *input, short *dq, \
- unsigned char *output, \
- int stride)
-
-#define prototype_dequant_idct_add_y_block(sym) \
- void sym(short *q, short *dq, \
- unsigned char *dst, \
- int stride, char *eobs)
-
-#define prototype_dequant_idct_add_uv_block(sym) \
- void sym(short *q, short *dq, \
- unsigned char *dst_u, \
- unsigned char *dst_v, int stride, char *eobs)
-
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/dequantize_x86.h"
-#endif
-
-#if ARCH_ARM
-#include "arm/dequantize_arm.h"
-#endif
-
-#ifndef vp8_dequant_block
-#define vp8_dequant_block vp8_dequantize_b_c
-#endif
-extern prototype_dequant_block(vp8_dequant_block);
-
-#ifndef vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_add_c
-#endif
-extern prototype_dequant_idct_add(vp8_dequant_idct_add);
-
-#ifndef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
-#endif
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block);
-
-#ifndef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
-#endif
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);
-
-
-typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
-
-typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
-
-typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t));
-
-typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t));
-
-typedef struct
-{
- vp8_dequant_block_fn_t block;
- vp8_dequant_idct_add_fn_t idct_add;
- vp8_dequant_idct_add_y_block_fn_t idct_add_y_block;
- vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block;
-} vp8_dequant_rtcd_vtable_t;
-
-#if CONFIG_RUNTIME_CPU_DETECT
-#define DEQUANT_INVOKE(ctx,fn) (ctx)->fn
-#else
-#define DEQUANT_INVOKE(ctx,fn) vp8_dequant_##fn
-#endif
-
-#endif
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index d9f9ba3c8..83d68c5a1 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -10,28 +10,16 @@
#include "vpx_config.h"
-#include "vp8/decoder/dequantize.h"
+#include "vpx_rtcd.h"
#include "vp8/decoder/onyxd_int.h"
-extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
-extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
-
void vp8_dmachine_specific_config(VP8D_COMP *pbi)
{
/* Pure C: */
#if CONFIG_RUNTIME_CPU_DETECT
pbi->mb.rtcd = &pbi->common.rtcd;
- pbi->dequant.block = vp8_dequantize_b_c;
- pbi->dequant.idct_add = vp8_dequant_idct_add_c;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
-#endif
-
-#if ARCH_X86 || ARCH_X86_64
- vp8_arch_x86_decode_init(pbi);
#endif
-#if ARCH_ARM
- vp8_arch_arm_decode_init(pbi);
-#endif
+ /* Move this to common once we use it from more than one place. */
+ vpx_rtcd();
}
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
index 249fad4ea..b9c5d3212 100644
--- a/vp8/decoder/idct_blk.c
+++ b/vp8/decoder/idct_blk.c
@@ -10,7 +10,6 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
-#include "dequantize.h"
void vp8_dequant_idct_add_c(short *input, short *dq,
unsigned char *dest, int stride);
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 519a7f2b9..f93c05551 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -16,7 +16,6 @@
#include "treereader.h"
#include "vp8/common/onyxc_int.h"
#include "vp8/common/threading.h"
-#include "dequantize.h"
#if CONFIG_ERROR_CONCEALMENT
#include "ec_types.h"
#endif
@@ -93,10 +92,6 @@ typedef struct VP8Decompressor
DATARATE dr[16];
-#if CONFIG_RUNTIME_CPU_DETECT
- vp8_dequant_rtcd_vtable_t dequant;
-#endif
-
vp8_prob prob_intra;
vp8_prob prob_last;
diff --git a/vp8/decoder/rtcd_defs.sh b/vp8/decoder/rtcd_defs.sh
new file mode 100644
index 000000000..4c4647b03
--- /dev/null
+++ b/vp8/decoder/rtcd_defs.sh
@@ -0,0 +1,18 @@
+# RTCD definitions for the VP8 decoder, written in the DSL provided by
+# build/make/rtcd.sh (forward_decls / prototype / specialize).
+
+# Prints the C forward declarations needed by the prototypes below.
+decoder_forward_decls() {
+cat <<EOF
+struct blockd;
+EOF
+}
+# Register the function above so its output is included in the header.
+forward_decls decoder_forward_decls
+
+# Each entry: "prototype <return type> <name> <argument list>" followed by
+# "specialize <name> <extensions...>" listing the extensions for which a
+# <name>_<ext> implementation exists (the C version is implicit).
+prototype void vp8_dequantize_b "struct blockd*"
+specialize vp8_dequantize_b mmx v6 neon
+
+prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride"
+specialize vp8_dequant_idct_add mmx v6 neon
+
+prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs"
+specialize vp8_dequant_idct_add_y_block mmx sse2 v6 neon
+
+prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"
+specialize vp8_dequant_idct_add_uv_block mmx sse2 v6 neon
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 1967781eb..382d3f114 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -9,6 +9,8 @@
*/
+#include "vpx_config.h"
+#include "vpx_rtcd.h"
#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
# include <unistd.h>
#endif
@@ -189,7 +191,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
{
if (xd->eobs[i] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, idct_add)
+ vp8_dequant_idct_add
(b->qcoeff, b->dequant,
*(b->base_dst) + b->dst, b->dst_stride);
}
@@ -217,7 +219,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
/* do 2nd order transform on the dc block */
if (xd->eobs[24] > 1)
{
- DEQUANT_INVOKE(&pbi->dequant, block)(b);
+ vp8_dequantize_b(b);
IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0],
xd->qcoeff);
@@ -248,13 +250,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int m
DQC = local_dequant;
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+ vp8_dequant_idct_add_y_block
(xd->qcoeff, DQC,
xd->dst.y_buffer,
xd->dst.y_stride, xd->eobs);
}
- DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+ vp8_dequant_idct_add_uv_block
(xd->qcoeff+16*16, xd->block[16].dequant,
xd->dst.u_buffer, xd->dst.v_buffer,
xd->dst.uv_stride, xd->eobs+16);
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
deleted file mode 100644
index 49bcb7f19..000000000
--- a/vp8/decoder/x86/dequantize_x86.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef DEQUANTIZE_X86_H
-#define DEQUANTIZE_X86_H
-
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-#if HAVE_MMX
-extern prototype_dequant_block(vp8_dequantize_b_mmx);
-extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx);
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_dequant_block
-#define vp8_dequant_block vp8_dequantize_b_mmx
-
-#undef vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
-
-#undef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx
-
-#undef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx
-
-#endif
-#endif
-
-#if HAVE_SSE2
-extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2);
-extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef vp8_dequant_idct_add_y_block
-#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2
-
-#undef vp8_dequant_idct_add_uv_block
-#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_sse2
-
-#endif
-#endif
-
-#endif
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
index 29276e5d7..1e3666b3b 100644
--- a/vp8/decoder/x86/idct_blk_mmx.c
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@@ -9,8 +9,8 @@
*/
#include "vpx_config.h"
+#include "vpx_rtcd.h"
#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
void vp8_dequant_idct_add_y_block_mmx
(short *q, short *dq,
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
index 03c2878c1..e8dca8df0 100644
--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -10,7 +10,6 @@
#include "vpx_config.h"
#include "vp8/common/idct.h"
-#include "vp8/decoder/dequantize.h"
void vp8_idct_dequant_0_2x_sse2
(short *q, short *dq ,
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 91dba7e1a..c3ea9c848 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -10,8 +10,8 @@
#include "vpx_config.h"
-#include "vpx_ports/x86.h"
-#include "vp8/decoder/onyxd_int.h"
+#include "vpx_rtcd.h"
+#include "vp8/common/blockd.h"
#if HAVE_MMX
@@ -25,35 +25,3 @@ void vp8_dequantize_b_mmx(BLOCKD *d)
vp8_dequantize_b_impl_mmx(sq, dq, q);
}
#endif
-
-void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
- int flags = x86_simd_caps();
-
- /* Note:
- *
- * This platform can be built without runtime CPU detection as well. If
- * you modify any of the function mappings present in this file, be sure
- * to also update them in static mapings (<arch>/filename_<arch>.h)
- */
- /* Override default functions with fastest ones for this CPU. */
-#if HAVE_MMX
- if (flags & HAS_MMX)
- {
- pbi->dequant.block = vp8_dequantize_b_mmx;
- pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
- }
-#endif
-#if HAVE_SSE2
- if (flags & HAS_SSE2)
- {
- pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
- pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
- }
-#endif
-
-#endif
-}
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 683af34b0..2a2691162 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -51,6 +51,7 @@ VP8_COMMON_SRCS-yes += common/recon.h
VP8_COMMON_SRCS-yes += common/reconinter.h
VP8_COMMON_SRCS-yes += common/reconintra.h
VP8_COMMON_SRCS-yes += common/reconintra4x4.h
+VP8_COMMON_SRCS-yes += common/rtcd.c
VP8_COMMON_SRCS-yes += common/setupintrarecon.h
VP8_COMMON_SRCS-yes += common/subpixel.h
VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index d88b595fb..00cff7080 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -61,7 +61,6 @@ VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c
VP8_DX_SRCS-yes += decoder/dboolhuff.h
VP8_DX_SRCS-yes += decoder/decodemv.h
VP8_DX_SRCS-yes += decoder/decoderthreading.h
-VP8_DX_SRCS-yes += decoder/dequantize.h
VP8_DX_SRCS-yes += decoder/detokenize.h
VP8_DX_SRCS-yes += decoder/onyxd_int.h
VP8_DX_SRCS-yes += decoder/treereader.h
@@ -70,10 +69,10 @@ VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
VP8_DX_SRCS-yes += decoder/idct_blk.c
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h
VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c
+VP8_DX_SRCS-yes += decoder/rtcd_defs.sh
VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
-VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h
VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c
VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm
VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index b08f9464f..058630cf6 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -11,7 +11,6 @@
#VP8_DX_SRCS list is modified according to different platforms.
-VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.c
VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/dequantize_arm.h