Diffstat (limited to 'vp8/encoder/x86/csystemdependent.c')
-rw-r--r--  vp8/encoder/x86/csystemdependent.c  289
1 file changed, 289 insertions, 0 deletions
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
new file mode 100644
index 000000000..186ee6856
--- /dev/null
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license and patent
+ * grant that can be found in the LICENSE file in the root of the source
+ * tree. All contributing project authors may be found in the AUTHORS
+ * file in the root of the source tree.
+ */
+
+
+#include "variance.h"
+#include "onyx_int.h"
+
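+/* Run-time CPU dispatch table: each pointer below is bound exactly once,
+ * by vp8_cmachine_specific_config(), to the fastest implementation (plain
+ * C, MMX, or SSE2/"Willamette") that the host processor supports. */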
+SADFunction *vp8_sad16x16;
+SADFunction *vp8_sad16x8;
+SADFunction *vp8_sad8x16;
+SADFunction *vp8_sad8x8;
+SADFunction *vp8_sad4x4;
+
+variance_function *vp8_variance4x4;
+variance_function *vp8_variance8x8;
+variance_function *vp8_variance8x16;
+variance_function *vp8_variance16x8;
+variance_function *vp8_variance16x16;
+
+variance_function *vp8_mse16x16;
+
+sub_pixel_variance_function *vp8_sub_pixel_variance4x4;
+sub_pixel_variance_function *vp8_sub_pixel_variance8x8;
+sub_pixel_variance_function *vp8_sub_pixel_variance8x16;
+sub_pixel_variance_function *vp8_sub_pixel_variance16x8;
+sub_pixel_variance_function *vp8_sub_pixel_variance16x16;
+
+int (*vp8_block_error)(short *, short *);
+int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc);
+void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride);
+
+extern void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride);
+extern void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride);
+
+extern int vp8_block_error_c(short *, short *);
+extern int vp8_mbblock_error_c(MACROBLOCK *x, int dc);
+
+extern int vp8_block_error_mmx(short *, short *);
+extern int vp8_mbblock_error_mmx(MACROBLOCK *x, int dc);
+
+extern int vp8_block_error_xmm(short *, short *);
+extern int vp8_mbblock_error_xmm(MACROBLOCK *x, int dc);
+
+
+int (*vp8_mbuverror)(MACROBLOCK *mb);
+unsigned int (*vp8_get_mb_ss)(short *);
+void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
+void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
+void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
+void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
+
+void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
+void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
+unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
+
+// C imports
+extern int vp8_mbuverror_c(MACROBLOCK *mb);
+extern void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
+extern void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
+extern void vp8_fast_fdct4x4_c(short *input, short *output, int pitch);
+extern void vp8_fast_fdct8x4_c(short *input, short *output, int pitch);
+
+extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
+extern void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);
+
+extern SADFunction vp8_sad16x16_c;
+extern SADFunction vp8_sad16x8_c;
+extern SADFunction vp8_sad8x16_c;
+extern SADFunction vp8_sad8x8_c;
+extern SADFunction vp8_sad4x4_c;
+
+extern SADFunction vp8_sad16x16_wmt;
+extern SADFunction vp8_sad16x8_wmt;
+extern SADFunction vp8_sad8x16_wmt;
+extern SADFunction vp8_sad8x8_wmt;
+extern SADFunction vp8_sad4x4_wmt;
+
+extern SADFunction vp8_sad16x16_mmx;
+extern SADFunction vp8_sad16x8_mmx;
+extern SADFunction vp8_sad8x16_mmx;
+extern SADFunction vp8_sad8x8_mmx;
+extern SADFunction vp8_sad4x4_mmx;
+
+extern variance_function vp8_variance16x16_c;
+extern variance_function vp8_variance8x16_c;
+extern variance_function vp8_variance16x8_c;
+extern variance_function vp8_variance8x8_c;
+extern variance_function vp8_variance4x4_c;
+extern variance_function vp8_mse16x16_c;
+
+extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_c;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_c;
+
+extern unsigned int vp8_get_mb_ss_c(short *);
+extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
+
+// MMX imports
+extern int vp8_mbuverror_mmx(MACROBLOCK *mb);
+extern void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d);
+extern void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch);
+extern void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
+extern void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch);
+extern void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch);
+extern void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch);
+extern void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch);
+extern variance_function vp8_variance4x4_mmx;
+extern variance_function vp8_variance8x8_mmx;
+extern variance_function vp8_variance8x16_mmx;
+extern variance_function vp8_variance16x8_mmx;
+extern variance_function vp8_variance16x16_mmx;
+
+extern variance_function vp8_mse16x16_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_mmx;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_mmx;
+
+extern unsigned int vp8_get16x16pred_error_mmx(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+extern unsigned int vp8_get_mb_ss_mmx(short *);
+extern unsigned int vp8_get8x8var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get16x16var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get4x4sse_cs_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
+
+// WMT (Willamette / SSE2) imports
+extern int vp8_mbuverror_xmm(MACROBLOCK *mb);
+extern void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d);
+extern void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch);
+extern variance_function vp8_variance4x4_wmt;
+extern variance_function vp8_variance8x8_wmt;
+extern variance_function vp8_variance8x16_wmt;
+extern variance_function vp8_variance16x8_wmt;
+extern variance_function vp8_variance16x16_wmt;
+
+extern variance_function vp8_mse16x16_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_wmt;
+extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_wmt;
+extern unsigned int vp8_get16x16pred_error_sse2(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
+extern unsigned int vp8_get_mb_ss_sse2(short *src_ptr);
+extern unsigned int vp8_get8x8var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+extern unsigned int vp8_get16x16var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum);
+
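+/* Implemented elsewhere in the tree; reports which x86 SIMD extensions the
+ * running CPU provides (by this tree's convention: mmx = MMX, xmm = SSE,
+ * wmt = SSE2 / Willamette). */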
+extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
+
+void vp8_cmachine_specific_config(void)
+{
+ int mmx_enabled;
+ int xmm_enabled;
+ int wmt_enabled;
+
+ vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
+
+ if (wmt_enabled) // Willamette
+ {
+ // Willamette instruction set available:
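+ // Kernels with no SSE2 version in this file (the 4x4/8x8 variances, most
+ // of the DCTs, the subtract helpers, get4x4sse_cs) keep their MMX
+ // implementations.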
+ vp8_mbuverror = vp8_mbuverror_xmm;
+ vp8_fast_quantize_b = vp8_fast_quantize_b_sse;
+ vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx;
+ vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx;
+ vp8_fast_fdct4x4 = vp8_fast_fdct4x4_mmx;
+ vp8_fast_fdct8x4 = vp8_fast_fdct8x4_wmt;
+ vp8_subtract_b = vp8_subtract_b_mmx;
+ vp8_subtract_mbuv = vp8_subtract_mbuv_mmx;
+ vp8_variance4x4 = vp8_variance4x4_mmx;
+ vp8_variance8x8 = vp8_variance8x8_mmx;
+ vp8_variance8x16 = vp8_variance8x16_wmt;
+ vp8_variance16x8 = vp8_variance16x8_wmt;
+ vp8_variance16x16 = vp8_variance16x16_wmt;
+ vp8_mse16x16 = vp8_mse16x16_wmt;
+ vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt;
+ vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt;
+ vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt;
+ vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt;
+ vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt;
+ vp8_get_mb_ss = vp8_get_mb_ss_sse2;
+ vp8_get16x16pred_error = vp8_get16x16pred_error_sse2;
+ vp8_get8x8var = vp8_get8x8var_sse2;
+ vp8_get16x16var = vp8_get16x16var_sse2;
+ vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
+ vp8_sad16x16 = vp8_sad16x16_wmt;
+ vp8_sad16x8 = vp8_sad16x8_wmt;
+ vp8_sad8x16 = vp8_sad8x16_wmt;
+ vp8_sad8x8 = vp8_sad8x8_wmt;
+ vp8_sad4x4 = vp8_sad4x4_wmt;
+ vp8_block_error = vp8_block_error_xmm;
+ vp8_mbblock_error = vp8_mbblock_error_xmm;
+ vp8_subtract_mby = vp8_subtract_mby_mmx;
+
+ }
+ else if (mmx_enabled)
+ {
+ // MMX instruction set available:
+ vp8_mbuverror = vp8_mbuverror_mmx;
+ vp8_fast_quantize_b = vp8_fast_quantize_b_mmx;
+ vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx;
+ vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx;
+ vp8_fast_fdct4x4 = vp8_fast_fdct4x4_mmx;
+ vp8_fast_fdct8x4 = vp8_fast_fdct8x4_mmx;
+ vp8_subtract_b = vp8_subtract_b_mmx;
+ vp8_subtract_mbuv = vp8_subtract_mbuv_mmx;
+ vp8_variance4x4 = vp8_variance4x4_mmx;
+ vp8_variance8x8 = vp8_variance8x8_mmx;
+ vp8_variance8x16 = vp8_variance8x16_mmx;
+ vp8_variance16x8 = vp8_variance16x8_mmx;
+ vp8_variance16x16 = vp8_variance16x16_mmx;
+ vp8_mse16x16 = vp8_mse16x16_mmx;
+ vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx;
+ vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx;
+ vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx;
+ vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx;
+ vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx;
+ vp8_get_mb_ss = vp8_get_mb_ss_mmx;
+ vp8_get16x16pred_error = vp8_get16x16pred_error_mmx;
+ vp8_get8x8var = vp8_get8x8var_mmx;
+ vp8_get16x16var = vp8_get16x16var_mmx;
+ vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
+ vp8_sad16x16 = vp8_sad16x16_mmx;
+ vp8_sad16x8 = vp8_sad16x8_mmx;
+ vp8_sad8x16 = vp8_sad8x16_mmx;
+ vp8_sad8x8 = vp8_sad8x8_mmx;
+ vp8_sad4x4 = vp8_sad4x4_mmx;
+ vp8_block_error = vp8_block_error_mmx;
+ vp8_mbblock_error = vp8_mbblock_error_mmx;
+ vp8_subtract_mby = vp8_subtract_mby_mmx;
+
+ }
+ else
+ {
+ // Pure C:
+ vp8_mbuverror = vp8_mbuverror_c;
+ vp8_fast_quantize_b = vp8_fast_quantize_b_c;
+ vp8_short_fdct4x4 = vp8_short_fdct4x4_c;
+ vp8_short_fdct8x4 = vp8_short_fdct8x4_c;
+ vp8_fast_fdct4x4 = vp8_fast_fdct4x4_c;
+ vp8_fast_fdct8x4 = vp8_fast_fdct8x4_c;
+ vp8_subtract_b = vp8_subtract_b_c;
+ vp8_subtract_mbuv = vp8_subtract_mbuv_c;
+ vp8_variance4x4 = vp8_variance4x4_c;
+ vp8_variance8x8 = vp8_variance8x8_c;
+ vp8_variance8x16 = vp8_variance8x16_c;
+ vp8_variance16x8 = vp8_variance16x8_c;
+ vp8_variance16x16 = vp8_variance16x16_c;
+ vp8_mse16x16 = vp8_mse16x16_c;
+ vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c;
+ vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c;
+ vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c;
+ vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c;
+ vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c;
+ vp8_get_mb_ss = vp8_get_mb_ss_c;
+ vp8_get16x16pred_error = vp8_get16x16pred_error_c;
+ vp8_get8x8var = vp8_get8x8var_c;
+ vp8_get16x16var = vp8_get16x16var_c;
+ vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
+ vp8_sad16x16 = vp8_sad16x16_c;
+ vp8_sad16x8 = vp8_sad16x8_c;
+ vp8_sad8x16 = vp8_sad8x16_c;
+ vp8_sad8x8 = vp8_sad8x8_c;
+ vp8_sad4x4 = vp8_sad4x4_c;
+ vp8_block_error = vp8_block_error_c;
+ vp8_mbblock_error = vp8_mbblock_error_c;
+ vp8_subtract_mby = vp8_subtract_mby_c;
+ }
+
+}
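
This file is the x86 run-time dispatch table for the VP8 encoder: every hot
kernel is reached through a function pointer, and vp8_cmachine_specific_config()
binds each pointer once, at startup, to the best implementation the host CPU
supports. A minimal sketch of a call site follows; encoder_startup() and
encode_one_block() are illustrative names, not encoder entry points, and the
sketch assumes the dispatch declarations are visible via onyx_int.h, as they
are in this file:

    #include "onyx_int.h"

    void encoder_startup(void)
    {
        /* Bind every kernel pointer once, before any encoding work. */
        vp8_cmachine_specific_config();
    }

    void encode_one_block(short *src_diff, short *coeff)
    {
        /* Callers stay ISA-agnostic: this resolves to the C, MMX, or
         * SSE2 ("Willamette") implementation selected at startup. */
        vp8_short_fdct4x4(src_diff, coeff, 32); /* 32 = byte pitch of one
                                                   16-short-wide diff row */
    }

The design choice here predates compiler-assisted multiversioning: by probing
the CPU once and paying only an indirect call per kernel, the same binary runs
on plain-C, MMX, and SSE2 hosts without per-call feature checks.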