diff options
Diffstat (limited to 'gst/fieldanalysis')
-rw-r--r-- | gst/fieldanalysis/gstfieldanalysisorc-dist.c | 551 | ||||
-rw-r--r-- | gst/fieldanalysis/gstfieldanalysisorc-dist.h | 20 |
2 files changed, 299 insertions, 272 deletions
diff --git a/gst/fieldanalysis/gstfieldanalysisorc-dist.c b/gst/fieldanalysis/gstfieldanalysisorc-dist.c index 969916376..351ee1a92 100644 --- a/gst/fieldanalysis/gstfieldanalysisorc-dist.c +++ b/gst/fieldanalysis/gstfieldanalysisorc-dist.c @@ -4,9 +4,6 @@ #ifdef HAVE_CONFIG_H #include "config.h" #endif -#ifndef DISABLE_ORC -#include <orc/orc.h> -#endif #include <glib.h> #ifndef _ORC_INTEGER_TYPEDEFS_ @@ -32,6 +29,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline #else #include <limits.h> typedef signed char orc_int8; @@ -71,19 +69,34 @@ typedef union orc_int16 x4[4]; } orc_union64; #endif +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif -void orc_same_parity_sad_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, int p2, int n); -void orc_same_parity_ssd_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, int p2, int n); -void orc_same_parity_3_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, - const orc_uint8 * s5, const orc_uint8 * s6, int p2, int n); -void orc_opposite_parity_5_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, - const orc_uint8 * s5, int p2, int n); - -void gst_fieldanalysis_orc_init (void); +#ifndef DISABLE_ORC +#include <orc/orc.h> +#endif +void orc_same_parity_sad_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + int p2, int n); +void orc_same_parity_ssd_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + int p2, int n); +void orc_same_parity_3_tap_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + const orc_uint8 * ORC_RESTRICT s5, const orc_uint8 * ORC_RESTRICT s6, + int p2, int n); +void orc_opposite_parity_5_tap_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + const orc_uint8 * ORC_RESTRICT s5, int p2, int n); /* begin Orc C target preamble */ @@ -117,6 +130,7 @@ void gst_fieldanalysis_orc_init (void); #define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0)) #define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff))) #define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0)) +#ifndef ORC_RESTRICT #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define ORC_RESTRICT restrict #elif defined(__GNUC__) && __GNUC__ >= 4 @@ -124,6 +138,7 @@ void gst_fieldanalysis_orc_init (void); #else #define ORC_RESTRICT #endif +#endif /* end Orc C target preamble */ @@ -131,8 +146,9 @@ void gst_fieldanalysis_orc_init (void); /* orc_same_parity_sad_planar_yuv */ #ifdef DISABLE_ORC void -orc_same_parity_sad_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, int p2, int n) +orc_same_parity_sad_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + int p2, int n) { int i; const orc_int8 *ORC_RESTRICT ptr4; @@ -183,7 +199,7 @@ orc_same_parity_sad_planar_yuv (guint32 * a1, const orc_uint8 * s1, #else static void -_backup_orc_same_parity_sad_planar_yuv (OrcExecutor * ex) +_backup_orc_same_parity_sad_planar_yuv (OrcExecutor * ORC_RESTRICT ex) { int i; int n = ex->n; @@ -233,15 +249,55 @@ _backup_orc_same_parity_sad_planar_yuv (OrcExecutor * ex) } -static OrcProgram *_orc_program_orc_same_parity_sad_planar_yuv; void -orc_same_parity_sad_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, int p2, int n) +orc_same_parity_sad_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + int p2, int n) { OrcExecutor _ex, *ex = &_ex; - OrcProgram *p = _orc_program_orc_same_parity_sad_planar_yuv; + static volatile int p_inited = 0; + static OrcProgram *p = 0; void (*func) (OrcExecutor *); + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + + p = orc_program_new (); + orc_program_set_name (p, "orc_same_parity_sad_planar_yuv"); + orc_program_set_backup_function (p, + _backup_orc_same_parity_sad_planar_yuv); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_accumulator (p, 4, "a1"); + orc_program_add_parameter (p, 4, "p2"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 4, "t3"); + orc_program_add_temporary (p, 4, "t4"); + + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + + orc_program_compile (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } ex->program = p; ex->n = n; @@ -259,8 +315,9 @@ orc_same_parity_sad_planar_yuv (guint32 * a1, const orc_uint8 * s1, /* orc_same_parity_ssd_planar_yuv */ #ifdef DISABLE_ORC void -orc_same_parity_ssd_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, int p2, int n) +orc_same_parity_ssd_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + int p2, int n) { int i; const orc_int8 *ORC_RESTRICT ptr4; @@ -308,7 +365,7 @@ orc_same_parity_ssd_planar_yuv (guint32 * a1, const orc_uint8 * s1, #else static void -_backup_orc_same_parity_ssd_planar_yuv (OrcExecutor * ex) +_backup_orc_same_parity_ssd_planar_yuv (OrcExecutor * ORC_RESTRICT ex) { int i; int n = ex->n; @@ -355,15 +412,53 @@ _backup_orc_same_parity_ssd_planar_yuv (OrcExecutor * ex) } -static OrcProgram *_orc_program_orc_same_parity_ssd_planar_yuv; void -orc_same_parity_ssd_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, int p2, int n) +orc_same_parity_ssd_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + int p2, int n) { OrcExecutor _ex, *ex = &_ex; - OrcProgram *p = _orc_program_orc_same_parity_ssd_planar_yuv; + static volatile int p_inited = 0; + static OrcProgram *p = 0; void (*func) (OrcExecutor *); + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + + p = orc_program_new (); + orc_program_set_name (p, "orc_same_parity_ssd_planar_yuv"); + orc_program_set_backup_function (p, + _backup_orc_same_parity_ssd_planar_yuv); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_accumulator (p, 4, "a1"); + orc_program_add_parameter (p, 4, "p2"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 4, "t3"); + orc_program_add_temporary (p, 4, "t4"); + + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + + orc_program_compile (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } ex->program = p; ex->n = n; @@ -381,9 +476,11 @@ orc_same_parity_ssd_planar_yuv (guint32 * a1, const orc_uint8 * s1, /* orc_same_parity_3_tap_planar_yuv */ #ifdef DISABLE_ORC void -orc_same_parity_3_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, - const orc_uint8 * s5, const orc_uint8 * s6, int p2, int n) +orc_same_parity_3_tap_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + const orc_uint8 * ORC_RESTRICT s5, const orc_uint8 * ORC_RESTRICT s6, + int p2, int n) { int i; const orc_int8 *ORC_RESTRICT ptr4; @@ -484,7 +581,7 @@ orc_same_parity_3_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, #else static void -_backup_orc_same_parity_3_tap_planar_yuv (OrcExecutor * ex) +_backup_orc_same_parity_3_tap_planar_yuv (OrcExecutor * ORC_RESTRICT ex) { int i; int n = ex->n; @@ -584,16 +681,86 @@ _backup_orc_same_parity_3_tap_planar_yuv (OrcExecutor * ex) } -static OrcProgram *_orc_program_orc_same_parity_3_tap_planar_yuv; void -orc_same_parity_3_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, - const orc_uint8 * s5, const orc_uint8 * s6, int p2, int n) +orc_same_parity_3_tap_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + const orc_uint8 * ORC_RESTRICT s5, const orc_uint8 * ORC_RESTRICT s6, + int p2, int n) { OrcExecutor _ex, *ex = &_ex; - OrcProgram *p = _orc_program_orc_same_parity_3_tap_planar_yuv; + static volatile int p_inited = 0; + static OrcProgram *p = 0; void (*func) (OrcExecutor *); + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + + p = orc_program_new (); + orc_program_set_name (p, "orc_same_parity_3_tap_planar_yuv"); + orc_program_set_backup_function (p, + _backup_orc_same_parity_3_tap_planar_yuv); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_source (p, 1, "s3"); + orc_program_add_source (p, 1, "s4"); + orc_program_add_source (p, 1, "s5"); + orc_program_add_source (p, 1, "s6"); + orc_program_add_accumulator (p, 4, "a1"); + orc_program_add_constant (p, 4, 0x00000002, "c1"); + orc_program_add_parameter (p, 4, "p2"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 2, "t3"); + orc_program_add_temporary (p, 2, "t4"); + orc_program_add_temporary (p, 2, "t5"); + orc_program_add_temporary (p, 2, "t6"); + orc_program_add_temporary (p, 4, "t7"); + orc_program_add_temporary (p, 4, "t8"); + + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_S4, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T5, ORC_VAR_S5, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T6, ORC_VAR_S6, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "shlw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "shlw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T3, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_P2, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8, + ORC_VAR_D1); + orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + + orc_program_compile (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } ex->program = p; ex->n = n; @@ -615,9 +782,10 @@ orc_same_parity_3_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, /* orc_opposite_parity_5_tap_planar_yuv */ #ifdef DISABLE_ORC void -orc_opposite_parity_5_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, - const orc_uint8 * s5, int p2, int n) +orc_opposite_parity_5_tap_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + const orc_uint8 * ORC_RESTRICT s5, int p2, int n) { int i; const orc_int8 *ORC_RESTRICT ptr4; @@ -658,9 +826,9 @@ orc_opposite_parity_5_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, ptr8 = (orc_int8 *) s5; /* 11: loadpw */ - var44.i = 0x00000003; /* 3 or 1.4822e-323f */ + var44.i = (int) 0x00000003; /* 3 or 1.4822e-323f */ /* 13: loadpw */ - var45.i = 0x00000003; /* 3 or 1.4822e-323f */ + var45.i = (int) 0x00000003; /* 3 or 1.4822e-323f */ /* 21: loadpl */ var46.i = p2; @@ -716,7 +884,7 @@ orc_opposite_parity_5_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, #else static void -_backup_orc_opposite_parity_5_tap_planar_yuv (OrcExecutor * ex) +_backup_orc_opposite_parity_5_tap_planar_yuv (OrcExecutor * ORC_RESTRICT ex) { int i; int n = ex->n; @@ -758,9 +926,9 @@ _backup_orc_opposite_parity_5_tap_planar_yuv (OrcExecutor * ex) ptr8 = (orc_int8 *) ex->arrays[8]; /* 11: loadpw */ - var44.i = 0x00000003; /* 3 or 1.4822e-323f */ + var44.i = (int) 0x00000003; /* 3 or 1.4822e-323f */ /* 13: loadpw */ - var45.i = 0x00000003; /* 3 or 1.4822e-323f */ + var45.i = (int) 0x00000003; /* 3 or 1.4822e-323f */ /* 21: loadpl */ var46.i = ex->params[25]; @@ -814,16 +982,82 @@ _backup_orc_opposite_parity_5_tap_planar_yuv (OrcExecutor * ex) } -static OrcProgram *_orc_program_orc_opposite_parity_5_tap_planar_yuv; void -orc_opposite_parity_5_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, - const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, - const orc_uint8 * s5, int p2, int n) +orc_opposite_parity_5_tap_planar_yuv (guint32 * ORC_RESTRICT a1, + const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, + const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, + const orc_uint8 * ORC_RESTRICT s5, int p2, int n) { OrcExecutor _ex, *ex = &_ex; - OrcProgram *p = _orc_program_orc_opposite_parity_5_tap_planar_yuv; + static volatile int p_inited = 0; + static OrcProgram *p = 0; void (*func) (OrcExecutor *); + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + + p = orc_program_new (); + orc_program_set_name (p, "orc_opposite_parity_5_tap_planar_yuv"); + orc_program_set_backup_function (p, + _backup_orc_opposite_parity_5_tap_planar_yuv); + orc_program_add_source (p, 1, "s1"); + orc_program_add_source (p, 1, "s2"); + orc_program_add_source (p, 1, "s3"); + orc_program_add_source (p, 1, "s4"); + orc_program_add_source (p, 1, "s5"); + orc_program_add_accumulator (p, 4, "a1"); + orc_program_add_constant (p, 4, 0x00000002, "c1"); + orc_program_add_constant (p, 4, 0x00000003, "c2"); + orc_program_add_parameter (p, 4, "p2"); + orc_program_add_temporary (p, 2, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 2, "t3"); + orc_program_add_temporary (p, 2, "t4"); + orc_program_add_temporary (p, 2, "t5"); + orc_program_add_temporary (p, 4, "t6"); + orc_program_add_temporary (p, 4, "t7"); + + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_S4, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T5, ORC_VAR_S5, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "shlw", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T3, + ORC_VAR_D1); + orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_P2, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7, + ORC_VAR_D1); + orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T6, ORC_VAR_D1, + ORC_VAR_D1); + + orc_program_compile (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } ex->program = p; ex->n = n; @@ -839,218 +1073,3 @@ orc_opposite_parity_5_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1); } #endif - - -void -gst_fieldanalysis_orc_init (void) -{ -#ifndef DISABLE_ORC - { - /* orc_same_parity_sad_planar_yuv */ - OrcProgram *p; - OrcCompileResult result; - - p = orc_program_new (); - orc_program_set_name (p, "orc_same_parity_sad_planar_yuv"); - orc_program_set_backup_function (p, _backup_orc_same_parity_sad_planar_yuv); - orc_program_add_source (p, 1, "s1"); - orc_program_add_source (p, 1, "s2"); - orc_program_add_accumulator (p, 4, "a1"); - orc_program_add_parameter (p, 4, "p2"); - orc_program_add_temporary (p, 2, "t1"); - orc_program_add_temporary (p, 2, "t2"); - orc_program_add_temporary (p, 4, "t3"); - orc_program_add_temporary (p, 4, "t4"); - - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, - ORC_VAR_D1); - orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2, - ORC_VAR_D1); - orc_program_append_2 (p, "andl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, - ORC_VAR_D1); - orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T3, ORC_VAR_D1, - ORC_VAR_D1); - - result = orc_program_compile (p); - - _orc_program_orc_same_parity_sad_planar_yuv = p; - } - { - /* orc_same_parity_ssd_planar_yuv */ - OrcProgram *p; - OrcCompileResult result; - - p = orc_program_new (); - orc_program_set_name (p, "orc_same_parity_ssd_planar_yuv"); - orc_program_set_backup_function (p, _backup_orc_same_parity_ssd_planar_yuv); - orc_program_add_source (p, 1, "s1"); - orc_program_add_source (p, 1, "s2"); - orc_program_add_accumulator (p, 4, "a1"); - orc_program_add_parameter (p, 4, "p2"); - orc_program_add_temporary (p, 2, "t1"); - orc_program_add_temporary (p, 2, "t2"); - orc_program_add_temporary (p, 4, "t3"); - orc_program_add_temporary (p, 4, "t4"); - - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, - ORC_VAR_D1); - orc_program_append_2 (p, "mulswl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_T1, - ORC_VAR_D1); - orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_P2, - ORC_VAR_D1); - orc_program_append_2 (p, "andl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, - ORC_VAR_D1); - orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T3, ORC_VAR_D1, - ORC_VAR_D1); - - result = orc_program_compile (p); - - _orc_program_orc_same_parity_ssd_planar_yuv = p; - } - { - /* orc_same_parity_3_tap_planar_yuv */ - OrcProgram *p; - OrcCompileResult result; - - p = orc_program_new (); - orc_program_set_name (p, "orc_same_parity_3_tap_planar_yuv"); - orc_program_set_backup_function (p, - _backup_orc_same_parity_3_tap_planar_yuv); - orc_program_add_source (p, 1, "s1"); - orc_program_add_source (p, 1, "s2"); - orc_program_add_source (p, 1, "s3"); - orc_program_add_source (p, 1, "s4"); - orc_program_add_source (p, 1, "s5"); - orc_program_add_source (p, 1, "s6"); - orc_program_add_accumulator (p, 4, "a1"); - orc_program_add_constant (p, 4, 0x00000002, "c1"); - orc_program_add_parameter (p, 4, "p2"); - orc_program_add_temporary (p, 2, "t1"); - orc_program_add_temporary (p, 2, "t2"); - orc_program_add_temporary (p, 2, "t3"); - orc_program_add_temporary (p, 2, "t4"); - orc_program_add_temporary (p, 2, "t5"); - orc_program_add_temporary (p, 2, "t6"); - orc_program_add_temporary (p, 4, "t7"); - orc_program_add_temporary (p, 4, "t8"); - - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_S4, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T5, ORC_VAR_S5, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T6, ORC_VAR_S6, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "shlw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, - ORC_VAR_D1); - orc_program_append_2 (p, "shlw", 0, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C1, - ORC_VAR_D1); - orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, - ORC_VAR_D1); - orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T3, - ORC_VAR_D1); - orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T5, - ORC_VAR_D1); - orc_program_append_2 (p, "addw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_T6, - ORC_VAR_D1); - orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T4, - ORC_VAR_D1); - orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T7, ORC_VAR_T1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_P2, - ORC_VAR_D1); - orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T8, - ORC_VAR_D1); - orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T7, ORC_VAR_D1, - ORC_VAR_D1); - - result = orc_program_compile (p); - - _orc_program_orc_same_parity_3_tap_planar_yuv = p; - } - { - /* orc_opposite_parity_5_tap_planar_yuv */ - OrcProgram *p; - OrcCompileResult result; - - p = orc_program_new (); - orc_program_set_name (p, "orc_opposite_parity_5_tap_planar_yuv"); - orc_program_set_backup_function (p, - _backup_orc_opposite_parity_5_tap_planar_yuv); - orc_program_add_source (p, 1, "s1"); - orc_program_add_source (p, 1, "s2"); - orc_program_add_source (p, 1, "s3"); - orc_program_add_source (p, 1, "s4"); - orc_program_add_source (p, 1, "s5"); - orc_program_add_accumulator (p, 4, "a1"); - orc_program_add_constant (p, 4, 0x00000002, "c1"); - orc_program_add_constant (p, 4, 0x00000003, "c2"); - orc_program_add_parameter (p, 4, "p2"); - orc_program_add_temporary (p, 2, "t1"); - orc_program_add_temporary (p, 2, "t2"); - orc_program_add_temporary (p, 2, "t3"); - orc_program_add_temporary (p, 2, "t4"); - orc_program_add_temporary (p, 2, "t5"); - orc_program_add_temporary (p, 4, "t6"); - orc_program_add_temporary (p, 4, "t7"); - - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T4, ORC_VAR_S4, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convubw", 0, ORC_VAR_T5, ORC_VAR_S5, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "shlw", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_C1, - ORC_VAR_D1); - orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, - ORC_VAR_D1); - orc_program_append_2 (p, "mullw", 0, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C2, - ORC_VAR_D1); - orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, - ORC_VAR_D1); - orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T3, - ORC_VAR_D1); - orc_program_append_2 (p, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T4, - ORC_VAR_D1); - orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T5, - ORC_VAR_D1); - orc_program_append_2 (p, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1, - ORC_VAR_D1); - orc_program_append_2 (p, "cmpgtsl", 0, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_P2, - ORC_VAR_D1); - orc_program_append_2 (p, "andl", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T7, - ORC_VAR_D1); - orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T6, ORC_VAR_D1, - ORC_VAR_D1); - - result = orc_program_compile (p); - - _orc_program_orc_opposite_parity_5_tap_planar_yuv = p; - } -#endif -} diff --git a/gst/fieldanalysis/gstfieldanalysisorc-dist.h b/gst/fieldanalysis/gstfieldanalysisorc-dist.h index b46b6fa60..40a64ee94 100644 --- a/gst/fieldanalysis/gstfieldanalysisorc-dist.h +++ b/gst/fieldanalysis/gstfieldanalysisorc-dist.h @@ -10,8 +10,6 @@ extern "C" { #endif -void gst_fieldanalysis_orc_init (void); - #ifndef _ORC_INTEGER_TYPEDEFS_ @@ -37,6 +35,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline #else #include <limits.h> typedef signed char orc_int8; @@ -59,10 +58,19 @@ typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16; typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32; typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64; #endif -void orc_same_parity_sad_planar_yuv (guint32 * a1, const orc_uint8 * s1, const orc_uint8 * s2, int p2, int n); -void orc_same_parity_ssd_planar_yuv (guint32 * a1, const orc_uint8 * s1, const orc_uint8 * s2, int p2, int n); -void orc_same_parity_3_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, const orc_uint8 * s5, const orc_uint8 * s6, int p2, int n); -void orc_opposite_parity_5_tap_planar_yuv (guint32 * a1, const orc_uint8 * s1, const orc_uint8 * s2, const orc_uint8 * s3, const orc_uint8 * s4, const orc_uint8 * s5, int p2, int n); +#ifndef ORC_RESTRICT +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define ORC_RESTRICT restrict +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define ORC_RESTRICT __restrict__ +#else +#define ORC_RESTRICT +#endif +#endif +void orc_same_parity_sad_planar_yuv (guint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int p2, int n); +void orc_same_parity_ssd_planar_yuv (guint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int p2, int n); +void orc_same_parity_3_tap_planar_yuv (guint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, const orc_uint8 * ORC_RESTRICT s5, const orc_uint8 * ORC_RESTRICT s6, int p2, int n); +void orc_opposite_parity_5_tap_planar_yuv (guint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, const orc_uint8 * ORC_RESTRICT s3, const orc_uint8 * ORC_RESTRICT s4, const orc_uint8 * ORC_RESTRICT s5, int p2, int n); #ifdef __cplusplus } |