summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2008-09-13 05:52:49 -0700
committer: David Schleef <ds@schleef.org> 2008-09-13 05:52:49 -0700
commit: a28a9de744adfa810537fa5c5cdd6baef08e7919 (patch)
tree: 49715818221a1176a6132693d15f4b32a42f85d2
parent: 361296501c71c6ee7a1039c553e0f767b60c492f (diff)
download: liboil-a28a9de744adfa810537fa5c5cdd6baef08e7919.tar.gz
Add avg2_32xn_u8
-rw-r--r-- liboil/i386_amd64/sad8x8.c | 71
-rw-r--r-- liboil/liboilclasses.h | 2
-rw-r--r-- liboil/liboilfuncs-04.h | 2
-rw-r--r-- liboil/liboilfuncs-doc.h | 2
-rw-r--r-- liboil/liboilfuncs.h | 6
-rw-r--r-- liboil/liboiltrampolines.c | 20
-rw-r--r-- liboil/ref/wavelet.c | 49
7 files changed, 152 insertions, 0 deletions
diff --git a/liboil/i386_amd64/sad8x8.c b/liboil/i386_amd64/sad8x8.c
index f72cbf5..fbc268e 100644
--- a/liboil/i386_amd64/sad8x8.c
+++ b/liboil/i386_amd64/sad8x8.c
@@ -617,6 +617,47 @@ combine4_16xn_u8_mmx (uint8_t *d, int ds1,
OIL_DEFINE_IMPL_FULL (combine4_16xn_u8_mmx, combine4_16xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
void
+combine4_32xn_u8_mmx (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int j; /* row counter, 0..n-1 */
+  /* MMX/MMXEXT variant for rows of 32 bytes: load the s5_6 constants once, then apply DO_4 to each of the n rows. */
+  asm volatile ("\n"
+      " pxor %%mm7, %%mm7\n" /* mm7 = 0 */
+      " movq 0(%0), %%mm6\n" /* mm6 = the four 16-bit values s5_6[0..3] */
+      " movd 8(%0), %%mm4\n" /* low 32 bits of mm4 = s5_6[4] and s5_6[5] */
+      " pshufw $0x00, %%mm4, %%mm4\n" /* copy the low word (s5_6[4]) into all four words */
+      ::"r" (s5_6) : "memory"); /* "memory": s5_6 is read through a bare pointer register */
+  /* NOTE(review): mm4/mm6/mm7 are presumably consumed by DO_4 (defined outside this hunk); nothing tells the compiler they stay live. */
+  for(j=0;j<n;j++){
+    asm volatile ("\n"
+        DO_4(0)
+        DO_4(4)
+        DO_4(8)
+        DO_4(12)
+        DO_4(16)
+        DO_4(20)
+        DO_4(24)
+        DO_4(28)
+        /* Eight DO_4 steps at byte offsets 0..28; operands %0..%4 are d, s1, s2, s3, s4. */
+        :
+        : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4) : "memory"); /* pointers only, so memory must not be cached across the asm */
+
+    s1 += ss1; /* strides are byte counts: all row pointers are uint8_t * */
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+    d += ds1;
+  }
+  asm volatile ("emms"); /* leave MMX state so x87 floating point is usable again */
+}
+OIL_DEFINE_IMPL_FULL (combine4_32xn_u8_mmx, combine4_32xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
+void
combine2_12xn_u8_mmx (uint8_t *d, int ds1,
uint8_t *s1, int ss1,
uint8_t *s2, int ss2,
@@ -819,3 +860,33 @@ avg2_16xn_u8_mmx (uint8_t *d, int ds1, uint8_t *s1, int ss1,
}
OIL_DEFINE_IMPL_FULL (avg2_16xn_u8_mmx, avg2_16xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+void
+avg2_32xn_u8_mmx (uint8_t *d, int ds1, uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2, int n)
+{ /* MMXEXT byte average of two sources: 32 bytes per row, n rows, written to d. */
+  int j;
+  for(j=0;j<n;j++){
+    asm volatile ("\n"
+        " movq 0(%[s1]), %%mm0\n" /* bytes 0..7 of the s1 row */
+        " pavgb 0(%[s2]), %%mm0\n" /* per-byte unsigned average with rounding: (a + b + 1) >> 1 */
+        " movq %%mm0, 0(%[d])\n" /* store 8 result bytes */
+        " movq 8(%[s1]), %%mm0\n" /* bytes 8..15 */
+        " pavgb 8(%[s2]), %%mm0\n"
+        " movq %%mm0, 8(%[d])\n"
+        " movq 16(%[s1]), %%mm0\n" /* bytes 16..23 */
+        " pavgb 16(%[s2]), %%mm0\n"
+        " movq %%mm0, 16(%[d])\n"
+        " movq 24(%[s1]), %%mm0\n" /* bytes 24..31 */
+        " pavgb 24(%[s2]), %%mm0\n"
+        " movq %%mm0, 24(%[d])\n"
+        :
+        : [d] "r" (d), [s1] "r" (s1), [s2] "r" (s2) : "memory"); /* "memory": the row at d is written through a pointer register */
+    /* Step every pointer to the next row; strides are byte counts (uint8_t pointers). */
+    s1 += ss1;
+    s2 += ss2;
+    d += ds1;
+  }
+  asm volatile ("emms"); /* leave MMX state so x87 floating point is usable again */
+}
+OIL_DEFINE_IMPL_FULL (avg2_32xn_u8_mmx, avg2_32xn_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h
index 8cfa0b2..3fc6e03 100644
--- a/liboil/liboilclasses.h
+++ b/liboil/liboilclasses.h
@@ -54,6 +54,7 @@ OIL_DECLARE_CLASS(argb_paint_u8);
OIL_DECLARE_CLASS(average2_u8);
OIL_DECLARE_CLASS(avg2_12xn_u8);
OIL_DECLARE_CLASS(avg2_16xn_u8);
+OIL_DECLARE_CLASS(avg2_32xn_u8);
OIL_DECLARE_CLASS(avg2_8xn_u8);
OIL_DECLARE_CLASS(ayuv2argb_u8);
OIL_DECLARE_CLASS(ayuv2uyvy);
@@ -129,6 +130,7 @@ OIL_DECLARE_CLASS(combine2_16xn_u8);
OIL_DECLARE_CLASS(combine2_8xn_u8);
OIL_DECLARE_CLASS(combine4_12xn_u8);
OIL_DECLARE_CLASS(combine4_16xn_u8);
+OIL_DECLARE_CLASS(combine4_32xn_u8);
OIL_DECLARE_CLASS(combine4_8xn_u8);
OIL_DECLARE_CLASS(compare_u8);
OIL_DECLARE_CLASS(composite_add_argb);
diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h
index 39a07de..103c19e 100644
--- a/liboil/liboilfuncs-04.h
+++ b/liboil/liboilfuncs-04.h
@@ -54,6 +54,7 @@ void oil_argb_paint_u8 (uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s
void oil_average2_u8 (uint8_t * d, int dstr, const uint8_t * s1, int sstr1, const uint8_t * s2, int sstr2, int n);
void oil_avg2_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
void oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
void oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
void oil_ayuv2argb_u8 (uint8_t * d_4xn, const uint8_t * s_4xn, int n);
void oil_ayuv2uyvy (uint32_t * d_n, const uint32_t * s_n, int n);
@@ -129,6 +130,7 @@ void oil_combine2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, i
void oil_combine2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const int16_t * s3_4, int n);
void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n); /* all four sources and d are 32 bytes per row */
void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
diff --git a/liboil/liboilfuncs-doc.h b/liboil/liboilfuncs-doc.h
index c5b3b9e..b914e19 100644
--- a/liboil/liboilfuncs-doc.h
+++ b/liboil/liboilfuncs-doc.h
@@ -18,6 +18,7 @@ void oil_argb_paint_u8 (uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s
void oil_average2_u8 (uint8_t * d, int dstr, const uint8_t * s1, int sstr1, const uint8_t * s2, int sstr2, int n);
void oil_avg2_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
void oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
void oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
void oil_ayuv2argb_u8 (uint8_t * d_4xn, const uint8_t * s_4xn, int n);
void oil_ayuv2uyvy (uint32_t * d_n, const uint32_t * s_n, int n);
@@ -93,6 +94,7 @@ void oil_combine2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, i
void oil_combine2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const int16_t * s3_4, int n);
void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n); /* all four sources and d are 32 bytes per row */
void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index e326a77..cd03099 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -90,6 +90,9 @@ typedef void (*_oil_type_avg2_12xn_u8)(uint8_t * d_12xn, int ds1, const uint8_t
OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_16xn_u8;
typedef void (*_oil_type_avg2_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
#define oil_avg2_16xn_u8 ((_oil_type_avg2_16xn_u8)(*(void **)oil_function_class_ptr_avg2_16xn_u8))
+OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_32xn_u8;
+typedef void (*_oil_type_avg2_32xn_u8)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n);
+#define oil_avg2_32xn_u8 ((_oil_type_avg2_32xn_u8)(*(void **)oil_function_class_ptr_avg2_32xn_u8))
OIL_EXPORT OilFunctionClass *oil_function_class_ptr_avg2_8xn_u8;
typedef void (*_oil_type_avg2_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
#define oil_avg2_8xn_u8 ((_oil_type_avg2_8xn_u8)(*(void **)oil_function_class_ptr_avg2_8xn_u8))
@@ -315,6 +318,9 @@ typedef void (*_oil_type_combine4_12xn_u8)(uint8_t * d_12xn, int ds1, const uint
OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_16xn_u8;
typedef void (*_oil_type_combine4_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
#define oil_combine4_16xn_u8 ((_oil_type_combine4_16xn_u8)(*(void **)oil_function_class_ptr_combine4_16xn_u8))
+OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_32xn_u8; /* exported pointer to the class object for combine4_32xn_u8 */
+typedef void (*_oil_type_combine4_32xn_u8)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n); /* typed signature used by the call macro below */
+#define oil_combine4_32xn_u8 ((_oil_type_combine4_32xn_u8)(*(void **)oil_function_class_ptr_combine4_32xn_u8)) /* reads the pointer stored at the start of the class object and casts it to the typed function pointer */
OIL_EXPORT OilFunctionClass *oil_function_class_ptr_combine4_8xn_u8;
typedef void (*_oil_type_combine4_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
#define oil_combine4_8xn_u8 ((_oil_type_combine4_8xn_u8)(*(void **)oil_function_class_ptr_combine4_8xn_u8))
diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c
index 9009dff..7096766 100644
--- a/liboil/liboiltrampolines.c
+++ b/liboil/liboiltrampolines.c
@@ -211,6 +211,16 @@ oil_avg2_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, c
((void (*)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n))(_oil_function_class_avg2_16xn_u8.func))(d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, n);
}
+#undef oil_avg2_32xn_u8 /* presumably drops the liboilfuncs.h macro of the same name so a real function can be defined -- confirm include order */
+void
+oil_avg2_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n)
+{ /* Trampoline: a plain function that forwards to whatever implementation the class currently points at. */
+ if (_oil_function_class_avg2_32xn_u8.func == NULL) { /* no implementation pointer stored yet */
+ oil_class_optimize (&_oil_function_class_avg2_32xn_u8); /* expected to fill in .func -- defined elsewhere, confirm */
+ }
+ ((void (*)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, int n))(_oil_function_class_avg2_32xn_u8.func))(d_32xn, ds1, s1_32xn, ss1, s2_32xn, ss2, n); /* cast .func to the real signature and pass every argument through unchanged */
+}
+
#undef oil_avg2_8xn_u8
void
oil_avg2_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n)
@@ -961,6 +971,16 @@ oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss
((void (*)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_16xn_u8.func))(d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, s3_16xn, ss3, s4_16xn, ss4, s5_6, n);
}
+#undef oil_combine4_32xn_u8 /* presumably drops the liboilfuncs.h macro of the same name so a real function can be defined -- confirm include order */
+void
+oil_combine4_32xn_u8 (uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n)
+{ /* Trampoline: a plain function that forwards to whatever implementation the class currently points at. */
+  if (_oil_function_class_combine4_32xn_u8.func == NULL) { /* no implementation pointer stored yet */
+    oil_class_optimize (&_oil_function_class_combine4_32xn_u8); /* expected to fill in .func -- defined elsewhere, confirm */
+  }
+  ((void (*)(uint8_t * d_32xn, int ds1, const uint8_t * s1_32xn, int ss1, const uint8_t * s2_32xn, int ss2, const uint8_t * s3_32xn, int ss3, const uint8_t * s4_32xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_32xn_u8.func))(d_32xn, ds1, s1_32xn, ss1, s2_32xn, ss2, s3_32xn, ss3, s4_32xn, ss4, s5_6, n); /* cast .func to the real signature and pass every argument through unchanged */
+}
+
#undef oil_combine4_8xn_u8
void
oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n)
diff --git a/liboil/ref/wavelet.c b/liboil/ref/wavelet.c
index 77f5e54..7295545 100644
--- a/liboil/ref/wavelet.c
+++ b/liboil/ref/wavelet.c
@@ -149,6 +149,9 @@ OIL_DEFINE_CLASS_FULL (combine4_12xn_u8, "uint8_t *d_12xn, int ds1, "
OIL_DEFINE_CLASS_FULL (combine4_16xn_u8, "uint8_t *d_16xn, int ds1, "
"uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, uint8_t *s3_16xn, "
"int ss3, uint8_t *s4_16xn, int ss4, int16_t *s5_6, int n", combine4_test);
+OIL_DEFINE_CLASS_FULL (combine4_32xn_u8, "uint8_t *d_32xn, int ds1, "
+    "uint8_t *s1_32xn, int ss1, uint8_t *s2_32xn, int ss2, uint8_t *s3_32xn, "
+    "int ss3, uint8_t *s4_32xn, int ss4, int16_t *s5_6, int n", combine4_test); /* s1 is 32 wide like the rest; NOTE(review): liboil appears to derive buffer shapes from these name suffixes -- confirm */
OIL_DEFINE_CLASS_FULL (add2_rshift_add_s16, "int16_t *d, int16_t *s1, "
"int16_t *s2, int16_t *s3, int16_t *s4_2, int n", add2_test);
OIL_DEFINE_CLASS_FULL (add2_rshift_sub_s16, "int16_t *d, int16_t *s1, "
@@ -159,6 +162,8 @@ OIL_DEFINE_CLASS (avg2_12xn_u8, "uint8_t *d_12xn, int ds1, "
"uint8_t *s1_12xn, int ss1, uint8_t *s2_12xn, int ss2, int n");
OIL_DEFINE_CLASS (avg2_16xn_u8, "uint8_t *d_16xn, int ds1, "
"uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, int n");
+OIL_DEFINE_CLASS (avg2_32xn_u8, "uint8_t *d_32xn, int ds1, "
+ "uint8_t *s1_32xn, int ss1, uint8_t *s2_32xn, int ss2, int n");
void
deinterleave_ref (int16_t *d_2xn, int16_t *s_2xn, int n)
@@ -791,6 +796,34 @@ combine4_16xn_u8_ref (uint8_t *d, int ds1,
OIL_DEFINE_IMPL_REF (combine4_16xn_u8_ref, combine4_16xn_u8);
void
+combine4_32xn_u8_ref (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{ /* Reference version: for each of n rows, 32 outputs d[c] = (sum of s5_6[k]*s(k+1)[c] for k=0..3, plus s5_6[4]) >> s5_6[5]. */
+  int row;
+  int col;
+  for (row = 0; row < n; row++) {
+    for (col = 0; col < 32; col++) {
+      int acc = s5_6[4]; /* start from the additive term, then add the four weighted taps */
+      acc += s5_6[0] * s1[col];
+      acc += s5_6[1] * s2[col];
+      acc += s5_6[2] * s3[col];
+      acc += s5_6[3] * s4[col];
+      d[col] = acc >> s5_6[5]; /* stored into a uint8_t without clamping */
+    }
+    d += ds1; /* strides are byte counts: all row pointers are uint8_t * */
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_32xn_u8_ref, combine4_32xn_u8);
+
+void
combine2_8xn_u8_ref (uint8_t *d, int ds1,
uint8_t *s1, int ss1,
uint8_t *s2, int ss2,
@@ -929,3 +962,19 @@ avg2_16xn_u8_ref (uint8_t *d, int ds1, uint8_t *s1, int ss1,
}
OIL_DEFINE_IMPL_REF (avg2_16xn_u8_ref, avg2_16xn_u8);
+void
+avg2_32xn_u8_ref (uint8_t *d, int ds1, uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2, int n)
+{ /* Reference version: for each of n rows, d[c] = (s1[c] + s2[c] + 1) >> 1 for the 32 bytes of the row. */
+  int col;
+  int rows_left;
+  for (rows_left = n; rows_left > 0; rows_left--) {
+    for (col = 0; col < 32; col++) {
+      d[col] = (s1[col] + s2[col] + 1) >> 1; /* the +1 rounds halves upward */
+    }
+    d += ds1; /* strides are byte counts: all row pointers are uint8_t * */
+    s1 += ss1;
+    s2 += ss2;
+  }
+}
+OIL_DEFINE_IMPL_REF (avg2_32xn_u8_ref, avg2_32xn_u8);