summaryrefslogtreecommitdiff
path: root/liboil/jpeg
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2004-09-03 21:40:26 +0000
committerDavid Schleef <ds@schleef.org>2004-09-03 21:40:26 +0000
commit8e3494a8806d9a6c67a09ba9b918345e046262a9 (patch)
tree286411011c0524decdb633853925200e4c236b63 /liboil/jpeg
parentd417426a32ee8b6a8e7487c057fec233fcc55196 (diff)
downloadliboil-8e3494a8806d9a6c67a09ba9b918345e046262a9.tar.gz
add
Diffstat (limited to 'liboil/jpeg')
-rw-r--r--liboil/jpeg/zigzag8x8_s16.c205
1 file changed, 205 insertions, 0 deletions
diff --git a/liboil/jpeg/zigzag8x8_s16.c b/liboil/jpeg/zigzag8x8_s16.c
new file mode 100644
index 0000000..f490ec7
--- /dev/null
+++ b/liboil/jpeg/zigzag8x8_s16.c
@@ -0,0 +1,205 @@
+/* liboil - Library of Optimized Inner Loops
+ * Copyright (C) 2003 David A. Schleef <ds@schleef.org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of version 2.1 of the GNU Lesser General
+ * Public License as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
+ * Boston, MA 02111-1307 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboilfunction.h>
+#include <liboil/simdpack/simdpack.h>
+
/* Raster-index lookup table for the zigzag scan: entry k holds the
 * raster-order index (row*8 + col) of the coefficient that lands at
 * zigzag position k.  Note the scan starts by moving DOWN (0 -> 8),
 * i.e. this is the transposed variant of the familiar JPEG order. */
static const unsigned char zigzag_order[64] = {
   0,  8,  1,  2,  9, 16, 24, 17,
  10,  3,  4, 11, 18, 25, 32, 40,
  33, 26, 19, 12,  5,  6, 13, 20,
  27, 34, 41, 48, 56, 49, 42, 35,
  28, 21, 14,  7, 15, 22, 29, 36,
  43, 50, 57, 58, 51, 44, 37, 30,
  23, 31, 38, 45, 52, 59, 60, 53,
  46, 39, 47, 54, 61, 62, 55, 63
};
+
+static void zigzag8x8_s16_ref(int16_t *dest, int16_t *src, int sstr)
+{
+ int i,j;
+ unsigned int z;
+
+ for(j=0;j<8;j++){
+ for(i=0;i<8;i++){
+ z = zigzag_order[j*8+i];
+ dest[j*8+i] = OIL_GET(src, sstr*(z>>3)+(z&7), int16_t);
+ }
+ }
+}
+OIL_DEFINE_IMPL (zigzag8x8_s16_ref, zigzag8x8_s16_class);
+
+
+#ifdef HAVE_CPU_POWERPC
+/* vperm byte-select control vectors, one 16-byte line per output
+ * vector.  Each selector byte picks from the concatenation of the two
+ * source vectors handed to vperm (0-15 = first operand, 16-31 = second
+ * operand); the trailing register-pair comments label those operands.
+ * 00 indicates that the element can't be handled by vperm, and needs
+ * to be fixed up later -- the scalar block8x8_s16 copies at the end of
+ * zigzag8x8_s16_a16_altivec patch those slots.
+ * NOTE(review): `u8` is presumably a byte typedef supplied elsewhere
+ * in the build -- confirm against the simdpack headers. */
+u8 mangle[128] __attribute__ ((__aligned__ (16))) = {
+ 00,00, 2, 3, 0, 1,16,17,18,19,20,21,00,00,00,00, /* 1, 2 */
+ 2, 3, 0, 1,16,17,18,19,20,21,22,23,24,25,00,00, /* 3, 4 */
+ 8, 9, 6, 7, 4, 5, 2, 3, 0, 1,16,17,18,19,20,21, /* 5, 6 */
+ 6, 7, 8, 9,10,11,12,13,30,31,28,29,26,27,24,25, /* 6, 7 */
+ 6, 7, 4, 5, 2, 3, 0, 1,18,19,20,21,22,23,24,25, /* 7, 0 */
+ 10,11,12,13,14,15,30,31,28,29,26,27,24,25,22,23, /* 0, 1 */
+ 00,00, 6, 7, 8, 9,10,11,12,13,14,15,30,31,28,29, /* 2, 3 */
+ 00,00,00,00,10,11,12,13,14,15,30,31,28,29,00,00, /* 4, 5 */
+};
+
+/* IMPL zigzag8x8_s16_a16_altivec defined(SIMDPACK_USE_ALTIVEC) */
+/* AltiVec implementation: load the 8 source rows into vector registers
+ * 0-7, pre-rotate each row by a different amount, transpose the 8x8
+ * block with three rounds of vmrghh/vmrglh, then gather the zigzag
+ * runs with vperm driven by the `mangle` control vectors above; the
+ * elements vperm cannot reach are patched with scalar copies at the
+ * end.
+ * NOTE(review): sstr appears to be the source stride in bytes and the
+ * "a16" suffix suggests dest must be 16-byte aligned -- confirm
+ * against the sl_altivec_* helpers.  The asm blocks use hard-coded
+ * vector register numbers and declare no clobber lists; this relies
+ * on the register convention established by those helper macros. */
+SL_zigzag8x8_s16_storage
+void zigzag8x8_s16_a16_altivec(int16_t *dest, int16_t *src, int sstr)
+{
+  /* v0..v7 <- the 8 rows of the source block */
+  sl_altivec_load8_0(src,sstr);
+
+  /* "slide" vectors to right */
+  __asm__ __volatile__(
+    "vsldoi 0,0,0,0\n"
+    "vsldoi 1,1,1,14\n"
+    "vsldoi 2,2,2,12\n"
+    "vsldoi 3,3,3,10\n"
+    "vsldoi 4,4,4,8\n"
+    "vsldoi 5,5,5,6\n"
+    "vsldoi 6,6,6,4\n"
+    "vsldoi 7,7,7,2\n"
+  );
+
+  /* transpose */
+  __asm__ __volatile__(
+    "vmrghh 8, 0, 4\n"
+    "\tvmrglh 9, 0, 4\n"
+    "\tvmrghh 10, 1, 5\n"
+    "\tvmrglh 11, 1, 5\n"
+    "\tvmrghh 12, 2, 6\n"
+    "\tvmrglh 13, 2, 6\n"
+    "\tvmrghh 14, 3, 7\n"
+    "\tvmrglh 15, 3, 7\n"
+
+    "\tvmrghh 16, 8, 12\n"
+    "\tvmrglh 17, 8, 12\n"
+    "\tvmrghh 18, 9, 13\n"
+    "\tvmrglh 19, 9, 13\n"
+    "\tvmrghh 20, 10, 14\n"
+    "\tvmrglh 21, 10, 14\n"
+    "\tvmrghh 22, 11, 15\n"
+    "\tvmrglh 23, 11, 15\n"
+
+    "\tvmrghh 0, 16, 20\n"
+    "\tvmrglh 1, 16, 20\n"
+    "\tvmrghh 2, 17, 21\n"
+    "\tvmrglh 3, 17, 21\n"
+    "\tvmrghh 4, 18, 22\n"
+    "\tvmrglh 5, 18, 22\n"
+    "\tvmrghh 6, 19, 23\n"
+    "\tvmrglh 7, 19, 23\n"
+  );
+
+  /* v8..v15 <- the eight vperm control vectors */
+  sl_altivec_load8_8(mangle,16);
+
+  /* gather the zigzag runs; v16..v23 become the 8 output rows */
+  __asm__ __volatile__(
+    "\n"
+    "\tvperm 16,1,2,8\n"
+    "\tvperm 17,3,4,9\n"
+    "\tvperm 18,5,6,10\n"
+    "\tvperm 19,6,7,11\n"
+    "\tvperm 20,7,0,12\n"
+    "\tvperm 21,0,1,13\n"
+    "\tvperm 22,2,3,14\n"
+    "\tvperm 23,4,5,15\n"
+  );
+
+  /* v16..v23 -> dest (contiguous: stride 16 bytes = one 8x16-bit row) */
+  sl_altivec_store8_16(dest,16);
+
+  /* fix up the elements that were missed */
+
+  block8x8_s16(dest,16,0,0) = block8x8_s16(src,sstr,0,0);
+  block8x8_s16(dest,16,0,6) = block8x8_s16(src,sstr,3,0);
+  block8x8_s16(dest,16,0,7) = block8x8_s16(src,sstr,2,1);
+  block8x8_s16(dest,16,1,7) = block8x8_s16(src,sstr,5,0);
+
+  block8x8_s16(dest,16,6,0) = block8x8_s16(src,sstr,2,7);
+  block8x8_s16(dest,16,7,0) = block8x8_s16(src,sstr,5,6);
+  block8x8_s16(dest,16,7,1) = block8x8_s16(src,sstr,4,7);
+  block8x8_s16(dest,16,7,7) = block8x8_s16(src,sstr,7,7);
+}
+#endif
+
+
+#ifdef TEST_zigzag8x8_s16
+int TEST_zigzag8x8_s16(void)
+{
+ int i;
+ int pass;
+ int failures = 0;
+ int16_t *src, *dest_ref, *dest_test;
+ struct sl_profile_struct t;
+
+#ifdef ALIGNED16
+ src = sl_malloc_s16_a16(64);
+ dest_ref = sl_malloc_s16_a16(64);
+ dest_test = sl_malloc_s16_a16(64);
+#else
+ src = sl_malloc_s16(64);
+ dest_ref = sl_malloc_s16(64);
+ dest_test = sl_malloc_s16(64);
+#endif
+
+ sl_profile_init(t);
+ srand(20020326);
+
+ printf("I: " sl_stringify(zigzag8x8_s16_FUNC) "\n");
+
+ for(pass=0;pass<N_PASS;pass++){
+ for(i=0;i<64;i++)src[i] = sl_rand_s16();
+
+ zigzag8x8_s16_ref(dest_test, src, 16);
+ sl_profile_start(t);
+ zigzag8x8_s16_FUNC(dest_ref, src, 16);
+ sl_profile_stop(t);
+
+ for(i=0;i<64;i++){
+ if(dest_test[i]!=dest_ref[i]){
+ failures++;
+ }
+ }
+ }
+
+ sl_free(src);
+ sl_free(dest_ref);
+ sl_free(dest_test);
+
+ if(failures){
+ printf("E: %d failures\n",failures);
+ }
+
+ sl_profile_print(t);
+
+ return failures;
+}
+#endif
+
+